{
  "prompts": 300,
  "models": [
    "claude-opus-4.8",
    "claude-sonnet-4.6",
    "gpt-5.5",
    "o3",
    "grok-4.3",
    "mistral-large",
    "llama-4-maverick",
    "deepseek-v3.2",
    "qwen3.7-max",
    "gemini-3.1-pro"
  ],
  "judge": "anthropic/claude-sonnet-4.6",
  "leaderboard": [
    {
      "model": "claude-opus-4.8",
      "avg_total": 30.02,
      "avg_quality": 8.95,
      "avg_accuracy": 8.39,
      "avg_speed": 2.84,
      "avg_style": 4.98,
      "avg_creativity": 4.86,
      "wins": 130,
      "top3": 265,
      "halluc_count": 17,
      "answered": 300,
      "errors": 0,
      "rank": 1
    },
    {
      "model": "grok-4.3",
      "avg_total": 28.98,
      "avg_quality": 8.06,
      "avg_accuracy": 7.56,
      "avg_speed": 4.69,
      "avg_style": 4.8,
      "avg_creativity": 3.87,
      "wins": 61,
      "top3": 176,
      "halluc_count": 51,
      "answered": 300,
      "errors": 0,
      "rank": 2
    },
    {
      "model": "gemini-3.1-pro",
      "avg_total": 28.66,
      "avg_quality": 8.46,
      "avg_accuracy": 7.59,
      "avg_speed": 3.54,
      "avg_style": 4.92,
      "avg_creativity": 4.14,
      "wins": 34,
      "top3": 152,
      "halluc_count": 46,
      "answered": 291,
      "errors": 9,
      "rank": 3
    },
    {
      "model": "claude-sonnet-4.6",
      "avg_total": 27.77,
      "avg_quality": 8.75,
      "avg_accuracy": 7.73,
      "avg_speed": 1.75,
      "avg_style": 4.88,
      "avg_creativity": 4.66,
      "wins": 5,
      "top3": 63,
      "halluc_count": 72,
      "answered": 300,
      "errors": 0,
      "rank": 4
    },
    {
      "model": "gpt-5.5",
      "avg_total": 27.27,
      "avg_quality": 8.53,
      "avg_accuracy": 8.41,
      "avg_speed": 1.14,
      "avg_style": 4.93,
      "avg_creativity": 4.27,
      "wins": 1,
      "top3": 35,
      "halluc_count": 9,
      "answered": 300,
      "errors": 0,
      "rank": 5
    },
    {
      "model": "qwen3.7-max",
      "avg_total": 26.69,
      "avg_quality": 8.39,
      "avg_accuracy": 7.45,
      "avg_speed": 1.72,
      "avg_style": 4.79,
      "avg_creativity": 4.34,
      "wins": 2,
      "top3": 29,
      "halluc_count": 52,
      "answered": 300,
      "errors": 0,
      "rank": 6
    },
    {
      "model": "deepseek-v3.2",
      "avg_total": 26.24,
      "avg_quality": 8.11,
      "avg_accuracy": 7.15,
      "avg_speed": 2.29,
      "avg_style": 4.85,
      "avg_creativity": 3.84,
      "wins": 1,
      "top3": 15,
      "halluc_count": 80,
      "answered": 300,
      "errors": 0,
      "rank": 7
    },
    {
      "model": "o3",
      "avg_total": 24.38,
      "avg_quality": 6.99,
      "avg_accuracy": 5.89,
      "avg_speed": 4.08,
      "avg_style": 3.98,
      "avg_creativity": 3.44,
      "wins": 66,
      "top3": 164,
      "halluc_count": 96,
      "answered": 300,
      "errors": 0,
      "rank": 8
    },
    {
      "model": "mistral-large",
      "avg_total": 21.88,
      "avg_quality": 6.58,
      "avg_accuracy": 4.74,
      "avg_speed": 3.08,
      "avg_style": 4.2,
      "avg_creativity": 3.27,
      "wins": 0,
      "top3": 1,
      "halluc_count": 191,
      "answered": 300,
      "errors": 0,
      "rank": 9
    },
    {
      "model": "llama-4-maverick",
      "avg_total": 20.01,
      "avg_quality": 4.84,
      "avg_accuracy": 4.97,
      "avg_speed": 4.9,
      "avg_style": 3.05,
      "avg_creativity": 2.25,
      "wins": 0,
      "top3": 0,
      "halluc_count": 111,
      "answered": 300,
      "errors": 0,
      "rank": 10
    }
  ],
  "per_prompt": [
    {
      "id": 1,
      "category": "Contract & Commercial",
      "use_case": "SaaS Agreement Redline",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 34.9,
          "note": "Thorough redline, accurate statutory cites, strong market-standard comparison table.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 50.3,
          "note": "Exceptional depth; all citations verified; production-ready redline language.",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 42.1,
          "note": "Thorough redline, accurate UCC citations, strong market comparison table.",
          "rank": 6
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.8,
          "note": "Strong redline; UCC SaaS applicability caveat appropriately flagged; no hallucinations.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.6,
          "note": "Thorough redline, accurate market benchmarks, UCC/Restatement cited appropriately.",
          "rank": 1
        },
        "mistral-large": {
          "total": 3.2,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.2,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 26.5,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 6.2,
          "note": "Cavendish case misapplied; unrelated to pricing clauses, penalized accordingly.",
          "rank": 8
        },
        "deepseek-v3.2": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 48.4,
          "note": "Thorough redline, solid market benchmarks, minor UCC applicability stretch for SaaS",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 26.0,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 56.2,
          "note": "Williams v. Walker-Thomas cited correctly but tangentially; UCC/Restatement sound.",
          "rank": 9
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 19.7,
          "note": "Solid redline; UCC citation technically inapplicable to SaaS but not hallucinated.",
          "rank": 4
        }
      }
    },
    {
      "id": 2,
      "category": "Contract & Commercial",
      "use_case": "IP Assignment",
      "models": {
        "claude-opus-4.8": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 45.5,
          "note": "Thorough, jurisdiction-aware; Stanford v. Roche and CCNV correctly cited.",
          "rank": 4
        },
        "claude-sonnet-4.6": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 52.8,
          "note": "Exceptional clause: correct WFH caveat, OSS contamination risk, multi-jurisdiction flags.",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 30.6,
          "note": "Comprehensive, well-structured; correctly cites 17 U.S.C. §§101/201(b) accurately.",
          "rank": 5
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.8,
          "note": "Production-ready clause; moral rights waiver and open-source handling are standout additions.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 31.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 4.7,
          "note": "Solid, production-ready clause; misses open-source license conflict and work-for-hire nuance.",
          "rank": 3
        },
        "mistral-large": {
          "total": 28.2,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 26.5,
          "note": "VARA §106A misapplied to software; otherwise legally sound and thorough.",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 26.6,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 5.3,
          "note": "Solid draft; truncated cite, missing work-for-hire analysis and open-source risk flags.",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 25.9,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 49.7,
          "note": "Crosstown Music citation unverifiable/misapplied; deduct for hallucinated precedent",
          "rank": 10
        },
        "qwen3.7-max": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 62.7,
          "note": "Stanford v. Roche cited correctly; CA §2870 and moral rights nuances excellent.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 31.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 23.2,
          "note": "Stanford v. Roche cite accurate; Cal. §2870, OSS copyleft, moral rights all flagged.",
          "rank": 1
        }
      }
    },
    {
      "id": 3,
      "category": "Contract & Commercial",
      "use_case": "Limitation of Liability",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 44.1,
          "note": "Supercap-insurance linkage and trade matrix are standout practical tools.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 25.4,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 67.2,
          "note": "Schrier cite unverifiable; Biotronik case real but misapplied here",
          "rank": 7
        },
        "gpt-5.5": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 65.8,
          "note": "Comprehensive, production-ready vendor counter with strong fallback positions throughout.",
          "rank": 4
        },
        "o3": {
          "total": 25.1,
          "quality": 8.0,
          "accuracy": 4.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 11.9,
          "note": "MSA Technology v Antec and GB Gas citations appear fabricated or misrepresented.",
          "rank": 8
        },
        "grok-4.3": {
          "total": 32.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 6.6,
          "note": "Commercially sound, well-structured, accurate statutory refs, strong negotiation rationale.",
          "rank": 1
        },
        "mistral-large": {
          "total": 3.2,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.2,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 43.0,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 23.6,
          "quality": 6.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 6.7,
          "note": "Solid basics but lacks mutual indemnity triggers, insurance backstops, and GDPR nuance.",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 46.5,
          "note": "Solid vendor-side positions; lacks GDPR/cross-border nuance and insurance specifics.",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 27.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 80.9,
          "note": "Solid vendor-side redlines; CJEU cite accurate; SCA Hygiene correctly cited.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 25.4,
          "note": "Production-ready, well-structured vendor counter-positions with accurate statutory references.",
          "rank": 2
        }
      }
    },
    {
      "id": 4,
      "category": "Contract & Commercial",
      "use_case": "MSA Termination Section",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 58.1,
          "note": "Cavendish/Makdessi and NIST SP 800-88 citations verified and accurate.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 26.0,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 111.1,
          "note": "Wassenaar cite is real but misapplied; IACCM cite unverifiable; otherwise excellent",
          "rank": 8
        },
        "gpt-5.5": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 102.1,
          "note": "Comprehensive, production-ready draft with strong cross-jurisdictional drafting notes.",
          "rank": 6
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 13.3,
          "note": "Production-ready, well-structured; statutory cites verified and appropriately caveated.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 6.3,
          "note": "Comprehensive, well-structured, production-ready with appropriate bracketed placeholders throughout.",
          "rank": 1
        },
        "mistral-large": {
          "total": 3.2,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.2,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 33.6,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 22.0,
          "quality": 6.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 5.3,
          "note": "UCC §2-309 and Restatement §235 misapplied; transition cost clause supplier-unfavorable",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 27.3,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 70.3,
          "note": "Dunlop Pneumatic Tyre cite is real but jurisdiction-mismatched for US MSA context",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 29.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 72.3,
          "note": "Production-ready draft; NIST 800-88 and §365 citations verified and accurate.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 28.6,
          "note": "Production-ready draft; NIST 800-88 and 11 USC 365 correctly cited.",
          "rank": 4
        }
      }
    },
    {
      "id": 5,
      "category": "Contract & Commercial",
      "use_case": "Revenue Share Agreement",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 46.8,
          "note": "Comprehensive, production-ready clause with AI-specific deprecation risks well-addressed.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 86.3,
          "note": "Dunlop and Wassenaar are real, verifiable cases; comprehensive AI-specific deprecation provisions.",
          "rank": 8
        },
        "gpt-5.5": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 51.2,
          "note": "Comprehensive, production-ready clause with strong deprecation and audit provisions.",
          "rank": 4
        },
        "o3": {
          "total": 28.1,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 13.5,
          "note": "Milne v. C.I.R. cite is fabricated/misapplied; deduct accuracy points accordingly.",
          "rank": 7
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.5,
          "note": "Production-ready draft; covers all requirements with precise, enforceable language.",
          "rank": 1
        },
        "mistral-large": {
          "total": 28.2,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 35.4,
          "note": "N.Y. Gen. Oblig. Law §5-328 and UCC §2-515 citations are dubious/misapplied",
          "rank": 6
        },
        "llama-4-maverick": {
          "total": 20.0,
          "quality": 6.0,
          "accuracy": 3.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 5.6,
          "note": "Tesoro case cited is unverifiable/irrelevant; deprecation clause lacks successor model provisions",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 59.8,
          "note": "Production-ready, well-structured; IRC §197 note adds practical tax depth.",
          "rank": 5
        },
        "qwen3.7-max": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 76.5,
          "note": "Specht v. Netscape misapplied; not a goods/services UCC case.",
          "rank": 9
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 25.7,
          "note": "Verifiable citations, strong drafting notes, usury savings clause well-handled.",
          "rank": 3
        }
      }
    },
    {
      "id": 6,
      "category": "NDA & Confidentiality",
      "use_case": "Mutual NDA Triage",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 20.5,
          "note": "eBay and DTSA citations accurate; bond-waiver analysis nuanced and correct.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 22.9,
          "note": "Solid triage; bond-waiver analysis strong; trade secret cap insight valuable.",
          "rank": 7
        },
        "gpt-5.5": {
          "total": 29.7,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 16.0,
          "note": "Solid triage; DTSA cite accurate; bond waiver caveat well-flagged.",
          "rank": 2
        },
        "o3": {
          "total": 27.0,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 2.4,
          "note": "Correct YELLOW flag; missed residuals clause risk and mutual vs unilateral bond issue.",
          "rank": 5
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 5.1,
          "note": "Solid triage; bond waiver nuance correct; CI overbreadth well-flagged.",
          "rank": 3
        },
        "mistral-large": {
          "total": 22.2,
          "quality": 7.0,
          "accuracy": 3.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 20.4,
          "note": "Multiple unverifiable/hallucinated case citations severely undermine legal accuracy.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 16.1,
          "quality": 5.0,
          "accuracy": 2.0,
          "speed": 4.1,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 5.6,
          "note": "Hallucinated Continuum case; no definitive GREEN/YELLOW/RED verdict given",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.4,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 29.8,
          "note": "Solid YELLOW triage; bond waiver risk well-flagged; CI carve-outs actionable.",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 25.0,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 68.3,
          "note": "Brown v. TGS and Kos Pharma citations appear fabricated or misapplied.",
          "rank": 8
        },
        "gemini-3.1-pro": {
          "total": 27.9,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 24.3,
          "note": "Solid DTSA/UTSA trade secret bifurcation point; bond waiver nuance well-handled.",
          "rank": 4
        }
      }
    },
    {
      "id": 7,
      "category": "NDA & Confidentiality",
      "use_case": "NDA with Carve-Outs",
      "models": {
        "claude-opus-4.8": {
          "total": 28.8,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 72.3,
          "note": "Computer Associates/Gemstar cited as gun-jumping precedents; not accurate characterizations.",
          "rank": 5
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 115.7,
          "note": "Comprehensive, well-structured; case citations appear accurate; minor truncation at end",
          "rank": 7
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 120.8,
          "note": "Comprehensive, well-structured NDA with strong PE-specific carve-outs and clean-team framework.",
          "rank": 6
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 12.9,
          "note": "Comprehensive, production-ready NDA with well-drafted clean team and residuals provisions.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 30.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 7.6,
          "note": "Solid PE-specific NDA; DTSA cite accurate; antitrust carve-outs underdeveloped.",
          "rank": 2
        },
        "mistral-large": {
          "total": 25.2,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 32.0,
          "note": "Al Minor (Ohio) and Stryker citations are misapplied or unverifiable here.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 19.6,
          "quality": 6.0,
          "accuracy": 3.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 9.6,
          "note": "Hallucinated Deutsche Bank case; residuals clause lacks adequate safeguards for trade secrets",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 100.9,
          "note": "Comprehensive PE-specific NDA with strong practitioner notes and antitrust awareness.",
          "rank": 4
        },
        "qwen3.7-max": {
          "total": 26.3,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 75.3,
          "note": "Flakeboard cite unverifiable as stated; Ernst & Young ECJ cite misapplied contextually",
          "rank": 8
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 24.9,
          "note": "Solid PE-specific NDA; residuals and clean team provisions well-drafted.",
          "rank": 3
        }
      }
    },
    {
      "id": 8,
      "category": "Privacy & Data Protection",
      "use_case": "GDPR DPA Review",
      "models": {
        "claude-opus-4.8": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 19.4,
          "note": "Comprehensive Art.28 gap analysis; Schrems II and UK GDPR flags add value.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 33.6,
          "note": "Comprehensive Art.28 gap analysis; C-340/21 and Schrems II correctly cited.",
          "rank": 7
        },
        "gpt-5.5": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 10.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 21.9,
          "note": "Comprehensive Art.28 gap analysis, all mandatory elements covered accurately.",
          "rank": 4
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 10.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 6.3,
          "note": "Thorough Art.28 gap analysis; minor omission on sub-processor objection right.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 5.0,
          "note": "Thorough Art.28(3) gap analysis; transfer section slightly thin on specifics.",
          "rank": 5
        },
        "mistral-large": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 26.9,
          "note": "Thorough Art.28 gap analysis; Schrems II and EDPB cites verified.",
          "rank": 6
        },
        "llama-4-maverick": {
          "total": 24.0,
          "quality": 6.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 3.5,
          "note": "Misses key Art.28 gaps: confidentiality, security, audits, deletion/return obligations",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 46.1,
          "note": "Thorough Art.28(3) gap analysis; UK/EU GDPR dual-track handled well.",
          "rank": 9
        },
        "qwen3.7-max": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 64.2,
          "note": "Schrems II and EDPB Rec 01/2020 correctly cited; TIA requirement accurate.",
          "rank": 8
        },
        "gemini-3.1-pro": {
          "total": 31.7,
          "quality": 9.0,
          "accuracy": 10.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 16.7,
          "note": "Comprehensive Art.28 gap analysis; correctly flags transfer mechanism deficiency.",
          "rank": 1
        }
      }
    },
    {
      "id": 9,
      "category": "Privacy & Data Protection",
      "use_case": "CCPA DSAR Response",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 34.7,
          "note": "Sephora cite accurate; verification tiers, GPC, CPRA nuances all correct.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 53.1,
          "note": "Calhoun v. Google exists but cited context slightly imprecise; otherwise excellent.",
          "rank": 7
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 81.9,
          "note": "Comprehensive, well-cited CCPA/CPRA template with correct statutory references and practical options.",
          "rank": 8
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 15.1,
          "note": "Thorough, production-ready CCPA response with correct statutory citations and exceptions.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 5.1,
          "note": "Solid CCPA/CPRA response; lacks CPRA opt-out and sensitive PI nuances.",
          "rank": 4
        },
        "mistral-large": {
          "total": 29.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 31.5,
          "note": "Comprehensive, well-structured; minor cite imprecision but no hallucinated case law.",
          "rank": 6
        },
        "llama-4-maverick": {
          "total": 26.0,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 4.5,
          "note": "Solid CCPA template; misses CPRA updates, opt-out, and 90-day extension notice.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 43.0,
          "note": "Thorough CPRA-compliant draft with accurate citations and practical attorney notes.",
          "rank": 5
        },
        "qwen3.7-max": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 74.9,
          "note": "Thorough dual-draft approach; minor CPRA citation nuances but no hallucinations.",
          "rank": 9
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 27.9,
          "note": "Thorough, cite-accurate CCPA/CPRA response with practical exceptions and verification guidance.",
          "rank": 3
        }
      }
    },
    {
      "id": 10,
      "category": "Privacy & Data Protection",
      "use_case": "Cross-Border Data Transfer",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 39.4,
          "note": "Latombe dismissal claim unverifiable but flagged as uncertain; otherwise excellent.",
          "rank": 6
        },
        "claude-sonnet-4.6": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 75.3,
          "note": "Exceptional depth; RISAA, Works Council, Schrems III risk all correctly flagged.",
          "rank": 7
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 114.9,
          "note": "Comprehensive, jurisdiction-correct, production-ready analysis with accurate citations throughout.",
          "rank": 9
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 19.4,
          "note": "Comprehensive, accurate, well-structured with strong German-specific HR compliance detail.",
          "rank": 3
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.6,
          "note": "Accurate, well-structured, actionable; correctly cites 2021 SCCs and DPF 2023.",
          "rank": 2
        },
        "mistral-large": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 30.9,
          "note": "Accurate, well-structured, jurisdiction-specific; DPF Schrems III risk appropriately flagged.",
          "rank": 4
        },
        "llama-4-maverick": {
          "total": 23.0,
          "quality": 6.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 6.1,
          "note": "Adequate overview but shallow TIA analysis; odd boxed conclusion undermines professionalism",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 62.5,
          "note": "Accurate, well-structured, actionable; correctly identifies DPF, SCCs, BCRs tradeoffs.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 29.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 70.3,
          "note": "Exceptional layered analysis; BetrVG, CLOUD Act, and BDSG §26 expertly flagged.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 31.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 25.9,
          "note": "Excellent: DPF HR carve-out, BetrVG §87, TIA/EO14086 nuances all correct.",
          "rank": 1
        }
      }
    },
    {
      "id": 11,
      "category": "Risk & Compliance",
      "use_case": "Indemnification Risk",
      "models": {
        "claude-opus-4.8": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 26.2,
          "note": "Thorough multi-jurisdiction analysis; gross negligence gap and anti-indemnity statutes well-flagged.",
          "rank": 4
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 30.7,
          "note": "Markborough cite unverifiable; Gross v. Sweet context slightly misapplied",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 19.8,
          "note": "Ethyl Corp. and Dresser Industries are real, correctly cited Texas cases.",
          "rank": 2
        },
        "o3": {
          "total": 27.1,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 6.9,
          "note": "Texas statute cite wrong; Delaware Fortis/Allergan citation unverified/suspect.",
          "rank": 6
        },
        "grok-4.3": {
          "total": 29.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 4.9,
          "note": "Valhal cite plausible but gross negligence gap and anti-indemnity statutes well-flagged.",
          "rank": 3
        },
        "mistral-large": {
          "total": 24.8,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 20.7,
          "note": "WorldCom cite misapplied; indemnity context wrong for that case.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 22.6,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 5.6,
          "note": "Canada Steamship cited but misapplied; it limits, not supports, broad indemnities.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 25.6,
          "note": "Texas cite slightly off but statutes real; solid dual-party perspective.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 27.0,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 59.5,
          "note": "Hooper Associates cite misapplied; City of Santa Barbara holding accurately cited.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 18.4,
          "note": "Ethyl Corp cite verified; UCTA, NY GOL, CA Civil Code correctly applied.",
          "rank": 1
        }
      }
    },
    {
      "id": 12,
      "category": "Risk & Compliance",
      "use_case": "AI Act Classification",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 37.9,
          "note": "Comprehensive, accurate AI Act analysis with correct Annex III classification and timeline.",
          "rank": 5
        },
        "claude-sonnet-4.6": {
          "total": 29.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 61.5,
          "note": "Exceptional depth; Amazon AI cite is factual, not legal hallucination.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 124.5,
          "note": "Comprehensive, accurate EU AI Act analysis with correct articles, dates, penalties.",
          "rank": 7
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 13.3,
          "note": "Minor article numbering imprecision but substantively accurate, comprehensive, and actionable.",
          "rank": 3
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 6.2,
          "note": "Accurate, well-structured; minor penalty timeline nuance on existing systems.",
          "rank": 1
        },
        "mistral-large": {
          "total": 27.2,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 33.3,
          "note": "CJEU C-524/19 is hallucinated; GPAI timeline error (Aug 2025 not 2027)",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 22.0,
          "quality": 6.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 4.2,
          "note": "Penalty figures wrong; outdated draft references; misses GDPR intersection and bias obligations",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 25.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 47.5,
          "note": "Article 5(1)(b) misapplied; timeline for high-risk AI is 24 months, not 36.",
          "rank": 9
        },
        "qwen3.7-max": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 84.9,
          "note": "Comprehensive, accurate EU AI Act analysis with strong cross-jurisdictional coverage.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 26.4,
          "note": "Accurate, well-structured, correct Annex III cite, GDPR Art.22 cross-reference valuable.",
          "rank": 2
        }
      }
    },
    {
      "id": 13,
      "category": "Risk & Compliance",
      "use_case": "AI Training Data Liability",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 46.0,
          "note": "Exceptional depth; hiQ ToS characterization slightly overstated but defensible.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 93.4,
          "note": "Exceptional depth; all cited cases verifiable; risk matrix highly actionable.",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 104.9,
          "note": "Comprehensive, jurisdiction-accurate, cites verified; Thomson Reuters 2025 ruling correctly noted.",
          "rank": 7
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 14.9,
          "note": "Ticketmaster v. SeatGeek cite unverifiable but rest is solid and thorough",
          "rank": 2
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 11.5,
          "note": "Van Buren cite accurate; DSM/database right analysis precise and actionable.",
          "rank": 1
        },
        "mistral-large": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 51.4,
          "note": "Thorough, well-cited; AI Act citation slightly premature but defensible.",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 25.0,
          "quality": 7.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 7.5,
          "note": "Solid framework but misattributes Authors Guild quote; misses Vanderhye v. iParadigms",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 29.7,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 39.8,
          "note": "Solid analysis, verified citations, actionable recommendations, minor circuit-split oversimplification.",
          "rank": 4
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 75.8,
          "note": "Thorough, well-cited memo; ABA Opinion 512 number warrants verification.",
          "rank": 9
        },
        "gemini-3.1-pro": {
          "total": 29.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 44.8,
          "note": "Thorough, well-cited; Mata sanctions case accurately cited; minor EU gaps",
          "rank": 6
        }
      }
    },
    {
      "id": 14,
      "category": "Employment Law",
      "use_case": "Non-Compete Review",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 41.6,
          "note": "Accurate, thorough, well-structured; FTC rule status correctly noted.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 72.4,
          "note": "Exceptional depth; Ryan LLC cite accurate; all major cases verifiable.",
          "rank": 8
        },
        "gpt-5.5": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 84.8,
          "note": "Accurate, well-structured, production-ready; all cited cases verifiable.",
          "rank": 9
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 11.3,
          "note": "Gallagher Healthcare cite unverified but core analysis is accurate and actionable.",
          "rank": 4
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 10.7,
          "note": "Accurate citations, strong narrowed draft, good cross-jurisdictional choice-of-law flag.",
          "rank": 1
        },
        "mistral-large": {
          "total": 29.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 32.1,
          "note": "Solid analysis; case citations appear verifiable; CA prohibition correctly emphasized.",
          "rank": 6
        },
        "llama-4-maverick": {
          "total": 27.0,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 3.8,
          "note": "Solid analysis; narrowed draft lacks CA-specific carve-out language.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 65.2,
          "note": "Accurate citations, strong CA carve-out, well-structured narrowed draft.",
          "rank": 5
        },
        "qwen3.7-max": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 87.9,
          "note": "Accurate cites, SB699/AB1076 flagged, TX fee-shifting noted, CA-proof drafts.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 26.9,
          "note": "Accurate cites, SB699/AB1076 flagged, TX reformation duty correctly noted.",
          "rank": 3
        }
      }
    },
    {
      "id": 15,
      "category": "Employment Law",
      "use_case": "Executive Severance",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 61.1,
          "note": "Exceptional depth; Ryan LLC cite accurate; McLaren Macomb correct; OWBPA mechanics precise.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 116.8,
          "note": "Exceptional depth; Oubre, McLaren Macomb, OWBPA citations all verifiable and accurate.",
          "rank": 8
        },
        "gpt-5.5": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 113.1,
          "note": "Comprehensive, jurisdiction-aware, cites McLaren Macomb and DTSA correctly.",
          "rank": 6
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 12.9,
          "note": "Production-ready, jurisdiction-aware, 409A/ADEA compliant, strong counsel notes.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 30.6,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.9,
          "note": "Solid template; Speak Out Act cite correct; equity placeholder needs specificity.",
          "rank": 3
        },
        "mistral-large": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 47.9,
          "note": "Oubre and AMN Healthcare citations are real; solid CA-specific compliance notes.",
          "rank": 5
        },
        "llama-4-maverick": {
          "total": 25.0,
          "quality": 7.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.3,
          "note": "Solid draft but garden leave conflated with severance; double-trigger logic inverted.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 90.0,
          "note": "Production-ready draft with strong ADEA compliance and practical counsel notes.",
          "rank": 9
        },
        "qwen3.7-max": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 63.7,
          "note": "McLaren Macomb cite verified; 409A, OWBPA, CA law correctly applied.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 52.2,
          "note": "McLaren Macomb cited correctly; Speak Out Act cite slightly imprecise but acceptable.",
          "rank": 4
        }
      }
    },
    {
      "id": 16,
      "category": "M\\&A & Corporate",
      "use_case": "Rep & Warranty Analysis",
      "models": {
        "claude-opus-4.8": {
          "total": 27.8,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 44.4,
          "note": "Cobalt/Eurofins citations unverifiable; ABRY cite real but misapplied slightly",
          "rank": 6
        },
        "claude-sonnet-4.6": {
          "total": 25.4,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 84.2,
          "note": "Anvil Knitwear cite appears fabricated; Akorn misapplied to sandbagging",
          "rank": 9
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 93.4,
          "note": "Comprehensive, well-structured, jurisdiction-correct M&A rep analysis with actionable drafting.",
          "rank": 4
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 13.0,
          "note": "Comprehensive, well-structured, accurate Delaware M&A indemnity framework with actionable drafting.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 25.6,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 8.6,
          "note": "CBS v. Ziff-Davis cited inaccurately; case concerns warranty reliance, not disclosure",
          "rank": 8
        },
        "mistral-large": {
          "total": 3.2,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.2,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 34.4,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 27.0,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 6.1,
          "note": "Solid framework but missing IP, employee, and change-of-control reps analysis.",
          "rank": 7
        },
        "deepseek-v3.2": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 70.7,
          "note": "Thorough, market-accurate draft; lacks cross-border and R&W insurance angles.",
          "rank": 5
        },
        "qwen3.7-max": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 70.7,
          "note": "Practitioner-grade; all citations verified; materiality scrape interaction well-flagged.",
          "rank": 3
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 24.2,
          "note": "ABRY Partners cite verified; deductible vs. tipping basket distinction excellent.",
          "rank": 2
        }
      }
    },
    {
      "id": 17,
      "category": "M\\&A & Corporate",
      "use_case": "Entity Structure",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 54.3,
          "note": "Exceptional cross-border analysis; NHR 2.0/IFICI update and PE risk standout.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 66.7,
          "note": "Comprehensive, well-structured; PE risk and NHR 2.0 update are standout additions.",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 134.7,
          "note": "Comprehensive, well-cited, actionable cross-border advice with correct NHR 2.0 update.",
          "rank": 7
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 17.4,
          "note": "Comprehensive, well-structured; minor NHR 2024 reform omission noted.",
          "rank": 3
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 11.0,
          "note": "Thorough, accurate PFIC/NHR/Delaware C-corp analysis with correct IRC citations.",
          "rank": 1
        },
        "mistral-large": {
          "total": 23.2,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 51.0,
          "note": "Garlock, Rev.Rul.2006-1, PT Ruling 157/2019 appear fabricated; PFIC framing confused",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 23.0,
          "quality": 6.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 5.6,
          "note": "PFIC section misapplied; NHR 2024 reform ignored; wrong IRC cite for PFIC",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 80.7,
          "note": "Strong analysis; IRC §351 asset-transfer nuance slightly overstated for clean flip.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 29.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 69.9,
          "note": "Exceptional cross-border analysis; NHR abolition, GILTI/§962, PE risks all flagged.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 29.4,
          "note": "Solid memo; NHR 2.0 caveat and PFIC cash-asset trap well flagged.",
          "rank": 5
        }
      }
    },
    {
      "id": 18,
      "category": "M\\&A & Corporate",
      "use_case": "Shareholder Agreement",
      "models": {
        "claude-opus-4.8": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 57.1,
          "note": "Thorough, jurisdiction-neutral draft with clear commercial decision flags and cap-table analysis.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 117.7,
          "note": "Comprehensive, production-ready draft with correct Delaware mechanics and smart thresholds.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 127.3,
          "note": "Comprehensive, well-structured draft with strong Bad Leaver mechanics and valuation provisions.",
          "rank": 6
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 19.0,
          "note": "Production-ready Delaware draft; cause termination mechanics well-structured and practical.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 7.6,
          "note": "Solid draft; Trados cite is real but tangentially relevant here.",
          "rank": 3
        },
        "mistral-large": {
          "total": 25.7,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 27.2,
          "note": "DGCL §121 misapplied; Trados cite tangential but real; solid structure overall",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 5.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 6.7,
          "note": "Cited irrelevant cases; thin mechanics, missing cause-termination buyout pricing detail",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.8,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 51.0,
          "note": "Solid Malaysian-law draft; cap table math correct; cause escrow clause practical.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 83.3,
          "note": "Glidepath v. Beumer citation appears fabricated; core provisions excellent.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 28.2,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 28.2,
          "note": "Halpin v. Riverstone citation unverifiable; likely hallucinated Delaware case.",
          "rank": 5
        }
      }
    },
    {
      "id": 19,
      "category": "Dispute Resolution",
      "use_case": "Arbitration Clause",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 47.4,
          "note": "Thorough, well-structured; minor SIAC 2025 threshold detail unverified",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 120.5,
          "note": "Comprehensive, production-ready clause; minor gap: cost allocation section incomplete",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 139.7,
          "note": "Strong draft; ICC Art.30 threshold detail slightly outdated but no hallucinations",
          "rank": 8
        },
        "o3": {
          "total": 29.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 20.5,
          "note": "CVG v CVH [2020] SGHC 75 appears fabricated; SIAC IP rules unverified",
          "rank": 4
        },
        "grok-4.3": {
          "total": 30.6,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.2,
          "note": "Solid clause, accurate institutional comparison, good JP arbitrability caveat included.",
          "rank": 2
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 36.1,
          "note": "Bloomberry case citation unverifiable; SIAC Rule numbers may be outdated",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 18.0,
          "quality": 5.0,
          "accuracy": 3.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 7.7,
          "note": "Hallucinated cases; JCAA article numbers wrong; shallow IP-specific analysis",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 54.4,
          "note": "SIAC threshold slightly off; strong practical clause with good comparative analysis",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 76.0,
          "note": "Mitsubishi Motors cite verified; Singapore IAA 2019 amendment accurately described.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 32.0,
          "note": "Excellent cross-border analysis; IP arbitrability nuances and enforcement flags well-handled.",
          "rank": 1
        }
      }
    },
    {
      "id": 20,
      "category": "Dispute Resolution",
      "use_case": "Litigation Hold Notice",
      "models": {
        "claude-opus-4.8": {
          "total": 31.3,
          "quality": 10.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 57.2,
          "note": "Exceptional depth; verified citations; GDPR flag; source-code nuance addressed.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 26.0,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 121.6,
          "note": "GN Nettest cite unverifiable; Colonies Partners citation questionable; Zubulake accurate",
          "rank": 9
        },
        "gpt-5.5": {
          "total": 30.4,
          "quality": 10.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 112.2,
          "note": "Exceptional SaaS-specific hold notice; cited cases are real and accurate.",
          "rank": 4
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 10.6,
          "note": "Zubulake cite is real and accurate; SaaS-specific tech coverage is excellent.",
          "rank": 3
        },
        "grok-4.3": {
          "total": 32.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.7,
          "note": "Comprehensive, production-ready hold notice with accurate Fed.R.Civ.P. 37(e) reference.",
          "rank": 1
        },
        "mistral-large": {
          "total": 29.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 43.1,
          "note": "Zubulake and Pension Committee are real, accurately cited cases.",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 25.6,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 8.0,
          "note": "Solid template but lacks patent-specific nuance and cross-jurisdictional ESI considerations.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 44.6,
          "note": "Zubulake cite accurate; solid SaaS-specific coverage; lacks international data issues",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 29.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 72.8,
          "note": "Zubulake, Halo, FRCP 37(e) all correctly cited; GDPR/CPRA cross-border nuance excellent.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 26.5,
          "note": "Comprehensive, production-ready; Google Play cite is real but context slightly stretched.",
          "rank": 6
        }
      }
    },
    {
      "id": 21,
      "category": "Securities & Finance",
      "use_case": "Safe Harbor Analysis",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 41.4,
          "note": "Excellent 506(b) general solicitation flag; PSLRA private issuer exclusion well-analyzed.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 58.8,
          "note": "Excellent analysis; Trump Casino cite slightly misapplied but not hallucinated",
          "rank": 7
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 129.8,
          "note": "Thorough, well-cited, production-ready analysis with accurate PSLRA private-company exclusion.",
          "rank": 6
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 14.3,
          "note": "Texas Gulf Sulphur correctly cited; PSLRA private-issuer exclusion accurately flagged.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.0,
          "note": "Solid analysis; PSLRA exclusion for private issuers correctly identified.",
          "rank": 4
        },
        "mistral-large": {
          "total": 25.2,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 26.7,
          "note": "Slayton v. AmEx citation wrong; PSLRA safe harbor misapplied to private companies",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 22.0,
          "quality": 6.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 6.6,
          "note": "Silicon Graphics cite misapplied; PSLRA safe harbor inapplicable to private companies",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.3,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 47.0,
          "note": "Solid PSLRA inapplicability analysis; misses SEC Rule 506(c) general solicitation risk.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 77.6,
          "note": "Excellent PSLRA inapplicability analysis; 506(b) vs 506(c) distinction is critical.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 26.2,
          "note": "Technically sound; Virginia Bankshares citation slightly stretched but defensible.",
          "rank": 3
        }
      }
    },
    {
      "id": 22,
      "category": "Securities & Finance",
      "use_case": "SAFE Note Negotiation",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 43.0,
          "note": "Excellent pre/post-money SAFE distinction; observer seat pushback well-reasoned and actionable.",
          "rank": 4
        },
        "claude-sonnet-4.6": {
          "total": 26.4,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 103.0,
          "note": "In re Emerging Communications cite unverified; Delaware statute references solid overall",
          "rank": 7
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 105.6,
          "note": "Thorough, production-ready; DGCL §141/§220 correctly cited; no hallucinations.",
          "rank": 5
        },
        "o3": {
          "total": 30.6,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 13.2,
          "note": "Thorough, actionable founder analysis with solid counter-proposal and market benchmarks.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 15.0,
          "note": "Strong practical guidance; Delaware control-person claim slightly overstated but no hallucinations.",
          "rank": 3
        },
        "mistral-large": {
          "total": 22.7,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 21.9,
          "note": "Kalisman v. Friedman misapplied; Restatement cite irrelevant; cap range debatable",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 23.0,
          "quality": 6.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 8.2,
          "note": "Adequate but shallow; misses discount-only SAFE option and dilution math.",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 47.2,
          "note": "Solid, actionable founder guidance; no hallucinated cites; minor YC SAFE nuances missed.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 72.9,
          "note": "Strong practical memo; DGCL §220 cite slightly misapplied but not hallucinated.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 27.7,
          "note": "Accurate, well-structured; DGCL §141 cite is legitimate and relevant.",
          "rank": 2
        }
      }
    },
    {
      "id": 23,
      "category": "International Trade",
      "use_case": "Sanctions Compliance",
      "models": {
        "claude-opus-4.8": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 42.1,
          "note": "Thorough, well-structured; minor uncertainty on Sept 2024 BIS software determination specifics.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 57.6,
          "note": "Thorough, well-structured; GL25 characterization slightly overbroad but defensible.",
          "rank": 7
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 119.0,
          "note": "Thorough, well-structured; minor uncertainty on 2024 OFAC determination specifics.",
          "rank": 8
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 22.5,
          "note": "Thorough, well-structured; minor EAR99 SaaS carve-out nuance understated.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 7.4,
          "note": "Solid practical guidance; misses UAE local sanctions and DIFC considerations.",
          "rank": 5
        },
        "mistral-large": {
          "total": 26.8,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 42.5,
          "note": "Strong structure; § 746.8 EAR99 blanket license claim slightly overstated",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 26.0,
          "quality": 7.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 6.3,
          "note": "Misses Russia sectoral sanctions, OFAC General License analysis, and SDN screening depth.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 63.0,
          "note": "Thorough, accurate OFAC/EAR analysis; practical exclusion recommendation is sound.",
          "rank": 4
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 107.2,
          "note": "Thorough, well-structured; FAQ 1054 plausible but unverified; anti-boycott clause excellent.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 29.2,
          "note": "Strong analysis; GL 25D letter designation and FAQ 1187 need verification.",
          "rank": 3
        }
      }
    },
    {
      "id": 24,
      "category": "International Trade",
      "use_case": "Distribution Agreement",
      "models": {
        "claude-opus-4.8": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 62.8,
          "note": "Exceptional MENA-specific caveats, operative clauses, and cross-jurisdictional risk flags.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 121.6,
          "note": "Comprehensive, jurisdiction-aware draft with accurate MENA law flags and Cavendish cite.",
          "rank": 8
        },
        "gpt-5.5": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 95.4,
          "note": "Comprehensive, jurisdiction-aware, production-ready with accurate MENA agency law citations.",
          "rank": 4
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 13.0,
          "note": "Comprehensive, jurisdiction-specific, well-structured; minor gaps in churn cure periods.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 6.4,
          "note": "Solid template; lacks UAE/KSA agency law specifics and termination indemnity warnings.",
          "rank": 7
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 36.4,
          "note": "UAE Federal Decree-Law No. 34/2021 misapplied; cybercrime law cited incorrectly.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 24.0,
          "quality": 6.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 5.4,
          "note": "Solid structure but lacks MENA-specific nuance, cure periods, and termination triggers.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 62.6,
          "note": "Thorough, jurisdiction-aware draft with appropriate caveats on mandatory local laws.",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 79.7,
          "note": "Thorough, jurisdiction-specific, actionable; minor uncertainty on KSA 2022 agency law details.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 25.8,
          "note": "Excellent MENA-specific drafting; commercial agency trap and data laws correctly flagged.",
          "rank": 2
        }
      }
    },
    {
      "id": 25,
      "category": "Regulatory",
      "use_case": "Force Majeure Post-COVID",
      "models": {
        "claude-opus-4.8": {
          "total": 26.7,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 46.6,
          "note": "Several US cases unverifiable/likely hallucinated; English cases mostly solid",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 22.4,
          "quality": 8.0,
          "accuracy": 4.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 91.9,
          "note": "Multiple unverifiable/likely fabricated citations severely undermine otherwise strong analysis",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 24.0,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 162.4,
          "note": "Hitz citation appears fabricated; 616 B.R. 374 unverifiable, penalized accordingly.",
          "rank": 3
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 32.8,
          "note": "",
          "rank": 8
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 11.0,
          "note": "JN Contemporary Art 2d Cir. citation needs verification; core analysis solid",
          "rank": 1
        },
        "mistral-large": {
          "total": 23.2,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 47.3,
          "note": "Newcastle United case, Rajah & Tann cite, Atlantic Lottery appear fabricated/misapplied",
          "rank": 4
        },
        "llama-4-maverick": {
          "total": 16.0,
          "quality": 4.0,
          "accuracy": 2.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 8.1,
          "note": "Multiple fabricated citations; RTI v MUR facts wrong; Murphy cite invented",
          "rank": 7
        },
        "deepseek-v3.2": {
          "total": 18.9,
          "quality": 6.0,
          "accuracy": 2.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 63.6,
          "note": "Multiple fabricated case citations severely undermine otherwise well-structured analysis.",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 2.3,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 2.3,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 57.8,
          "note": "",
          "rank": 10
        },
        "gemini-3.1-pro": {
          "total": 2.8,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 2.8,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 49.7,
          "note": "",
          "rank": 9
        }
      }
    },
    {
      "id": 26,
      "category": "Regulatory",
      "use_case": "Open Source License Compatibility",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 42.1,
          "note": "Thorough, accurate, well-structured; correctly flags unsettled derivative-work doctrine.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 48.8,
          "note": "Jacobsen and Hellwig citations accurate; AGPL/GPL asymmetry correctly identified.",
          "rank": 8
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 137.5,
          "note": "Jacobsen and Artifex citations verified; thorough GPL/AGPL SaaS analysis.",
          "rank": 7
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 15.7,
          "note": "Thorough, well-structured, accurate GPL/AGPL analysis with actionable separation strategies.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 32.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.3,
          "note": "Thorough, accurate GPL/AGPL analysis; correctly flags SaaS vs on-premise distinction.",
          "rank": 1
        },
        "mistral-large": {
          "total": 24.7,
          "quality": 8.0,
          "accuracy": 4.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 32.9,
          "note": "Galoob, Newegg v. Ezra, Artifex citations are hallucinated or misapplied",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 21.6,
          "quality": 6.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 8.6,
          "note": "AGPL/GPLv3 compatibility claim is wrong; they are actually compatible.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 45.8,
          "note": "Thorough, accurate GPL/AGPL/SaaS analysis with actionable mitigation strategies.",
          "rank": 4
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 68.9,
          "note": "Jacobsen and Artifex citations verified; ML dataset taint risk non-obvious.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 29.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 38.8,
          "note": "Thorough, accurate GPL/AGPL analysis with actionable isolation strategies provided.",
          "rank": 5
        }
      }
    },
    {
      "id": 27,
      "category": "Regulatory",
      "use_case": "Fintech BNPL Compliance",
      "models": {
        "claude-opus-4.8": {
          "total": 29.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 45.8,
          "note": "Accurate, well-structured; Adar Bays and CashCall citations verifiable and correct.",
          "rank": 4
        },
        "claude-sonnet-4.6": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 125.6,
          "note": "Exceptional depth; cases verified; OCC rule vacatur correctly noted; APR risk flagged",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 159.0,
          "note": "Solid B2B BNPL analysis; answer truncated before completing all five required topics.",
          "rank": 8
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 19.2,
          "note": "Madden and Avant citations verified; OCC CRA repeal accurate; strong output.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 8.9,
          "note": "CashCall cite format questionable; Western Sky IL citation unverifiable as stated",
          "rank": 5
        },
        "mistral-large": {
          "total": 24.8,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 52.1,
          "note": "Multiple hallucinated/misrepresented citations undermine otherwise strong structural analysis",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 21.0,
          "quality": 6.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 7.8,
          "note": "Loper v. Prosper Funding cite appears fabricated; CFL §22802 misattributed",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 73.8,
          "note": "Madden cited correctly but tangentially; strong B2B BNPL analysis overall",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 119.5,
          "note": "CashCall v. Morrisey citation unverified; Think Finance citation details questionable.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 30.1,
          "note": "Solid B2B BNPL analysis; CashCall cite accurate; Georgia CFDL questionable.",
          "rank": 2
        }
      }
    },
    {
      "id": 28,
      "category": "Consumer Protection",
      "use_case": "ToS Audit",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 40.2,
          "note": "Rigorous tri-jurisdiction analysis; DSA, CRA, FAA cites verified and accurate.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 79.3,
          "note": "Comprehensive, well-cited; minor risk on some lower-court cite verifiability",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 111.3,
          "note": "Comprehensive tri-jurisdiction audit; cites verifiable; replacement drafting highly actionable.",
          "rank": 6
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 17.1,
          "note": "Thorough tri-jurisdiction analysis; all cited cases and directives verifiable.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 31.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 6.2,
          "note": "Solid tri-jurisdiction analysis; Douglas cite plausible but minor risk.",
          "rank": 2
        },
        "mistral-large": {
          "total": 24.7,
          "quality": 7.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 26.0,
          "note": "Answer cut off mid-sentence; clause 3 entirely missing; otherwise solid analysis",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 24.6,
          "quality": 7.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 7.0,
          "note": "Solid framework but lacks actionable redlines and misattributes Nguyen holding",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 124.6,
          "note": "Thorough, well-structured; minor case law application nuances but no hallucinations.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 92.8,
          "note": "Douglas v. US Dist. Court cited incorrectly; Zappos cite unverifiable/hallucinated",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 29.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 26.7,
          "note": "Rodman v. Safeway and Douglas v. Talk America are real cases.",
          "rank": 5
        }
      }
    },
    {
      "id": 29,
      "category": "Consumer Protection",
      "use_case": "Cookie Consent Compliance",
      "models": {
        "claude-opus-4.8": {
          "total": 29.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 34.1,
          "note": "Thorough, well-structured; enforcement figures and dates appear accurate and verifiable.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 79.6,
          "note": "Comprehensive, well-structured; enforcement figures and citations appear verifiable.",
          "rank": 7
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 116.2,
          "note": "Thorough, well-structured; IAB Europe CJEU case citation needs verification.",
          "rank": 8
        },
        "o3": {
          "total": 29.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 13.5,
          "note": "Solid analysis; CNIL fines slightly imprecise but directionally correct.",
          "rank": 4
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 8.2,
          "note": "Planet49 and Orange România correctly cited; Deutsche Telekom C-129/21 accurate.",
          "rank": 2
        },
        "mistral-large": {
          "total": 25.8,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 41.8,
          "note": "Spanish AEPD €10M Google fine and some enforcement figures unverifiable/inaccurate",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 26.0,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 6.1,
          "note": "Solid analysis, Planet49 cite correct, but lacks UK/DPA cross-jurisdictional depth.",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 47.3,
          "note": "CNIL fines verified; compliant draft is thorough and actionable.",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 73.4,
          "note": "Accurate, thorough, actionable; minor overstatement on Garante/AEPD specifics.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 22.8,
          "note": "Solid analysis; CNIL deliberation numbers slightly imprecise but directionally correct.",
          "rank": 1
        }
      }
    },
    {
      "id": 30,
      "category": "Real Estate",
      "use_case": "Commercial Lease Review",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 34.4,
          "note": "Kendall v. Pestana and CA/UK statutes correctly cited; thorough tenant revisions.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 56.2,
          "note": "Slavin and Jack Frost citations unverified/likely hallucinated; Kendall accurate",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 93.7,
          "note": "Kendall v. Pestana and CA Civil Code §1995.260 are accurate citations.",
          "rank": 4
        },
        "o3": {
          "total": 26.1,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 13.8,
          "note": "Two likely hallucinated citations undermine otherwise excellent practical analysis.",
          "rank": 8
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 6.1,
          "note": "Kendall v. Ernest Pestana is real CA case; well-structured, actionable output.",
          "rank": 1
        },
        "mistral-large": {
          "total": 23.7,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 32.3,
          "note": "Multiple fabricated case citations across NY, CA, and Ontario jurisdictions.",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 25.0,
          "quality": 7.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 5.6,
          "note": "Kendall case real; Cal. Civ. Code cite plausible but profit-share analysis shallow",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 47.5,
          "note": "Slavin misapplied; Kendall cite accurate; revision clause is excellent.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 74.0,
          "note": "Kendall and Cal. Civ. Code §1995.250 citations verified and accurate.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 29.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 32.9,
          "note": "Kendall and Dress Shirt Sales citations appear accurate and jurisdiction-appropriate.",
          "rank": 3
        }
      }
    },
    {
      "id": 31,
      "category": "Legal Operations",
      "use_case": "Canned Responses",
      "models": {
        "claude-opus-4.8": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 42.5,
          "note": "Zubulake and FreecycleSunnyvale are real, verifiable cases; solid output.",
          "rank": 6
        },
        "claude-sonnet-4.6": {
          "total": 27.0,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 65.0,
          "note": "Freecycle Network v. Rambler cite is incorrect/hallucinated; core substance strong",
          "rank": 9
        },
        "gpt-5.5": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 32.9,
          "note": "Production-ready, well-structured; FRCP 37(e) cite accurate and appropriate.",
          "rank": 3
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 10.8,
          "note": "Production-ready templates; correct FRCP cites; Lanham Act reference accurate.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 6.1,
          "note": "Solid, production-ready templates; lacks GDPR/CCPA specificity and litigation risk flags.",
          "rank": 4
        },
        "mistral-large": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 30.6,
          "note": "Zubulake cite accurate; AICPA ET§301 slightly misapplied but defensible",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 24.0,
          "quality": 6.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 4.3,
          "note": "Functional but generic; misses key risks like litigation hold scope, IP indemnity.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 40.2,
          "note": "Production-ready templates; correct SCCs cite; solid preservation hold caveats.",
          "rank": 5
        },
        "qwen3.7-max": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 54.9,
          "note": "Zubulake and Dawn Donut citations are real and correctly applied.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 22.7,
          "note": "Accurate citations, practical options, strong disclaimers, minor trademark nuance missing.",
          "rank": 2
        }
      }
    },
    {
      "id": 32,
      "category": "Legal Operations",
      "use_case": "Legal Department OKRs",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 30.3,
          "note": "Actionable, well-caveated OKRs with correct GDPR/CCPA statutory references.",
          "rank": 7
        },
        "claude-sonnet-4.6": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 69.5,
          "note": "Comprehensive, measurable OKRs with appropriate caveats and realistic implementation guidance.",
          "rank": 10
        },
        "gpt-5.5": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 10.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 28.5,
          "note": "Comprehensive, measurable, role-mapped OKRs; practical for Series B legal teams.",
          "rank": 1
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.4,
          "note": "Highly actionable, measurable KRs with realistic baselines and clear ownership.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 30.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 5.9,
          "note": "Measurable, realistic OKRs; lacks cross-jurisdictional and risk nuance.",
          "rank": 4
        },
        "mistral-large": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 32.6,
          "note": "Comprehensive, measurable OKRs with realistic baselines and strong cross-functional context.",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 30.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 5.0,
          "note": "Solid, actionable OKRs with measurable KRs; outside counsel satisfaction metric is odd.",
          "rank": 5
        },
        "deepseek-v3.2": {
          "total": 29.9,
          "quality": 9.0,
          "accuracy": 10.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 34.9,
          "note": "Measurable, realistic OKRs with strong implementation caveats and jurisdiction awareness.",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 67.3,
          "note": "Berman cite verified; GDPR/UCC/ABA rules correctly applied throughout.",
          "rank": 9
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 23.2,
          "note": "Production-ready OKRs with correct legal references and strong operational guidance.",
          "rank": 3
        }
      }
    },
    {
      "id": 33,
      "category": "Incident Response",
      "use_case": "Breach Notification",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 46.3,
          "note": "Production-ready, EDPB-grounded, hashing analysis and one-stop-shop flags excellent.",
          "rank": 4
        },
        "claude-sonnet-4.6": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 122.2,
          "note": "BA/Marriott fines are real; hashing-algorithm risk flag is excellent.",
          "rank": 8
        },
        "gpt-5.5": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 108.8,
          "note": "Comprehensive, legally precise, production-ready; NIS2 and one-stop-shop correctly flagged.",
          "rank": 7
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 20.8,
          "note": "Comprehensive, GDPR-accurate, production-ready; minor one-stop-shop analysis could deepen.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.2,
          "note": "Comprehensive, legally accurate GDPR breach response; minor gap on Art.55(1) cross-border.",
          "rank": 1
        },
        "mistral-large": {
          "total": 26.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 31.1,
          "note": "Solid structure; WP29/EDPB cites plausible but unverified; Art.34 high-risk analysis thin.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 24.0,
          "quality": 6.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.0,
          "note": "Missing 72-hour deadline mechanics, DPO role, risk assessment, and lead-SA analysis.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 82.1,
          "note": "Comprehensive, jurisdiction-aware, production-ready templates with strong legal framing.",
          "rank": 5
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 89.0,
          "note": "Österreichische Post citation accurate; WP250rev.01 reference slightly imprecise but defensible.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 32.9,
          "note": "Comprehensive, accurate GDPR drafts; EDPB Guidelines correctly cited; production-ready.",
          "rank": 3
        }
      }
    },
    {
      "id": 34,
      "category": "Incident Response",
      "use_case": "Cyber Insurance Claim",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 66.0,
          "note": "Merck cite accurate; Travelers v. ICS less verifiable but flagged appropriately",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 26.4,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 130.4,
          "note": "Merck appellate cite vague; Apache/Ernst cases misapplied but core analysis sound",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 139.8,
          "note": "Comprehensive, accurate citations; OFAC sanctions angle adds practical value.",
          "rank": 4
        },
        "o3": {
          "total": 29.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 17.3,
          "note": "Merck v. ACE citation and Travelers form reference unverifiable/potentially hallucinated",
          "rank": 2
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.5,
          "note": "Merck v. ACE citation accurate; exclusion analysis nuanced and jurisdiction-aware.",
          "rank": 3
        },
        "mistral-large": {
          "total": 3.2,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.2,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 58.9,
          "note": "",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 6.0,
          "accuracy": 2.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 6.9,
          "note": "Multiple fabricated case citations severely undermine legal credibility and accuracy.",
          "rank": 8
        },
        "deepseek-v3.2": {
          "total": 24.3,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 73.5,
          "note": "Mondelez v. Zurich citation details are inaccurate/unverifiable as stated.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 1.9,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 1.9,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 80.3,
          "note": "",
          "rank": 10
        },
        "gemini-3.1-pro": {
          "total": 26.7,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 29.5,
          "note": "Travelers cite unverifiable; Merck reporter citation imprecise but case real.",
          "rank": 5
        }
      }
    },
    {
      "id": 35,
      "category": "Intellectual Property",
      "use_case": "Patent vs Trade Secret",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 40.7,
          "note": "Accurate case law, strong hybrid strategy, excellent DTSA whistleblower notice flag.",
          "rank": 5
        },
        "claude-sonnet-4.6": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 58.9,
          "note": "Accurate citations, thorough hybrid strategy, strong practical risk flags throughout.",
          "rank": 8
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 123.4,
          "note": "Comprehensive, accurate, well-cited; minor gap on foreign patent strategy.",
          "rank": 10
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 11.7,
          "note": "Accurate citations, thorough hybrid strategy, actionable sequencing for startup context.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 33.0,
          "quality": 9.0,
          "accuracy": 10.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.8,
          "note": "Accurate, well-structured, cites verified; minor gap on hybrid strategy options.",
          "rank": 1
        },
        "mistral-large": {
          "total": 28.2,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 38.4,
          "note": "Religious Tech v. Netcom misapplied; Sino Legend cite questionable for trade secrets",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 8.3,
          "note": "Solid analysis, accurate citations, but lacks cross-jurisdictional and hybrid strategy depth.",
          "rank": 6
        },
        "deepseek-v3.2": {
          "total": 29.9,
          "quality": 9.0,
          "accuracy": 10.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 64.8,
          "note": "Thorough, accurate, well-structured; hybrid strategy recommendation adds strong actionability.",
          "rank": 4
        },
        "qwen3.7-max": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 79.2,
          "note": "Comprehensive, accurate, well-cited hybrid strategy memo with strong cross-jurisdictional analysis.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 25.0,
          "note": "Accurate case law, strong hybrid strategy, excellent § 101 analysis.",
          "rank": 3
        }
      }
    },
    {
      "id": 36,
      "category": "Edge Case",
      "use_case": "Conflicting Jurisdiction",
      "models": {
        "claude-opus-4.8": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 32.8,
          "note": "Weltimmo cite accurate; Ministers case real but tangential; analysis thorough",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 69.1,
          "note": "Exceptional multi-regime analysis; Welsbach and Cooney citations verifiable and apt.",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 137.3,
          "note": "Exceptional multi-jurisdictional analysis; case citations verifiable and correctly applied.",
          "rank": 4
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 17.3,
          "note": "Rigorous multi-regime analysis; Google Spain and Schrems II correctly cited.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.0,
          "note": "Verein cite is real but tangential; UK GDPR angle underdeveloped",
          "rank": 6
        },
        "mistral-large": {
          "total": 25.8,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 34.3,
          "note": "Weltimmo citation misapplied; Schrems II framing inaccurate; UK adequacy date wrong",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 22.0,
          "quality": 6.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 6.2,
          "note": "Bremen v. Zapata misapplied; GDPR applies by establishment, not citizenship",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 63.2,
          "note": "Solid multi-layer analysis; Rome I applicability to arbitration slightly overstated.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 78.3,
          "note": "Eco Swiss and Wirtschaftsakademie correctly cited; Rome I analysis strong.",
          "rank": 8
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 28.7,
          "note": "Weltimmo cite is real CJEU case; Rome I analysis correct and precise.",
          "rank": 2
        }
      }
    },
    {
      "id": 37,
      "category": "Edge Case",
      "use_case": "Ethical Boundary",
      "models": {
        "claude-opus-4.8": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 28.2,
          "note": "Thorough, well-structured, accurate Model Rules analysis with actionable practical guidance.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 26.9,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 52.1,
          "note": "Eisenberg cite unverified/vague; otherwise thorough, well-structured Model Rules analysis",
          "rank": 8
        },
        "gpt-5.5": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 53.8,
          "note": "Thorough, well-structured, accurate Model Rules analysis with actionable client guidance.",
          "rank": 5
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 10.5,
          "note": "Thorough, well-structured analysis with actionable guidance and correct Rule citations.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 7.0,
          "note": "Solid Rule-by-Rule analysis; candor framing slightly narrow but corrected.",
          "rank": 4
        },
        "mistral-large": {
          "total": 2.8,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 2.8,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 36.8,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 23.0,
          "quality": 6.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 4.5,
          "note": "Adequate but shallow; odd boxed conclusion undermines professionalism significantly",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 27.3,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 48.2,
          "note": "Thorough Model Rules analysis; no hallucinated cites; alternatives section practical.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 63.4,
          "note": "Thorough, well-structured analysis; correctly cites MRPC rules without fabrication.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 25.0,
          "note": "Thorough, well-structured analysis with actionable drafting alternative and correct rule citations.",
          "rank": 2
        }
      }
    },
    {
      "id": 38,
      "category": "Edge Case",
      "use_case": "Hallucination Trap",
      "models": {
        "claude-opus-4.8": {
          "total": 29.2,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 17.0,
          "note": "Exemplary refusal; verified foundational cites; practical research guidance provided",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 27.1,
          "quality": 7.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 9.8,
          "note": "Honest refusal avoids hallucination; loses points for incomplete actionability.",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 17.4,
          "quality": 5.0,
          "accuracy": 3.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 111.9,
          "note": "Oberstein real; Klarna and Subway citations likely hallucinated or materially wrong.",
          "rank": 6
        },
        "o3": {
          "total": 3.7,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.7,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 15.5,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 10.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 9.2,
          "note": "Correctly refused to fabricate cases; cited real precedent instead.",
          "rank": 1
        },
        "mistral-large": {
          "total": 13.8,
          "quality": 4.0,
          "accuracy": 1.0,
          "speed": 2.8,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 19.4,
          "note": "All three cited cases appear fabricated; citations unverifiable or plainly false.",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 13.0,
          "quality": 3.0,
          "accuracy": 2.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 1.0,
          "halluc": true,
          "latency": 4.0,
          "note": "Lee v. Ticketmaster citation appears fabricated; honest admission of limits undermined by hallucination",
          "rank": 8
        },
        "deepseek-v3.2": {
          "total": 21.3,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 36.3,
          "note": "Google Assistant and Sellers v. JustAnswer citations appear fabricated or unverifiable.",
          "rank": 5
        },
        "qwen3.7-max": {
          "total": 25.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 132.5,
          "note": "Correctly refused hallucination; cited real older cases with accurate holdings.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 11.9,
          "quality": 3.0,
          "accuracy": 1.0,
          "speed": 1.9,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 48.2,
          "note": "Oberstein is real but 2023 framing misleads; Edmundson v. Klarna fabricated",
          "rank": 9
        }
      }
    },
    {
      "id": 39,
      "category": "Edge Case",
      "use_case": "Ambiguous Facts",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 42.9,
          "note": "Thorough, well-structured; case citations real but outcomes correctly flagged as unresolved.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 67.6,
          "note": "Exceptional structure; real cases cited accurately; EU AI Act correctly flagged.",
          "rank": 7
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 84.4,
          "note": "Exceptional triage framework; case law verified; EU AI Act correctly flagged.",
          "rank": 6
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 14.4,
          "note": "Jacobsen v. Katzer correctly cited; thorough licence taxonomy; actionable next steps.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 6.8,
          "note": "Thorough, well-structured, correctly flags AGPL/copyleft and RAIL license nuances.",
          "rank": 1
        },
        "mistral-large": {
          "total": 28.2,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 30.1,
          "note": "Galoob v. Nintendo misapplied; Progress Software v. MySQL citation questionable here",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 26.0,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 5.9,
          "note": "Solid framework but misses AI-specific model weights licensing nuances entirely.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 53.4,
          "note": "Thorough, well-structured; correctly flags AGPL SaaS risk and RAIL licenses.",
          "rank": 5
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 64.5,
          "note": "Jacobsen v. Katzer and Getty v. Stability AI are real, verifiable cases.",
          "rank": 9
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 25.2,
          "note": "Artifex and Thaler cites are real; Llama MAU threshold accurate.",
          "rank": 4
        }
      }
    },
    {
      "id": 40,
      "category": "Edge Case",
      "use_case": "Multi-Stakeholder Conflict",
      "models": {
        "claude-opus-4.8": {
          "total": 28.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 51.3,
          "note": "Solid memo; Apple/BCBS cites real but contextually stretched for scale caveat.",
          "rank": 4
        },
        "claude-sonnet-4.6": {
          "total": 25.0,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 129.3,
          "note": "Biotronik cite unverifiable/likely hallucinated; UCITA applicability overstated",
          "rank": 8
        },
        "gpt-5.5": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 91.9,
          "note": "Exceptional depth; bankruptcy code cites accurate; interaction risks well-analyzed",
          "rank": 5
        },
        "o3": {
          "total": 29.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 14.8,
          "note": "Akamai/Soasta 2016 diligence report cited without verification; Robinson-Patman application overstated",
          "rank": 3
        },
        "grok-4.3": {
          "total": 32.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.5,
          "note": "Thorough, actionable memo; antitrust MFC note adds value; no hallucinations.",
          "rank": 1
        },
        "mistral-large": {
          "total": 23.2,
          "quality": 8.0,
          "accuracy": 4.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 36.5,
          "note": "Sony/Tenenbaum, Leegin misapplied; SAS Institute cite inaccurate; antitrust analysis flawed",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 20.6,
          "quality": 6.0,
          "accuracy": 4.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 8.6,
          "note": "Hill v. Gateway misapplied; MFC antitrust analysis superficial; missing IP/escrow depth",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 62.9,
          "note": "Thorough, actionable memo; lacks cross-jurisdictional nuance and insurance angle.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 72.9,
          "note": "Accurate statutes, strong framework, minor Robinson-Patman SaaS caveat oversimplified",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 34.6,
          "note": "Solid memo; Robinson-Patman software caveat accurate; escrow 365(n) cite correct.",
          "rank": 2
        }
      }
    },
    {
      "id": 41,
      "category": "Compliance / Due Diligence",
      "use_case": "Regulatory Change Impact Assessment",
      "models": {
        "claude-opus-4.8": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 49.1,
          "note": "Precise citations, live-obligation framing, parallel-regime conflicts expertly flagged.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 110.4,
          "note": "Some delegated regulation citations unverifiable; RTS numbering may be imprecise.",
          "rank": 7
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 137.1,
          "note": "Comprehensive, well-cited DORA analysis with accurate regulatory references and practical milestones.",
          "rank": 8
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 18.0,
          "note": "Comprehensive, accurate DORA analysis with correct citations and actionable milestones.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 30.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 7.6,
          "note": "Accurate, well-structured DORA analysis with correct article citations and timeline.",
          "rank": 4
        },
        "mistral-large": {
          "total": 27.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 53.3,
          "note": "Solid DORA analysis; incident reporting 4-hour timeline needs verification against RTS.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 23.0,
          "quality": 6.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.4,
          "note": "Timeline inaccurate; DORA applies January 2025, not vaguely '2024'.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 61.4,
          "note": "Accurate, well-structured DORA analysis with correct articles and realistic timeline.",
          "rank": 5
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 83.5,
          "note": "Delegated Reg 2024/1502 citation needs verification; otherwise thorough and actionable.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 31.7,
          "note": "Accurate DORA citations, strong structure, actionable milestones, solid cross-regulatory analysis.",
          "rank": 3
        }
      }
    },
    {
      "id": 42,
      "category": "Compliance / Due Diligence",
      "use_case": "Convert Law Into Checklist",
      "models": {
        "claude-opus-4.8": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 10.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 35.1,
          "note": "Comprehensive, legally precise, well-structured; WP248 and Art.36 timelines correct.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 30.4,
          "quality": 10.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 70.8,
          "note": "Exceptional depth, accurate citations, actionable structure, strong cross-jurisdictional awareness.",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 30.9,
          "quality": 10.0,
          "accuracy": 10.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 67.6,
          "note": "Exhaustive, production-ready DPIA checklist with precise Article citations throughout.",
          "rank": 5
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 10.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 8.9,
          "note": "Thorough, accurate GDPR Art.35 checklist; minor gap on EDPB WP248 guidance.",
          "rank": 4
        },
        "grok-4.3": {
          "total": 32.0,
          "quality": 9.0,
          "accuracy": 10.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 5.5,
          "note": "Accurate, well-structured, actionable checklist with correct article citations throughout.",
          "rank": 1
        },
        "mistral-large": {
          "total": 24.8,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 41.7,
          "note": "Schrems II misapplied; C-708/18 and WP29 Opinion 2/2009 misused",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 25.6,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 5.9,
          "note": "Solid but misses supervisory authority blacklists and DPO seek-advice distinction.",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 58.5,
          "note": "Thorough, accurate DPIA checklist with correct EDPB guidance and jurisdiction notes.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 79.8,
          "note": "Comprehensive, accurate, well-structured DPIA checklist with strong cross-jurisdictional analysis.",
          "rank": 8
        },
        "gemini-3.1-pro": {
          "total": 31.7,
          "quality": 9.0,
          "accuracy": 10.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 29.4,
          "note": "Thorough, accurate GDPR Art.35 checklist with correct article citations throughout.",
          "rank": 2
        }
      }
    },
    {
      "id": 43,
      "category": "Compliance / Due Diligence",
      "use_case": "Vendor Risk Assessment",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 55.3,
          "note": "Schrems II, SCCs, DPF, DORA all correctly cited; production-ready output.",
          "rank": 4
        },
        "claude-sonnet-4.6": {
          "total": 31.4,
          "quality": 10.0,
          "accuracy": 10.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 102.2,
          "note": "Schrems II cite correct; all statutory references verified; production-ready output.",
          "rank": 2
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 104.9,
          "note": "Comprehensive, production-ready questionnaire with correct GDPR/HIPAA/PCI references throughout.",
          "rank": 9
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 12.6,
          "note": "Comprehensive, well-structured, legally grounded with correct multi-jurisdictional citations.",
          "rank": 3
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.9,
          "note": "Comprehensive, well-structured, actionable questionnaire with correct GDPR/CCPA references.",
          "rank": 1
        },
        "mistral-large": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 37.7,
          "note": "Comprehensive, well-structured, accurate citations, actionable risk ratings throughout.",
          "rank": 6
        },
        "llama-4-maverick": {
          "total": 24.0,
          "quality": 6.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.9,
          "note": "Solid baseline but thin questions, missing CCPA/SCCs, low creativity on edge risks.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 58.0,
          "note": "Comprehensive, jurisdiction-accurate, production-ready questionnaire with clear risk tiering.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 29.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 76.4,
          "note": "Exceptional depth; CLOUD Act/Schrems II tension and DORA citation add real value.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 27.4,
          "note": "Thorough, jurisdiction-accurate, production-ready with CLOUD Act and TIA flags.",
          "rank": 5
        }
      }
    },
    {
      "id": 45,
      "category": "Employment Law / International",
      "use_case": "Cross-Border Termination Advisory",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 57.6,
          "note": "Exceptional cross-border analysis; Rome I, travail dissimulé, Macron barème all correct.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 112.3,
          "note": "Exceptional depth; Barème Macron, SYNTEC, Rome I all correctly applied.",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 130.4,
          "note": "Thorough, well-structured; Rome I, Cass. soc. 2002, Tillman all verifiable.",
          "rank": 6
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 17.2,
          "note": "EG Baldwins cite unverifiable; Cass. soc. citations plausible but some unverified",
          "rank": 3
        },
        "grok-4.3": {
          "total": 30.6,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 10.4,
          "note": "Solid Rome I analysis; French mandatory law overlay correctly prioritized throughout.",
          "rank": 2
        },
        "mistral-large": {
          "total": 25.2,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 37.2,
          "note": "Multiple unverifiable French Cass. Soc. citations; Rome I post-Brexit analysis oversimplified",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 20.0,
          "quality": 5.0,
          "accuracy": 3.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 9.4,
          "note": "Wrong French Labor Code articles; Rome I analysis incomplete; TFS cite questionable",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 27.3,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 84.7,
          "note": "Articles L.151-28 to L.151-34 cited incorrectly; non-compete articles are L.1237 range",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 1.9,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 1.9,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 98.1,
          "note": "",
          "rank": 10
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 31.5,
          "note": "Cour de cassation cite unverified; Rome I UK retention framing slightly imprecise",
          "rank": 4
        }
      }
    },
    {
      "id": 46,
      "category": "Technology Law / Securities",
      "use_case": "Smart Contract Legal Analysis",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 60.8,
          "note": "Exceptional multi-jurisdictional analysis; PSA correction and Ooki DAO cite verified.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 133.0,
          "note": "Sarcuni v. bZx and some citations require verification; Quoine analysis strong",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 147.8,
          "note": "Thorough multi-jurisdiction analysis; answer truncated before completing EU/Singapore liability sections.",
          "rank": 6
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 19.5,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 31.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.8,
          "note": "Solid multi-jurisdictional analysis; PSA correction notable; DAO standing appropriately hedged.",
          "rank": 1
        },
        "mistral-large": {
          "total": 25.2,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 49.9,
          "note": "Multiple hallucinated/misattributed cases; Kramer, BGH NJW, Lim Teck Chye unverified",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 20.6,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 9.5,
          "note": "Superficial analysis; lacks depth on DAO liability, standing, code-law conflict",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 83.4,
          "note": "Rohr v. Reliance cite unverifiable; SEC DAO Report 2017 accurate reference",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 83.2,
          "note": "Sarcuni cite accurate; B2C2 cite accurate; Morrison caveat well-handled.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 39.5,
          "note": "Eisenberg cite accurate; Sarcuni/Ooki DAO cites verifiable; strong cross-jurisdictional depth.",
          "rank": 3
        }
      }
    },
    {
      "id": 47,
      "category": "Dispute Resolution / Construction Law",
      "use_case": "Construction Lien & Insurance Analysis",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 68.4,
          "note": "Thorough, well-structured; minor uncertainty on post-2022 subsection precision flagged appropriately.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 123.3,
          "note": "Hughes Wood Products cite unverifiable; Kroger v. Keng application slightly misstated",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 168.2,
          "note": "Solid Texas framework; answer truncated mid-sentence on manufacturer defenses.",
          "rank": 6
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 18.8,
          "note": "",
          "rank": 9
        },
        "grok-4.3": {
          "total": 29.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 8.6,
          "note": "Solid statutory grounding, no hallucinated cases, minor gaps in cross-issue analysis.",
          "rank": 2
        },
        "mistral-large": {
          "total": 3.2,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.2,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 57.4,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 21.6,
          "quality": 6.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 8.9,
          "note": "Tex. Ins. Code §§ 1512.101 and 554.051 citations appear fabricated or misapplied.",
          "rank": 8
        },
        "deepseek-v3.2": {
          "total": 25.3,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 69.8,
          "note": "BMW v. Gore cited irrelevantly; Timpte citation plausible but misapplied here",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 99.1,
          "note": "Mid-Continent cite verifiable; lien accrual nuance and FNC analysis strong.",
          "rank": 3
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 27.9,
          "note": "Asahi/Nicastro cited correctly; retainage and lien deadlines accurate.",
          "rank": 4
        }
      }
    },
    {
      "id": 48,
      "category": "IP Law / Antitrust",
      "use_case": "FRAND Patent Licensing Dispute",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 56.6,
          "note": "Exceptional depth; ISO/RAND distinction, policy flux, and Qualcomm limits correctly flagged.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 121.8,
          "note": "Exceptional depth; Posner Apple v. Motorola characterization slightly imprecise but defensible.",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 173.1,
          "note": "Comprehensive, accurate SEP/FRAND analysis; TCL cite noted but handled carefully.",
          "rank": 8
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 19.7,
          "note": "Continental v. Avanci cite slightly misapplied but overall excellent analysis.",
          "rank": 3
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.0,
          "note": "Accurate, well-structured, cites verified; Qualcomm and Rambus applied correctly.",
          "rank": 1
        },
        "mistral-large": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 47.5,
          "note": "Solid framework; VirnetX SSPPU application slightly overstated; FTC v. Qualcomm accurate.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 18.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 6.6,
          "note": "Superficial analysis, wrong final answer format, lacks actionable FRAND methodology depth",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 92.8,
          "note": "Rigorous, well-cited analysis; FTC v. Qualcomm characterization slightly oversimplified.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 128.3,
          "note": "Comprehensive, well-cited; minor overstatement on EU excessive pricing likelihood.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 38.4,
          "note": "TCL v. Ericsson vacated caveat noted; HTC v. Ericsson citation verified accurate.",
          "rank": 4
        }
      }
    },
    {
      "id": 49,
      "category": "Environmental Law / ESG",
      "use_case": "Cross-Border Carbon Credit Dispute",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 65.7,
          "note": "Exceptional cross-jurisdictional analysis; Rome I conflict flag and CSRD Omnibus caveat outstanding",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 115.6,
          "note": "Royscot damages-as-fraud rule doubted; Springwell citation slightly misapplied; otherwise excellent",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 151.1,
          "note": "Rigorous multi-jurisdictional analysis; Lomas cite verified; Lei 15042 correctly identified.",
          "rank": 3
        },
        "o3": {
          "total": 27.1,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 16.3,
          "note": "Lei 15042/2024 article citations unverifiable; HIH/Peyman citations plausible but risky",
          "rank": 6
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.5,
          "note": "Lomas citation used cautiously; Lei 15.042 article references unverified but plausible",
          "rank": 2
        },
        "mistral-large": {
          "total": 24.2,
          "quality": 7.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 35.6,
          "note": "Lei 15042/2024 article citations unverifiable; BGH cite plausible but unconfirmed",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 21.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.0,
          "note": "Superficial analysis; misses ISDA Section 5 Events of Default specifics and NDC conflict depth.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 71.8,
          "note": "Springwell cite accurate; Lei 15.042/2024 vs 14.462 conflation minor error",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 24.9,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 110.1,
          "note": "Lei 15042/2024 misidentified; 2024 ISDA VCC Definitions fabricated; Derry v Peek valid",
          "rank": 8
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 26.0,
          "note": "Derry v Peek correctly cited; Lei 15042/2024 reference needs verification",
          "rank": 4
        }
      }
    },
    {
      "id": 50,
      "category": "Banking & Finance / Fintech",
      "use_case": "Multi-Jurisdiction Payment License Analysis",
      "models": {
        "claude-opus-4.8": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 81.5,
          "note": "Solid framework; PSD3 figures provisional; BCB capital ranges need verification",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 129.0,
          "note": "Strong structure; some regulatory details unverifiable but no fabricated case law",
          "rank": 2
        },
        "gpt-5.5": {
          "total": 24.4,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 136.1,
          "note": "Solid framework but answer cuts off mid-sentence; missing board resolution draft",
          "rank": 5
        },
        "o3": {
          "total": 25.1,
          "quality": 7.0,
          "accuracy": 5.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 33.4,
          "note": "DIFC case CFI-036-2022 and Micula cite appear fabricated",
          "rank": 4
        },
        "grok-4.3": {
          "total": 24.0,
          "quality": 6.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 10.3,
          "note": "Solid structure but thin on PSD3 specifics and BCB capital figures need verification",
          "rank": 6
        },
        "mistral-large": {
          "total": 21.7,
          "quality": 6.0,
          "accuracy": 4.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 44.4,
          "note": "Multiple fabricated rule citations; PSD3 details speculative; BCB figures unverified",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 15.6,
          "quality": 3.0,
          "accuracy": 4.0,
          "speed": 4.6,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 29.0,
          "note": "Superficial bullet summaries; wrong capitals cited; BCB/PSD3 references unverified",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 25.8,
          "quality": 7.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 80.5,
          "note": "Solid framework; BCB SPI mechanics slightly oversimplified; PSD3 timeline speculative.",
          "rank": 3
        },
        "qwen3.7-max": {
          "total": 24.0,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 146.9,
          "note": "Three Rivers No5 misapplied; Singapore Companies Act section wrong; DFSA capital understated",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 21.2,
          "quality": 6.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 60.3,
          "note": "US-Singapore DTA error; PSD3 capital figures speculative; answer truncated mid-sentence",
          "rank": 9
        }
      }
    },
    {
      "id": 55,
      "category": "Corporate Governance",
      "use_case": "Board Fiduciary Duty Analysis",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 82.3,
          "note": "Rigorous tri-jurisdictional analysis; correctly flags Revlon inapplicability; minor truncation.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 127.9,
          "note": "Rigorous tri-jurisdictional analysis; MFW subsidiary limitation astutely flagged; incomplete Part IV-V",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 25.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 145.6,
          "note": "Memo cut off mid-sentence; Singapore Act citation '1967' slightly anachronistic but defensible",
          "rank": 8
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 28.1,
          "note": "Corwin cite accurate; minor s191 threshold math imprecision; Revlon correctly excluded",
          "rank": 1
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.8,
          "note": "Singapore CA 1967 sections cited but Act is 1967; Chapter 9 analysis solid",
          "rank": 3
        },
        "mistral-large": {
          "total": 23.2,
          "quality": 7.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 57.4,
          "note": "Wrong Act year; fabricated SG cases; Revlon misapplied; MFW context errors",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 5.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 9.6,
          "note": "Superficial analysis; wrong CA year, weak Rule 904 and MFW application",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 93.0,
          "note": "Solid framework; Companies Act 1967 citation slightly anachronistic, MFW inapplicable to wholly-owned sub",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 123.1,
          "note": "Revlon inapplicability correctly noted; MFW minority-of-minority gap well-spotted.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 42.9,
          "note": "Anadarko cite misapplied; MFW scope overstated; otherwise solid cross-jurisdictional analysis",
          "rank": 4
        }
      }
    },
    {
      "id": 56,
      "category": "Insurance Law",
      "use_case": "Reinsurance Treaty Dispute",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 74.1,
          "note": "Spinney's and Friends Provident correctly cited; Pan Am jurisdiction caveat properly flagged.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 25.4,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 128.9,
          "note": "Shinedean, Primus, Dunlop Haywards citations unverified or misapplied; Pan Am persuasive only",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 23.0,
          "quality": 7.0,
          "accuracy": 6.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 165.3,
          "note": "Aspen v Adana citation unverifiable; answer truncated before covering issues 3-5",
          "rank": 9
        },
        "o3": {
          "total": 27.1,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 15.5,
          "note": "Pan Am v Aetna citation wrong; Milton Keynes v Nulty misapplied; R+V unverified",
          "rank": 3
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.8,
          "note": "Solid analysis; Aspen v Adana citation needs verification, core framework sound.",
          "rank": 2
        },
        "mistral-large": {
          "total": 23.7,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 29.9,
          "note": "Aspen v Adana citation wrong; Pan American jurisdiction/citation misrepresented; DFSA rules fabricated",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 17.0,
          "quality": 4.0,
          "accuracy": 3.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 7.0,
          "note": "Aspen v Adana misattributed; Pan American citation inaccurate; analysis superficial throughout",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 25.3,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 100.9,
          "note": "Aspen v Adana citation unverifiable; Pan American v Aetna citation misattributed/suspect",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 24.9,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 115.9,
          "note": "Aspen v Adana citation unverifiable; Mercantile joinder cite misapplied/suspect",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 33.4,
          "note": "Aspen v Adana citation unverifiable; s.11 analysis on notification clauses oversimplified",
          "rank": 4
        }
      }
    },
    {
      "id": 59,
      "category": "International Trade / Sanctions",
      "use_case": "Sanctions Compliance Advisory",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 83.6,
          "note": "Rigorous multi-jurisdictional analysis; correctly flags ownership/control as determinative threshold",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 24.4,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 132.1,
          "note": "Melli Bank, Rosneft, Sina Bank citations misapplied or unverifiable in context",
          "rank": 9
        },
        "gpt-5.5": {
          "total": 25.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 146.0,
          "note": "Strong EU analysis; answer truncated; UAE decree-law year correction noted appropriately.",
          "rank": 7
        },
        "o3": {
          "total": 27.1,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 25.6,
          "note": "C-72/11 Afrasiabi misapplied; Council Reg 2024/1745 unverifiable; EAR analysis oversimplified",
          "rank": 2
        },
        "grok-4.3": {
          "total": 26.6,
          "quality": 7.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 14.5,
          "note": "Commission Opinion 2017/C 236/03 and OFAC FAQ 398 citations unverified/suspect",
          "rank": 3
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 68.7,
          "note": "Council Regulation 2024/1745 citation unverifiable; 31 CFR §501.603 misattributed for 50% rule",
          "rank": 5
        },
        "llama-4-maverick": {
          "total": 17.0,
          "quality": 4.0,
          "accuracy": 3.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 9.7,
          "note": "Wrong OFAC cite; shallow EAR/AWS analysis; misapplies EU nexus rationale",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 24.9,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 97.4,
          "note": "Case C-72/11 Afrasiabi cited incorrectly; regulation basis unverified",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 87.3,
          "note": "Council Regulation (EU) 2024/1745 and EU Best Practices citation unverified/potentially fabricated",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 25.7,
          "quality": 7.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 39.4,
          "note": "Council Regulation (EU) 2024/1745 unverifiable; CEO control test overstated vs EU practice",
          "rank": 6
        }
      }
    },
    {
      "id": 60,
      "category": "Real Estate / Cross-Border Investment",
      "use_case": "Cross-Border Property Fund Structure",
      "models": {
        "claude-opus-4.8": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 86.3,
          "note": "RFMC repeal claim needs verification; MIT trading-risk flag is excellent",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 130.2,
          "note": "Technically strong, correct statutory refs, minor threshold/date imprecisions noted",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 24.0,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 210.0,
          "note": "Strong MIT/VCC analysis but answer cuts off; FIRB/FCA/waterfall sections missing.",
          "rank": 7
        },
        "o3": {
          "total": 30.6,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 21.9,
          "note": "Comprehensive, well-structured; minor threshold figures may need current-year verification.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 29.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 10.9,
          "note": "Solid framework; FIRB threshold and RFMC AUM limit need verification.",
          "rank": 3
        },
        "mistral-large": {
          "total": 23.2,
          "quality": 7.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 68.1,
          "note": "Multiple wrong statutory refs: VCC Act s18/20, Corps Act s185/275 misapplied",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 17.1,
          "quality": 4.0,
          "accuracy": 4.0,
          "speed": 4.1,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 28.9,
          "note": "Wrong FIRB threshold, incorrect statute citations, shallow waterfall terms, missing key analysis",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 23.8,
          "quality": 7.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 70.6,
          "note": "FIRB $0 threshold claim oversimplified; RFMC/CMS thresholds slightly mischaracterized",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 113.4,
          "note": "Exceptional cross-jurisdictional analysis; 13O/13U exclusion flag adds real value.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 36.5,
          "note": "RFMC repeal claim unverified; MIT withholding rate and FIRB thresholds mostly accurate",
          "rank": 5
        }
      }
    },
    {
      "id": 62,
      "category": "AI/Tech Regulation",
      "use_case": "Facial Recognition Liability Analysis",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 82.5,
          "note": "Exceptional multi-jurisdictional analysis; Art.5(1)(d) vs (h) correction adds value",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 130.5,
          "note": "Exceptional depth; Cothron, Rosenbach, TransUnion citations verified and accurate.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 25.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 142.4,
          "note": "Solid BIPA analysis; correctly flags Art.5(1)(e) vs (d) error; incomplete GDPR/LGPD sections",
          "rank": 8
        },
        "o3": {
          "total": 29.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 23.4,
          "note": "Clearview Hamburg €20M and Ginart unlearning cite require verification",
          "rank": 2
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 10.6,
          "note": "Solid analysis; LGPD Art.55-J mischaracterized as criminal; minor gaps.",
          "rank": 3
        },
        "mistral-large": {
          "total": 25.2,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 47.2,
          "note": "Breyer case misapplied; Tims cite dubious; PDPA SGD1M cap understated",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 21.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.1,
          "note": "Superficial analysis; §202a misapplied; no concrete GDPR turnover estimates given",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 82.8,
          "note": "Amazon/Meta fines cited as benchmarks, not binding precedent; technically acceptable.",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 24.9,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 97.8,
          "note": "BGH 1 StR 437/18 and Patel v. Facebook citations require verification",
          "rank": 9
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 35.8,
          "note": "SB 2979 citation unverified; hiQ/Van Buren analysis solid; BIPA math correct",
          "rank": 5
        }
      }
    },
    {
      "id": 63,
      "category": "Bankruptcy & Insolvency",
      "use_case": "Cross-Border Restructuring Opinion",
      "models": {
        "claude-opus-4.8": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 86.3,
          "note": "Re Nasmyth unverifiable; Re Houst pension cite questionable; otherwise excellent",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 124.7,
          "note": "Exceptional depth; minor risk on Part 26A Chapter 15 recognition novelty",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 155.2,
          "note": "Technically strong, truncated before completing all six required sections.",
          "rank": 5
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 22.6,
          "note": "",
          "rank": 9
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 11.1,
          "note": "Re Zetta Jet cited plausibly but Re Pacific Andes recognition context unverified",
          "rank": 2
        },
        "mistral-large": {
          "total": 3.2,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.2,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 70.4,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 21.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 8.8,
          "note": "Superficial treatment; misses TPR cram-down limits, COMI rebuttal risks, sequencing nuance",
          "rank": 8
        },
        "deepseek-v3.2": {
          "total": 25.8,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 78.2,
          "note": "Bloom v Pensions Regulator citation incorrect; In re Mirant misapplied here",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 123.3,
          "note": "Re Design Studio Group [2023] SGHC 144 CCCD citation unverifiable; Avanti caveat needed",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 26.7,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 40.8,
          "note": "Re Fairfield Sentry and In re West Electronics citations are hallucinated/misapplied",
          "rank": 6
        }
      }
    },
    {
      "id": 64,
      "category": "Government Contracts & Procurement",
      "use_case": "Defense Procurement Compliance Advisory",
      "models": {
        "claude-opus-4.8": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 90.8,
          "note": "Exceptional cross-jurisdictional depth; minor AUKUS ITAR cite imprecision noted.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 135.3,
          "note": "Exceptional depth; minor CJEU case citations need verification but plausible",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 158.7,
          "note": "Thorough US analysis; EU/Australian/AUKUS sections incomplete but foundations solid",
          "rank": 6
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 26.6,
          "note": "Comprehensive, well-structured; minor CFIUS mandatory threshold nuance oversimplified.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.9,
          "note": "Solid multi-jurisdictional memo; AUKUS and CFIUS nuances slightly underdeveloped.",
          "rank": 2
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 57.8,
          "note": "C-337/05 misapplied; USML Category XIII mischaracterized; ZTE citation strained",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 22.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 8.1,
          "note": "Superficial treatment; lacks CMMC specifics, DISP detail, AUKUS depth, FOCI analysis",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.8,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 86.4,
          "note": "Solid multi-jurisdictional analysis; AUKUS section underdeveloped; no hallucinated citations.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 103.0,
          "note": "Exceptional cross-jurisdictional analysis; AUKUS rule cite needs verification",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 38.5,
          "note": "Solid framework; AUKUS ITAR exemption cite needs verification for accuracy.",
          "rank": 8
        }
      }
    },
    {
      "id": 65,
      "category": "AI/Tech Regulation",
      "use_case": "foundation_model_export_controls",
      "models": {
        "claude-opus-4.8": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 44.4,
          "note": "Strong structure; some ECCN analysis imprecise but no fabricated citations",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 28.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 56.6,
          "note": "Strong memo; minor uncertainty on ECCN 0E501 applicability and GPAI thresholds.",
          "rank": 1
        },
        "gpt-5.5": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 130.2,
          "note": "Solid statutory grounding, correct EU AI Act articles, appropriately flags unsettled areas.",
          "rank": 5
        },
        "o3": {
          "total": 23.1,
          "quality": 7.0,
          "accuracy": 3.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 16.8,
          "note": "Multiple fabricated regulations: 89 Fed.Reg.4512, EU 2025/217, EU 2026/1099, Notice 2025/19",
          "rank": 9
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 15.3,
          "note": "Solid framework; EAR99 conclusion too quick given 4E001 analysis tension.",
          "rank": 2
        },
        "mistral-large": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 58.5,
          "note": "Solid framework; ECCN 3A001.a.9 neural-network threshold citation needs verification.",
          "rank": 4
        },
        "llama-4-maverick": {
          "total": 18.0,
          "quality": 4.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 10.0,
          "note": "Wrong ECCNs, AI Act cited as draft post-adoption, shallow mitigations",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 24.9,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 73.3,
          "note": "ECCN 0D521 misapplied; Article 29a and Export Control Amendment Order 2021 citations dubious",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 25.4,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 98.7,
          "note": "Solid framework; EAR §734.18 service exclusion overstated, GPAI FLOPs threshold accurate.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 25.7,
          "quality": 7.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 39.8,
          "note": "ECCN 3D001.b.1 and 4E001.c citations appear fabricated or misapplied",
          "rank": 6
        }
      }
    },
    {
      "id": 66,
      "category": "M&A",
      "use_case": "cross_border_healthtech_acquisition",
      "models": {
        "claude-opus-4.8": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 85.0,
          "note": "Exceptional cross-jurisdictional analysis; NIS2/HIPAA notification conflict table outstanding",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 130.8,
          "note": "Exceptional cross-jurisdictional analysis; EDPB Opinion 28/2024 citation unverified",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 170.3,
          "note": "Rigorous, well-cited, multi-jurisdictional; truncated at GDPR DPO point.",
          "rank": 7
        },
        "o3": {
          "total": 32.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 24.1,
          "note": "Comprehensive, jurisdiction-accurate, actionable; Schrems II cite correct and verifiable.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.9,
          "note": "Solid multi-jurisdictional analysis; NIS2 essential-entity threshold verification missing.",
          "rank": 4
        },
        "mistral-large": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 51.8,
          "note": "Solid framework; DPDP localization overstated, NIS2 fines slightly off",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 17.0,
          "quality": 3.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.6,
          "note": "Generic checklist; lacks specific deal-breaker analysis, localization conflicts, and SPA mechanics.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.8,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 70.8,
          "note": "Solid multi-jurisdictional analysis; NIS2 24-hour vs HIPAA 60-day conflict well-spotted.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 108.0,
          "note": "Schrems II cite accurate; BetterHelp FTC cite verifiable; NIS2 Art.23 correct",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 33.9,
          "note": "Rigorous cross-jurisdictional analysis; minor DPDP Rules finalization caveat needed.",
          "rank": 2
        }
      }
    },
    {
      "id": 67,
      "category": "Banking/Finance",
      "use_case": "multi_currency_sharia_compliant_facility",
      "models": {
        "claude-opus-4.8": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 62.9,
          "note": "Beximco cite accurate; OHADA, KSA, Egypt analysis precise and actionable.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 136.9,
          "note": "Chromalloy cite contextually appropriate; Cavendish Square correctly applied; comprehensive output",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 161.7,
          "note": "Shamil Bank cite verified; GCC/Riyadh conventions correctly applied; actionable hybrid recommendation",
          "rank": 6
        },
        "o3": {
          "total": 25.1,
          "quality": 8.0,
          "accuracy": 4.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 19.0,
          "note": "Multiple fabricated case citations severely undermine otherwise strong substantive analysis",
          "rank": 8
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.4,
          "note": "Solid cross-jurisdictional analysis; LMA modifications actionable but arbitration point underdeveloped.",
          "rank": 4
        },
        "mistral-large": {
          "total": 23.2,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 52.0,
          "note": "Multiple fabricated cases and circulars undermine otherwise solid structural analysis.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 15.0,
          "quality": 3.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 1.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 5.9,
          "note": "Generic step-by-step format lacks actionable specifics; no LMA clause modifications drafted",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.8,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 61.1,
          "note": "Solid cross-jurisdictional analysis; minor gaps on SAMA tawarruq specifics and OHADA exequatur nuance.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 95.9,
          "note": "Shamil Bank and Dana Gas citations accurate; NYC accession dates verified correctly.",
          "rank": 2
        },
        "gemini-3.1-pro": {
          "total": 28.7,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 32.9,
          "note": "Beximco cite accurate; Riyadh Convention/GCC treaty analysis solid and actionable.",
          "rank": 3
        }
      }
    },
    {
      "id": 68,
      "category": "Contract & Commercial",
      "use_case": "dual_governing_law_supply_chain_contract",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 51.8,
          "note": "Enka/Kabab-Ji cited correctly; Arbitration Act 2025 caveat appropriately hedged.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 128.3,
          "note": "Enka cite accurate; Hurtado/Nedlloyd correct; thorough cross-jurisdictional CISG analysis",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 156.3,
          "note": "Asante and Nedlloyd citations verified; Rome I analysis precise and actionable.",
          "rank": 4
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 19.1,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 24.6,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 8.8,
          "note": "BGH VIII ZR 119/14 citation unverifiable; likely hallucinated case reference.",
          "rank": 7
        },
        "mistral-large": {
          "total": 22.2,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 31.6,
          "note": "Rome I cited as EU 1215/2012 (wrong); Nedlloyd misapplied; BGH cite unverified",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 23.0,
          "quality": 6.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 7.2,
          "note": "Nedlloyd cite real but analysis shallow; CISG exclusion mechanics underdeveloped",
          "rank": 8
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 58.0,
          "note": "ICF case cited accurately; Rome I post-Brexit nuance slightly overstated but defensible.",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 112.2,
          "note": "Rigorous, well-structured analysis; Nedlloyd cite verified and correctly applied.",
          "rank": 2
        },
        "gemini-3.1-pro": {
          "total": 26.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 27.2,
          "note": "Solid analysis; Nedlloyd and Asante citations verified; dépeçage treatment accurate.",
          "rank": 5
        }
      }
    },
    {
      "id": 69,
      "category": "Corporate Governance",
      "use_case": "dual_listed_esg_board_duties",
      "models": {
        "claude-opus-4.8": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 81.0,
          "note": "Exceptional cross-jurisdictional depth; Vedanta/Caremark/Swiss minerals DD well-integrated.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 140.5,
          "note": "Exceptional cross-jurisdictional depth; CS3D/ESRS timelines need verification",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 188.4,
          "note": "Excellent cross-jurisdictional analysis; Swiss subsidiary risk underdeveloped; answer truncated.",
          "rank": 7
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 20.3,
          "note": "Comprehensive, well-structured; minor CSRD timeline imprecision but no hallucinated cites.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.2,
          "note": "Solid multi-jurisdictional analysis; Boeing cite slightly imprecise but not fabricated.",
          "rank": 2
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 51.6,
          "note": "Re Southern Counties Fresh Foods and ClientEarth v Shell citations require verification",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 23.0,
          "quality": 6.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 7.8,
          "note": "Superficial treatment; misses TCFD, LSE DTRs, Swiss OR 964a, ADR SEC duties",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 72.5,
          "note": "Sequana, Marchand, eBay, Revlon citations all verifiable and correctly applied.",
          "rank": 4
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 123.6,
          "note": "TSCA/Northway cite wrong; TSC Industries v Northway misattributed to securities materiality",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 26.7,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 48.7,
          "note": "Anadarko cite misapplied; Article 40a reference inaccurate; otherwise solid framework",
          "rank": 8
        }
      }
    },
    {
      "id": 70,
      "category": "Employment Law",
      "use_case": "remote_worker_multi_jurisdiction_employment_status",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 68.0,
          "note": "Rigorous multi-jurisdiction analysis; Rome I Art.8 and § 266a StGB correctly applied.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 135.9,
          "note": "Exceptional depth; minor truncation; PGMOL 2024 cite needs verification",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 24.4,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 188.3,
          "note": "Answer truncated mid-sentence; Germany/UK strong but India/CA/NY incomplete",
          "rank": 7
        },
        "o3": {
          "total": 1.0,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 1.0,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 469.6,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 27.6,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 9.0,
          "note": "BAG citation and Sushil Kumar v RPF Commr unverifiable; IR35 chapter refs imprecise",
          "rank": 5
        },
        "mistral-large": {
          "total": 26.7,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 60.5,
          "note": "Several case citations unverifiable or misattributed; core analysis solid",
          "rank": 6
        },
        "llama-4-maverick": {
          "total": 16.0,
          "quality": 4.0,
          "accuracy": 3.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 8.8,
          "note": "Hallucinated NY case law; shallow analysis; absurd boxed-answer format unusable",
          "rank": 8
        },
        "deepseek-v3.2": {
          "total": 29.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 64.8,
          "note": "Solid multi-jurisdictional analysis; Rome I application and case law verified.",
          "rank": 3
        },
        "qwen3.7-max": {
          "total": 2.3,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 2.3,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 115.8,
          "note": "",
          "rank": 9
        },
        "gemini-3.1-pro": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 56.5,
          "note": "Comprehensive, well-structured; minor gaps in Rome I Art.8 nuance.",
          "rank": 1
        }
      }
    },
    {
      "id": 71,
      "category": "Data Privacy",
      "use_case": "biometric_marketing_consent_mismatch",
      "models": {
        "claude-opus-4.8": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 80.1,
          "note": "Patel v. Facebook citation misattributed; Rosenbach cite slightly off but close",
          "rank": 4
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 130.1,
          "note": "Patel v. Facebook cite misapplied; Vance v. Microsoft jurisdiction/holding questionable",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 157.2,
          "note": "Solid analysis, truncated before completing CA, Ontario, Brazil, GDPR sections",
          "rank": 8
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 20.9,
          "note": "Rosenbach cite accurate; ANPD FAQ #34 unverifiable but minor risk.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 10.7,
          "note": "Rosenbach cite accurate; EDPB 05/2022 plausible but unverified guideline number",
          "rank": 3
        },
        "mistral-large": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 57.4,
          "note": "PIPEDA case URL cited appears fabricated; Schrems II citation accurate.",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 21.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.7,
          "note": "Superficial analysis; missing retention specifics, BIPA private right, Ontario PIPEDA nuance",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 70.4,
          "note": "Rosenbach cite accurate; Cadillac Fairview OPC finding correctly applied; SCCs analysis solid.",
          "rank": 2
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 126.9,
          "note": "Monroy v. Shutterfly cited incorrectly; AEPD 2021 guide unverified reference.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 25.7,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 43.5,
          "note": "AEPD Nov 2023 guidance, ANPD Resolution 15/2024, Mutua case hallucinated",
          "rank": 9
        }
      }
    },
    {
      "id": 72,
      "category": "Regulatory Compliance",
      "use_case": "global_online_gambling_licensing",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 82.5,
          "note": "Thorough, well-structured; Nigeria Supreme Court 2024 cite unverified but plausible.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 131.2,
          "note": "Exceptional depth; minor risk on some Indian case citations but broadly verifiable.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 149.9,
          "note": "Thorough, well-structured; answer truncated mid-India section, minor gaps remain.",
          "rank": 8
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 27.1,
          "note": "Scheinberg cite accurate; minor NJ server-location nuance; strong overall.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.5,
          "note": "Solid, well-structured analysis; minor gaps in crypto and conflict depth.",
          "rank": 3
        },
        "mistral-large": {
          "total": 25.2,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 68.9,
          "note": "Multiple hallucinated citations: NLA sections, MLPA s.15, IGA s.61BA/DA fabricated",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 21.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 8.4,
          "note": "Superficial analysis; misses influencer marketing risks, crypto AML depth, state-level India detail",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 83.6,
          "note": "Thorough, well-structured; minor gaps on Wire Act scope and India MeitY rules.",
          "rank": 5
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 118.4,
          "note": "Thorough, well-structured; minor Wire Act scope overstatement; crypto analysis strong.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 37.5,
          "note": "Strong structure; Australia crypto ban claim overstated; NJ statute citation minor error.",
          "rank": 7
        }
      }
    },
    {
      "id": 73,
      "category": "Arbitration",
      "use_case": "state_owned_entity_waiver_of_immunity",
      "models": {
        "claude-opus-4.8": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 60.9,
          "note": "Correctly flags Nigeria/OHADA error; strong cross-jurisdictional analysis with accurate citations.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 130.5,
          "note": "Exceptional multi-jurisdictional analysis; French immunity section cut off abruptly.",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 151.2,
          "note": "Strong multi-jurisdictional analysis; Nigeria non-OHADA correction adds real value.",
          "rank": 6
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 22.5,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 26.6,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 8.2,
          "note": "Congo v. Total cite unverified; OHADA UAA arts cited imprecisely",
          "rank": 5
        },
        "mistral-large": {
          "total": 25.3,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 80.3,
          "note": "Trendtex/Kano/CCJA SNH cases misapplied or unverifiable; FSIA analysis solid",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 18.0,
          "quality": 5.0,
          "accuracy": 3.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 7.9,
          "note": "Kraft Foods v Niger State fabricated; Svenska Petroleum misapplied; OHADA membership wrong",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 26.8,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 67.0,
          "note": "Trendtex and Bancec cited correctly; Nigeria SIA 1990 plausible but unverified.",
          "rank": 4
        },
        "qwen3.7-max": {
          "total": 24.9,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 127.3,
          "note": "Letelier cite misapplied; CCJA 2007 case unverifiable; Bancec correct",
          "rank": 8
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 44.1,
          "note": "OHADA 2023 revision and Sapin II Article L.111-1-2 details unverified/likely hallucinated",
          "rank": 2
        }
      }
    },
    {
      "id": 74,
      "category": "IP/Tech Law",
      "use_case": "generative_music_infringement_multi_forum",
      "models": {
        "claude-opus-4.8": {
          "total": 28.8,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 69.4,
          "note": "Bartz v. Anthropic and Kadrey 2025 outcomes misstated; Subafilms circuit wrong",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 26.4,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 133.1,
          "note": "Suno cite plausible but unverified; answer truncated before conflict-of-laws conclusion",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 145.8,
          "note": "Rigorous multi-forum analysis; truncated before Japan/moral rights/conflict-of-law conclusion",
          "rank": 3
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 21.5,
          "note": "",
          "rank": 9
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.8,
          "note": "Skidmore cite accurate; Daimler/Morrison application slightly stretched but defensible.",
          "rank": 2
        },
        "mistral-large": {
          "total": 2.3,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 2.3,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 75.8,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 22.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 9.4,
          "note": "Superficial analysis; misses post-license output doctrine, TDM opt-out nuance, forum specifics.",
          "rank": 8
        },
        "deepseek-v3.2": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 54.2,
          "note": "CDPA s.29A misapplied; RIAA v MP3.com inapt; Ford citation imprecise",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 111.9,
          "note": "Skidmore cite is 9th Cir., not binding NY; Sportradar attribution imprecise.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 43.5,
          "note": "Solid analysis; Arnstein cite dated but valid; UMG/Suno real cases.",
          "rank": 4
        }
      }
    },
    {
      "id": 75,
      "category": "International Trade",
      "use_case": "anti_dumping_circumvention_and_third_country_processing",
      "models": {
        "claude-opus-4.8": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 78.4,
          "note": "Rigorous dual-regime analysis; 2023 solar circumvention precedent correctly cited; WTO caveated appropriately.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 131.3,
          "note": "CBP HQ H301619 and some WTO case citations appear fabricated or mischaracterized.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 23.0,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 134.1,
          "note": "Renesola C-209/20 citation appears fabricated; Bell Supply citation needs verification",
          "rank": 8
        },
        "o3": {
          "total": 28.1,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 22.4,
          "note": "Several citations unverifiable or fabricated: DS437, HQ H301619, Solar II memo specifics",
          "rank": 2
        },
        "grok-4.3": {
          "total": 26.6,
          "quality": 7.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 8.8,
          "note": "Solid framework but thin on WTO case law and specific solar precedents",
          "rank": 6
        },
        "mistral-large": {
          "total": 3.2,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.2,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 46.0,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 5.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 7.5,
          "note": "DS184 misapplied; shallow analysis; missing key circumvention thresholds and recent precedents",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 25.8,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 65.4,
          "note": "Belcrest Linens misapplied; EC-Fittings citation context overstated; UFLPA conflation",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 90.9,
          "note": "Strong analysis; 87 Fed. Reg. 78,883 cite requires verification but plausible.",
          "rank": 3
        },
        "gemini-3.1-pro": {
          "total": 26.7,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 40.8,
          "note": "Auxin Solar cite plausible but unverified; Bell Supply citation needs scrutiny",
          "rank": 5
        }
      }
    },
    {
      "id": 76,
      "category": "Real Estate",
      "use_case": "foreign_sovereign_pension_fund_reit_investment",
      "models": {
        "claude-opus-4.8": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 87.4,
          "note": "Comprehensive, well-structured; QFPF/§897(l) and FIRB FGI threshold correctly identified.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 130.4,
          "note": "Thorough, well-cited memo; truncated FIRB section; QFPF analysis excellent",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 147.7,
          "note": "Solid multi-jurisdiction analysis; memo truncated mid-sentence on REIT asset tests.",
          "rank": 8
        },
        "o3": {
          "total": 29.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 36.1,
          "note": "Canada v. ICAO 2008 FCA citation appears fabricated; deduct accordingly",
          "rank": 3
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 11.5,
          "note": "Solid framework; ADGM transfer fee claim needs verification; thresholds may be stale.",
          "rank": 5
        },
        "mistral-large": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 45.2,
          "note": "Solid framework; some treaty citations imprecise; MIT withholding rate correct",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 18.0,
          "quality": 4.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 9.2,
          "note": "ADGM Real Estate Rules 2015 unverified; misses QFIE, Section 892 sovereign exemption",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 87.3,
          "note": "Solid multi-jurisdiction memo; ADGM depth thin; UAE corporate tax 2023 omitted.",
          "rank": 4
        },
        "qwen3.7-max": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 116.4,
          "note": "Solid framework; minor inaccuracies on CFIUS 99-mile rule and ADGM specifics.",
          "rank": 9
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 41.8,
          "note": "Technically strong, QFPF/MIT/FIRB analysis accurate, minor sovereign immunity oversimplifications.",
          "rank": 1
        }
      }
    },
    {
      "id": 77,
      "category": "Tax",
      "use_case": "digital_services_tax_and_pillar_two_conflict",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 77.3,
          "note": "Exceptional multi-jurisdictional analysis; EL 2.0 repeal correctly flagged; stacking risk clear.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 128.6,
          "note": "Wipro cite unverifiable; India 2% EL abolition date needs verification",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 25.0,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 170.6,
          "note": "Solid framework but answer truncated before Pillar Two, credits, and recommendations.",
          "rank": 9
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 28.3,
          "note": "Comprehensive, well-structured; minor risk on DPT creditability characterization.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 30.6,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 10.2,
          "note": "Technically strong, accurate citations, actionable recommendations, minor treaty nuance gaps.",
          "rank": 3
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 57.6,
          "note": "Rev. Rul. 2021-14 and CBDT Circular 14/2020 citations unverified/likely hallucinated",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 22.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 7.5,
          "note": "Superficial analysis; missing UTPR/STTR, India EL 2021 amendment, treaty override issues",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 85.0,
          "note": "Pillar One revenue threshold error; €20bn not €750m for Amount A.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 117.5,
          "note": "Technically strong, accurate statutory citations, minor gaps on UTPR/STTR interaction",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 40.3,
          "note": "Strong structure; India EL abolition noted; Pillar One MLC credit claim overstated.",
          "rank": 5
        }
      }
    },
    {
      "id": 78,
      "category": "Immigration",
      "use_case": "distributed_founder_visa_and_control_issues",
      "models": {
        "claude-opus-4.8": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 87.4,
          "note": "Strong framework; E-2 treaty analysis correct; Australia rebrand caveat appropriately flagged.",
          "rank": 5
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 137.0,
          "note": "Exceptional depth; Brazil E-2 treaty date and Dhanasar citation verified correct.",
          "rank": 1
        },
        "gpt-5.5": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 154.7,
          "note": "Solid comparative memo; correctly flags E-2 nationality issue and SUV control conflicts.",
          "rank": 6
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 23.6,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 26.6,
          "quality": 7.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 10.1,
          "note": "E-2 Brazil/India treaty errors; ONE Pass salary threshold omitted; solid structure",
          "rank": 4
        },
        "mistral-large": {
          "total": 24.7,
          "quality": 7.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 43.0,
          "note": "IRPR §98.06(2) citation unverifiable; E-2 treaty eligibility errors for Egypt/India",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 20.0,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 10.0,
          "note": "Superficial analysis; misses nationality-visa matching, ONE Pass criteria, key conflicts",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 26.8,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 74.1,
          "note": "Solid framework; Egypt E-2 treaty status and ONE Pass salary threshold need verification.",
          "rank": 3
        },
        "qwen3.7-max": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 125.8,
          "note": "Matter of Izummi cite is real; ONE Pass salary threshold slightly outdated but defensible",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 45.2,
          "note": "Strong structure; IRPR citation unverified; tax residency risk well-spotted.",
          "rank": 2
        }
      }
    },
    {
      "id": 79,
      "category": "Criminal/White Collar",
      "use_case": "cross_border_bribery_internal_investigation",
      "models": {
        "claude-opus-4.8": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 77.0,
          "note": "Exceptional multi-jurisdictional depth; ENRC citation accurate; minor FEPA framing imprecision.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 133.6,
          "note": "Exceptional depth; Airbus/Rolls-Royce/Alstom cites verifiable; minor FCPA penalty figures outdated",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 176.0,
          "note": "Hoskins cite accurate; thorough multi-jurisdictional analysis; missing sequencing/privilege sections",
          "rank": 7
        },
        "o3": {
          "total": 29.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 18.7,
          "note": "United States v. Straub 2013 FCPA cite appears fabricated; Airbus verified",
          "rank": 3
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 9.2,
          "note": "Safran 2023 DPA unverified; Airbus cite plausible but needs verification",
          "rank": 5
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 57.0,
          "note": "Cass.Crim. 2018/2019 citations unverifiable; STF ADPF 572 misapplied",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 23.0,
          "quality": 5.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 6.2,
          "note": "Outline is superficial; lacks actionable sequencing, COMI analysis, and deconfliction detail.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.8,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 63.3,
          "note": "Solid multi-jurisdictional framework; Airbus reference accurate; US nexus caveats appropriate.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 97.1,
          "note": "ENRC and Three Rivers citations verified; SISSE framework accurately applied.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 38.7,
          "note": "SFO v ENRC citation accurate; strong multi-jurisdictional sequencing and privilege analysis.",
          "rank": 1
        }
      }
    },
    {
      "id": 80,
      "category": "Bankruptcy & Insolvency",
      "use_case": "group_insolvency_and_center_of_main_interests",
      "models": {
        "claude-opus-4.8": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 87.9,
          "note": "Exceptional cross-border analysis; Brexit trap, Gibbs, India IBC all correctly handled.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 143.4,
          "note": "Exceptional depth; Gibbs rule, India IBC gaps, and COMI traps well-handled.",
          "rank": 2
        },
        "gpt-5.5": {
          "total": 24.0,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 166.3,
          "note": "Answer truncated mid-sentence; strong framework but incomplete delivery",
          "rank": 7
        },
        "o3": {
          "total": 24.8,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 51.6,
          "note": "Multiple hallucinated citations: Morning Mist/Fairfield conflation, Re Agrokor, Re DSG Asia, Re Yuk Lun, Rubicon",
          "rank": 6
        },
        "grok-4.3": {
          "total": 27.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.1,
          "note": "Ocean Rig and Stanford cited plausibly but without verified pinpoint accuracy.",
          "rank": 4
        },
        "mistral-large": {
          "total": 3.2,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.2,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 51.6,
          "note": "",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.1,
          "note": "Superficial analysis; misses COMI shift timing rules, protocol mechanics, key traps",
          "rank": 8
        },
        "deepseek-v3.2": {
          "total": 25.1,
          "quality": 7.0,
          "accuracy": 5.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 48.5,
          "note": "Vitro cited incorrectly; Stocznia misapplied; Jet Airways unverifiable as cited",
          "rank": 5
        },
        "qwen3.7-max": {
          "total": 1.9,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 1.9,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 121.1,
          "note": "",
          "rank": 10
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 50.0,
          "note": "Solid analysis; Gibbs rule and Indian IBC gaps well-flagged; minor gaps.",
          "rank": 3
        }
      }
    },
    {
      "id": 81,
      "category": "Securities",
      "use_case": "tokenized_equity_dual_offering",
      "models": {
        "claude-opus-4.8": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 88.4,
          "note": "Exceptional depth; VARA/SCA conflict flagged; Landreth correctly cited; MiCA exclusion precise.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 134.5,
          "note": "Thorough, well-structured; minor gaps in VARA detail; no hallucinated cites",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 200.8,
          "note": "Strong US/EU analysis; Singapore/UAE sections appear cut off mid-sentence",
          "rank": 6
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 29.3,
          "note": "Thorough, well-structured; minor VARA/SCA nuances could be sharper.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 11.0,
          "note": "Ripple cite oversimplified; Telegram cite accurate; VARA/MiCA interplay well-handled.",
          "rank": 4
        },
        "mistral-large": {
          "total": 25.8,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 69.9,
          "note": "Solid framework but VARA/DFSA conflation and shallow cross-border DEX analysis.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 18.0,
          "quality": 4.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 10.5,
          "note": "SEC v. Shavers cite misused; MiCA equity token analysis wrong; shallow throughout",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 63.1,
          "note": "MiCA ART misclassification; ESMA35-43-3578 and PSN02/2022 unverified citations",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 26.9,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 100.7,
          "note": "Technically strong, ERC-3643 practical, SCA Decision citation needs verification",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 28.7,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 37.5,
          "note": "Solid analysis; Landreth cite accurate; VARA/SCA nuance slightly oversimplified.",
          "rank": 3
        }
      }
    },
    {
      "id": 82,
      "category": "Construction",
      "use_case": "mega_project_fidic_governing_law_conflict",
      "models": {
        "claude-opus-4.8": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 83.1,
          "note": "Rigorous multi-jurisdictional analysis; Enka/Kabab-Ji/Cavendish citations verified and accurate.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 131.6,
          "note": "Exceptional depth; Enka, Sulamerica, Cavendish correctly applied; Lebanon NYC gap flagged.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 25.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 155.1,
          "note": "Solid conflict-of-laws analysis; answer truncated before redraft proposals delivered.",
          "rank": 7
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 16.0,
          "note": "",
          "rank": 9
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.4,
          "note": "Solid analysis; Qatar Civil Code Art.266 citation unverified but plausible.",
          "rank": 3
        },
        "mistral-large": {
          "total": 3.2,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.2,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 40.5,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 5.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 6.5,
          "note": "Sulamerica cite misapplied; Sonatrach plausible but strained; OHADA analysis thin",
          "rank": 8
        },
        "deepseek-v3.2": {
          "total": 25.8,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 55.8,
          "note": "Habas and North Midland citations need verification; Cavendish application overstated",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 100.7,
          "note": "Sulamérica and Malmaison citations verified; Art.266 Qatari CC analysis precise.",
          "rank": 2
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 35.5,
          "note": "Solid analysis; Rome I retention post-Brexit overstated; Malmaison cite unverified",
          "rank": 5
        }
      }
    },
    {
      "id": 83,
      "category": "Environmental/ESG",
      "use_case": "supply_chain_deforestation_and_human_rights",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 66.9,
          "note": "Rigorous, well-caveated, actionable; correctly flags Omnibus/deferral uncertainties throughout.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 130.1,
          "note": "Exceptional cross-jurisdictional depth; truncated contract clause slightly reduces completeness.",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 155.1,
          "note": "Strong multi-framework analysis; answer truncated before Nigerian supplier and contract recommendations",
          "rank": 9
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 25.6,
          "note": "Comprehensive, well-structured; minor EUDR deadline inaccuracy noted.",
          "rank": 4
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 6.6,
          "note": "Accurate, well-structured, actionable; CSDDD article citation is provisional but flagged.",
          "rank": 1
        },
        "mistral-large": {
          "total": 29.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 58.5,
          "note": "Thorough, well-structured roadmap; CSDDD thresholds slightly outdated post-amendments.",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 22.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 6.5,
          "note": "Superficial roadmap; misses EUDR delay, CSDDD scope, UFLPA inapplicability to Nigeria",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 82.6,
          "note": "Accurate, well-structured roadmap; EUDR delay to 2025 not flagged.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 29.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 102.9,
          "note": "Vedanta/Okpabi correctly cited; GDPR-traceability conflict is non-obvious gem.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 34.7,
          "note": "Accurate, well-structured, actionable; EUDR delay caveat appropriately flagged.",
          "rank": 3
        }
      }
    },
    {
      "id": 84,
      "category": "Insurance",
      "use_case": "parametric_climate_risk_policy_trigger_dispute",
      "models": {
        "claude-opus-4.8": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 71.4,
          "note": "Verifiable NY cases, correct PILA/Rome I analysis, actionable clause revisions throughout.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 126.5,
          "note": "Stirling Covent Bridge cite unverifiable; core NY cases appear legitimate but risky",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 25.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 148.1,
          "note": "Answer truncated mid-sentence; strong NY analysis but missing clause revisions section",
          "rank": 6
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 17.1,
          "note": "",
          "rank": 9
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.4,
          "note": "Greenfield cite is real but tangential; PILA articles correctly cited throughout.",
          "rank": 2
        },
        "mistral-large": {
          "total": 24.2,
          "quality": 8.0,
          "accuracy": 4.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 56.7,
          "note": "Multiple hallucinated ICC case numbers and questionable NY case applications penalize accuracy.",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 17.0,
          "quality": 4.0,
          "accuracy": 3.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 7.0,
          "note": "NY case citations unverified; shallow Rome I/Swiss PIL analysis; generic clause revisions",
          "rank": 8
        },
        "deepseek-v3.2": {
          "total": 2.8,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 2.8,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 61.7,
          "note": "",
          "rank": 10
        },
        "qwen3.7-max": {
          "total": 26.9,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 89.3,
          "note": "In re Estate of Corcoran cite unverifiable; other NY cases appear legitimate",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 36.6,
          "note": "Penn Central cite misapplied; Cummins citation plausible but unverified reinsurance context",
          "rank": 3
        }
      }
    },
    {
      "id": 85,
      "category": "Government Contracts",
      "use_case": "defense_ai_systems_dual_use_procurement",
      "models": {
        "claude-opus-4.8": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 95.9,
          "note": "Night Vision Corp cite unverified but plausible; CJEU cites appear accurate",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 140.3,
          "note": "Exceptionally thorough; AI data-rights gap noted; memo cut off mid-sentence",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 27.4,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 132.9,
          "note": "Thorough DFARS/ITAR analysis; memo cut off before EU/UK procurement section",
          "rank": 7
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 29.7,
          "note": "2025 BIS/DDTC Guidance Memo and ITAR §120.50 TCP cite unverifiable",
          "rank": 1
        },
        "grok-4.3": {
          "total": 27.6,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 9.2,
          "note": "Solid framework but thin on Israeli contractor specifics and 2027 regulatory evolution.",
          "rank": 6
        },
        "mistral-large": {
          "total": 22.2,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 65.2,
          "note": "McDonnell Douglas and ZTE citations misapplied/hallucinated; DFARS cite errors present",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 21.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 8.8,
          "note": "Superficial analysis; misses USML categories, ECCN 4E001, and Israeli ITAR nuances.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 24.3,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 90.7,
          "note": "McDonnell Douglas cite misapplied; USML XII(f) categorization questionable for target-recognition AI",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 27.8,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 83.7,
          "note": "Solid, actionable memo; clean-room retraining advice is practically sound.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 53.8,
          "note": "InsTiimi Oy cite unverifiable; AUKUS §126.7 reference imprecise but plausible",
          "rank": 5
        }
      }
    },
    {
      "id": 86,
      "category": "Energy/Climate",
      "use_case": "cross_border_hydrogen_project_structuring",
      "models": {
        "claude-opus-4.8": {
          "total": 27.3,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 92.8,
          "note": "Technically strong, truncated at certification clause; FSR and CBAM angles commendable.",
          "rank": 7
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 139.3,
          "note": "Exceptional depth; minor risk on some unverified Commission decision citations",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 160.4,
          "note": "Thorough, well-structured; SA.62619 cite plausible but unverified; answer truncated.",
          "rank": 5
        },
        "o3": {
          "total": 27.1,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 17.8,
          "note": "Several citations unverifiable or fabricated; core framework sound but risky specifics",
          "rank": 8
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 8.2,
          "note": "Kotnik cite is real but misapplied; Hydrogen Society Promotion Act unverified.",
          "rank": 3
        },
        "mistral-large": {
          "total": 2.8,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 2.8,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 81.8,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 6.2,
          "note": "Superficial treatment; lacks specific clause drafting, CBAM analysis, and bankability depth.",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 29.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 75.5,
          "note": "Comprehensive, well-structured; Canada Renewable Energy cite contextually accurate but imprecise.",
          "rank": 1
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 112.1,
          "note": "Exceptional cross-jurisdictional depth; RFNBO hourly-matching cliff risk particularly valuable.",
          "rank": 2
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 50.7,
          "note": "Solid cross-jurisdictional analysis; Japan Hydrogen Act date needs verification",
          "rank": 6
        }
      }
    },
    {
      "id": 87,
      "category": "Consumer Protection",
      "use_case": "dark_patterns_cross_border_ecommerce",
      "models": {
        "claude-opus-4.8": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 87.7,
          "note": "8th Cir. FTC vacatur unverifiable; otherwise thorough, well-structured, actionable.",
          "rank": 4
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 128.6,
          "note": "BGH Himbeer-Vanille cite misapplied; Mayron v Google citation questionable",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 149.7,
          "note": "Solid comparative analysis, truncated before baseline recommendation section",
          "rank": 8
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 21.5,
          "note": "Planet49 and Trivago citations are real; minor ARL step-count imprecision.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 30.6,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.7,
          "note": "Solid cross-jurisdictional analysis; ACL s47 application slightly strained but defensible.",
          "rank": 1
        },
        "mistral-large": {
          "total": 3.2,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.2,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 57.4,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 22.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 6.7,
          "note": "Superficial analysis; misses DSA/DMA, India dark pattern guidelines, ARL specifics",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 28.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 85.4,
          "note": "Solid cross-jurisdictional analysis; DMCC clause numbers unverified but plausible.",
          "rank": 5
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 107.3,
          "note": "Solid, actionable memo; Planet49 cite accurate; DPDP/CCPA nuances slightly compressed.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 48.9,
          "note": "Solid cross-jurisdictional analysis; Planet49, CMA Emma Sleep, DMCC all verifiable.",
          "rank": 3
        }
      }
    },
    {
      "id": 88,
      "category": "Trusts & Estates",
      "use_case": "multi_jurisdiction_digital_asset_estate_plan",
      "models": {
        "claude-opus-4.8": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 83.9,
          "note": "Comprehensive, well-structured; minor US estate tax nuance on holdco needs verification.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 140.8,
          "note": "Exceptional depth; minor US estate tax treaty nuances; cases appear verifiable",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 167.3,
          "note": "Solid conflicts analysis; truncated before trust/corporate structure proposals completed",
          "rank": 8
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 24.2,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 30.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 10.5,
          "note": "Solid multi-jurisdictional analysis; AA v Persons Unknown and IRC v Bullock correctly cited.",
          "rank": 2
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 61.5,
          "note": "BGE 142 III 695 and Re Berchtold application questionable; Singapore has no forced heirship",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 18.6,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 10.6,
          "note": "Superficial analysis; misses forced heirship, DIFC law, NY estate tax situs rules",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 26.8,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 66.2,
          "note": "Solid framework; Hague 1989 Convention attribution slightly imprecise but no hallucinated cases.",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 121.8,
          "note": "Technically strong; Hague 1989 Convention status and RUFADAA scope accurate.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 39.9,
          "note": "Tulip Trading cite accurate; professio juris, DIFC WPR, IHT tail all correct.",
          "rank": 1
        }
      }
    },
    {
      "id": 89,
      "category": "M&A",
      "use_case": "dual_class_rollup_merger_us_de",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 45.9,
          "note": "BCE and MFW citations verified; §3(a)(10) workaround is genuinely creative.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 127.6,
          "note": "Exceptional depth; BCE cite accurate; Riviera Tool cite unverifiable but minor",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 156.1,
          "note": "Exceptional cross-jurisdictional depth; all cited cases and statutes verifiable.",
          "rank": 3
        },
        "o3": {
          "total": 28.1,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 13.3,
          "note": "BCE cite wrong court/year; Dowling v Ontario unverifiable; otherwise strong analysis",
          "rank": 7
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 6.1,
          "note": "Solid cross-jurisdictional analysis; workarounds practical but scheme explanation imprecise.",
          "rank": 5
        },
        "mistral-large": {
          "total": 3.7,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.7,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 26.0,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 22.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 6.1,
          "note": "Superficial analysis; CBCA point misapplied; workarounds underdeveloped; formatting weak",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 27.3,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 52.7,
          "note": "Solid cross-jurisdictional analysis; whitewash procedure misstated for private companies post-2006",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 134.6,
          "note": "BCE citation accurate; FPO Art.70 scheme exemption slightly overstated but defensible",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 44.1,
          "note": "BCE and MFW citations accurate; s.3(a)(10) workaround sophisticated and correct.",
          "rank": 1
        }
      }
    },
    {
      "id": 90,
      "category": "AI/Tech Regulation",
      "use_case": "foundational_model_saas_eu_uae_sg",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 67.8,
          "note": "Rigorous territorial scoping, correct GPAI/high-risk dual classification, no hallucinated cites.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 135.8,
          "note": "Rigorous multi-jurisdictional analysis; DIFC-UAE transfer gap correctly identified",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 25.0,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 156.8,
          "note": "Answer truncated; missing DIFC/UAE contrast, PDPA mapping, and MAS gaps sections",
          "rank": 9
        },
        "o3": {
          "total": 28.1,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 20.7,
          "note": "Solid framework; AI Act article citations are provisional/approximate but flagged appropriately.",
          "rank": 5
        },
        "grok-4.3": {
          "total": 30.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 10.0,
          "note": "Solid multi-jurisdictional analysis; GPAI/high-risk dual classification well-reasoned.",
          "rank": 3
        },
        "mistral-large": {
          "total": 25.2,
          "quality": 7.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 49.8,
          "note": "Solid structure; GPAI/high-risk conflation, Article 52a misattributed, FEAT oversimplified",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 18.6,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 11.5,
          "note": "Superficial analysis; misses GPAI obligations, extraterritoriality nuance, and key conflicts",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.3,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 81.6,
          "note": "Solid multi-jurisdictional analysis; GPAI systemic risk threshold reasoning slightly imprecise.",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 115.1,
          "note": "Strong structure; Annex III classification slightly overbroad; PDPA Business Improvement Exception well-spotted.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 45.6,
          "note": "Rigorous cross-jurisdictional analysis; DIFC-onshore transfer framing is accurate and non-obvious.",
          "rank": 2
        }
      }
    },
    {
      "id": 91,
      "category": "Data Privacy",
      "use_case": "employee_monitoring_gdpr_lgpd_pdpa",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 58.6,
          "note": "BAG keylogger cite and CJEU C-34/21 appear verifiable; strong cross-jurisdictional depth.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 134.0,
          "note": "BAG 2 AZR 681/16 citation unverified; core legal analysis otherwise strong.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 24.0,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 148.3,
          "note": "Answer truncated mid-sentence; missing India DPDP, Singapore PDPA, remediation plan",
          "rank": 9
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 18.8,
          "note": "Accurate, well-structured, jurisdiction-specific; DPDP rules still pending but flagged.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 31.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.2,
          "note": "Solid comparative analysis; CLT Art.373-A citation needs verification.",
          "rank": 2
        },
        "mistral-large": {
          "total": 26.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 33.2,
          "note": "Bărbulescu cite is real; ANPD 2021 guidance vague but not fabricated",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 17.6,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 13.9,
          "note": "Superficial analysis; weak on PDPA/DPDPA specifics; remediation plan generic",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 69.0,
          "note": "Thorough, well-structured; minor DPDPA Section 7(a) characterization imprecision.",
          "rank": 5
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 105.6,
          "note": "Technically strong; DPDPA rules still evolving, minor overstatements on India scope.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 28.2,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 41.7,
          "note": "BAG 2 AZR 681/16 cited but unverified; DPDPA Sec 5(3) misattributed",
          "rank": 6
        }
      }
    },
    {
      "id": 92,
      "category": "Banking/Finance",
      "use_case": "islamic_structured_repo_multi_forum",
      "models": {
        "claude-opus-4.8": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 89.3,
          "note": "Shamil Bank and Investment Dar correctly cited; Decree 34/2021 flag excellent.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 135.9,
          "note": "Shamil Bank correct; Cukurova/Lehman misapplied; Al-Rashidi cite unverifiable",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 194.7,
          "note": "Truncated before KSA/riba and NYC enforcement sections; DIFC-LCIA point accurate.",
          "rank": 7
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 30.1,
          "note": "CCJA Air Afrique cite and Enforcement Decision 34/1/34 appear fabricated",
          "rank": 2
        },
        "grok-4.3": {
          "total": 27.6,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 9.8,
          "note": "Lehman Bros cite misapplied to repo characterisation; AUS article numbers unverified",
          "rank": 4
        },
        "mistral-large": {
          "total": 23.2,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 53.1,
          "note": "Multiple fabricated case citations undermine otherwise solid structural analysis",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 20.0,
          "quality": 5.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 7.1,
          "note": "Fabricated OHADA article numbers; shallow analysis; misses key cross-jurisdictional nuances",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 25.8,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 78.7,
          "note": "Symphony Gems citation misapplied; OHADA Act numbering questionable; Rome I inapplicable post-Brexit",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 98.7,
          "note": "Excellent cross-jurisdictional analysis; DIFC-LCIA abolition flag adds real value.",
          "rank": 3
        },
        "gemini-3.1-pro": {
          "total": 26.7,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 38.0,
          "note": "Baker Hughes cite unverifiable; Shamil Bank applied correctly but DIFC-LCIA point strong",
          "rank": 6
        }
      }
    },
    {
      "id": 93,
      "category": "Corporate Governance",
      "use_case": "dual_listed_climate_strategy_board_duties",
      "models": {
        "claude-opus-4.8": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 50.9,
          "note": "Marchand v. Gordon cite slightly imprecise but not fabricated; strong overall",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 133.8,
          "note": "eBay/Newmark misapplied; Cassimatis citation imprecise; otherwise strong multi-jurisdictional analysis",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 189.4,
          "note": "Solid multi-jurisdictional analysis; answer truncated before completing Pathway 2 conclusion.",
          "rank": 5
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 22.0,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 23.6,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 7.7,
          "note": "ASIC v Mitchell (No 2) [2020] FCA 1098 appears fabricated; Caremark misapplied",
          "rank": 8
        },
        "mistral-large": {
          "total": 24.8,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 54.0,
          "note": "FCA v Carillion, BGE cites, CSRD Art.25 figures likely hallucinated",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 5.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 6.0,
          "note": "BGE cite unverified; eBay/Tooley misapplied; shallow CSRD/UK analysis",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 25.3,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 57.3,
          "note": "Swairmo doctrine fabricated; eBay/Newmark misapplied; Sharma citation imprecise",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 105.9,
          "note": "Rigorous multi-jurisdictional analysis; BGE cite plausible but unverified minor risk",
          "rank": 2
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 31.9,
          "note": "BGE 139 III 24 citation unverified; Hutley opinions characterised accurately enough",
          "rank": 3
        }
      }
    },
    {
      "id": 94,
      "category": "Dispute Resolution",
      "use_case": "multi_forum_supply_chain_disputes_esg",
      "models": {
        "claude-opus-4.8": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 60.8,
          "note": "Exceptional cross-jurisdictional analysis; UK Hague date dispute and AMA 2023 correctly flagged.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 29.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 135.3,
          "note": "Exceptional depth; OHADA/NYC/HCCA analysis precise; minor Brexit-gap nuances",
          "rank": 2
        },
        "gpt-5.5": {
          "total": 25.4,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 151.0,
          "note": "Answer truncated mid-sentence; otherwise thorough, accurate, well-structured analysis.",
          "rank": 8
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 18.9,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 25.6,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 6.2,
          "note": "Solid analysis but thin on OHADA detail and formatting lacks headers",
          "rank": 7
        },
        "mistral-large": {
          "total": 26.8,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 58.6,
          "note": "Solid analysis; Apple Sales cite tangential; OHADA CCJA role slightly misstated.",
          "rank": 6
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 6.0,
          "note": "Superficial analysis; misses LiSoFa, OHADA 2017 Act, Nigeria NY Convention nuances",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 56.8,
          "note": "Modi Entertainment cite unverified but plausible; corporate veil analysis solid",
          "rank": 5
        },
        "qwen3.7-max": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 161.7,
          "note": "Rigorous, jurisdiction-correct analysis; AMA 2023 and OHADA nuances handled well.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 33.6,
          "note": "Solid analysis; minor error conflating GreenDelta/Shakti enforcement direction.",
          "rank": 3
        }
      }
    },
    {
      "id": 95,
      "category": "Contract & Commercial",
      "use_case": "governing_law_warranty_chain_consumer_goods",
      "models": {
        "claude-opus-4.8": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 67.4,
          "note": "Rigorous multi-layer analysis; Ford v. Montana citation accurate and well-applied.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 132.4,
          "note": "Exceptional cross-jurisdictional analysis; Schwarzenegger cite slightly misapplied but minor.",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 25.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 155.7,
          "note": "Strong structure, accurate cites, but answer truncated mid-sentence on California analysis",
          "rank": 8
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 17.5,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 27.6,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 8.8,
          "note": "Rheinhart v. Nissan cite appears fabricated; Pammer citation accurate and helpful.",
          "rank": 5
        },
        "mistral-large": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 36.1,
          "note": "Solid structure; Carnival Cruise citation slightly misapplied to B2C warranty context.",
          "rank": 4
        },
        "llama-4-maverick": {
          "total": 22.0,
          "quality": 6.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 7.5,
          "note": "Nedlloyd cited correctly but WWV Woodson misapplied; shallow cross-jurisdictional analysis",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 25.8,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 62.1,
          "note": "Hulsey cite misapplied; Nedlloyd and AOL citations need verification for context",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 26.9,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 129.3,
          "note": "AOL v. Superior Court and Kearney citations require verification; CDC analysis strong",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 29.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 43.2,
          "note": "Pammer cite valid; Ford Motor cite accurate; CDC articles correctly applied.",
          "rank": 2
        }
      }
    },
    {
      "id": 96,
      "category": "IP/Tech Law",
      "use_case": "cross_border_ai_training_text_images",
      "models": {
        "claude-opus-4.8": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 45.9,
          "note": "Exceptional cross-jurisdictional depth; real citations only; AI Act integration outstanding.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 121.6,
          "note": "Exceptional multi-jurisdictional analysis; real citations; EU AI Act integration notable.",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 139.9,
          "note": "Answer truncated mid-Canada section; otherwise rigorous and citation-accurate throughout.",
          "rank": 7
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 16.8,
          "note": "hiQ cite slightly misapplied but cases real; strong comparative structure overall",
          "rank": 3
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 6.6,
          "note": "Precise statutory citations, no invented cases, strong cross-jurisdictional gap analysis.",
          "rank": 1
        },
        "mistral-large": {
          "total": 25.7,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 28.7,
          "note": "Tokyo 2019 manga case and Nguyen citation context are hallucinated/misapplied.",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 24.0,
          "quality": 6.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 5.6,
          "note": "UsedSoft cite irrelevant; hiQ borderline; Canada missing statutory reference entirely.",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 43.0,
          "note": "Accurate, well-structured; hiQ cite contextually appropriate though tangential.",
          "rank": 4
        },
        "qwen3.7-max": {
          "total": 5.9,
          "quality": 1.0,
          "accuracy": 1.0,
          "speed": 1.9,
          "style": 1.0,
          "creativity": 1.0,
          "halluc": false,
          "latency": 67.9,
          "note": "Model output only meta-commentary, zero substantive legal analysis provided.",
          "rank": 10
        },
        "gemini-3.1-pro": {
          "total": 28.2,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 32.0,
          "note": "Solid comparative analysis; Warhol cite accurate but application slightly stretched.",
          "rank": 6
        }
      }
    },
    {
      "id": 97,
      "category": "Employment Law",
      "use_case": "remote_executive_cross_border_stock_options",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 59.1,
          "note": "Thorough, well-structured; case citations verifiable; UAE dual-regime analysis excellent.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 1.4,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 1.4,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 138.4,
          "note": "",
          "rank": 10
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 181.2,
          "note": "Thorough, well-cited; UAE/DIFC section truncated, minor omissions only",
          "rank": 5
        },
        "o3": {
          "total": 3.2,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.2,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 35.8,
          "note": "",
          "rank": 9
        },
        "grok-4.3": {
          "total": 27.6,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 8.1,
          "note": "Tapas Kumar v Goldman Sachs unverified; Autoclenz citation solid but risks noted",
          "rank": 4
        },
        "mistral-large": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 31.3,
          "note": "",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 20.0,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 6.8,
          "note": "Superficial analysis; step-format unprofessional; Lawson v Serco citation borderline but acceptable",
          "rank": 7
        },
        "deepseek-v3.2": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 67.9,
          "note": "Autoclenz citation accurate; Rome I, DIFC Art.12, UAE Art.13 correctly applied.",
          "rank": 2
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 98.9,
          "note": "Autoclenz and Dharangadhara citations verifiable; DIFC law articles accurate.",
          "rank": 3
        },
        "gemini-3.1-pro": {
          "total": 26.7,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 32.1,
          "note": "Micklefield citation misapplied; Lawson v Serco and Rome I broadly correct",
          "rank": 6
        }
      }
    },
    {
      "id": 98,
      "category": "Tax",
      "use_case": "permanent_establishment_saastax_nexus",
      "models": {
        "claude-opus-4.8": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 66.5,
          "note": "Exceptional depth; LLC treaty-access flag and EL withdrawal date are standout additions.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 1.4,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 1.4,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 135.9,
          "note": "",
          "rank": 10
        },
        "gpt-5.5": {
          "total": 25.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 145.3,
          "note": "Strong analysis but answer truncated; case citations appear verifiable and jurisdiction-correct",
          "rank": 7
        },
        "o3": {
          "total": 25.1,
          "quality": 8.0,
          "accuracy": 4.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 11.6,
          "note": "Roche Products HCA 24 and Firstenberg citations appear fabricated or misattributed.",
          "rank": 6
        },
        "grok-4.3": {
          "total": 27.6,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 7.5,
          "note": "Commissioner v. VAS Holdings cited without verifiable basis; MLI India-US status overstated",
          "rank": 3
        },
        "mistral-large": {
          "total": 3.7,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.7,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 33.7,
          "note": "",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 5.9,
          "note": "Superficial analysis; misses MLI Article 12/15, SEP treaty override nuance, GAAR depth",
          "rank": 8
        },
        "deepseek-v3.2": {
          "total": 27.8,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 59.4,
          "note": "Formula One PE citation misapplied; TR 2010/7 unverified; Morgan Stanley accurate",
          "rank": 2
        },
        "qwen3.7-max": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 108.8,
          "note": "Samsung Electronics PE cite unverified; Morgan Stanley India PE case is real.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 39.5,
          "note": "Formula One PE reasoning misapplied; Mastercard/Nokia citations unverifiable as stated",
          "rank": 4
        }
      }
    },
    {
      "id": 99,
      "category": "Real Estate",
      "use_case": "foreign_ownership_controls_mixed_use_tower",
      "models": {
        "claude-opus-4.8": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 55.8,
          "note": "Accurate, well-structured, cites real statutes, strong cross-jurisdictional REIT-specific flags.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 126.8,
          "note": "Exceptional depth; minor threshold figures need verification for 2027 indexation.",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 122.3,
          "note": "Thorough, jurisdiction-accurate checklist; strong REIT-specific transposition warnings included.",
          "rank": 5
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 17.9,
          "note": "Accurate, well-structured, actionable; FIRB threshold and ECTEA details correct.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 30.6,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.0,
          "note": "Solid comparative checklist; nominee mis-transposition points well-targeted and accurate.",
          "rank": 2
        },
        "mistral-large": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 38.2,
          "note": "Dubai onshore SPV 51% sponsor rule overstated; ECTEA analysis solid",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 4.9,
          "note": "Superficial analysis; misses FIRB foreign government investor rules, ROE detail, Dubai nuance",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.8,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 39.7,
          "note": "Solid comparative checklist; FTA threshold nuance slightly oversimplified but no hallucinations.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 108.6,
          "note": "Solid framework; FTA threshold detail and UAE Trust Law citation need verification.",
          "rank": 9
        },
        "gemini-3.1-pro": {
          "total": 28.7,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 32.5,
          "note": "Solid framework; FIRB FGI thresholds and ROE mechanics correctly stated.",
          "rank": 4
        }
      }
    },
    {
      "id": 100,
      "category": "Securities",
      "use_case": "tokenized_equity_cross_border_offering",
      "models": {
        "claude-opus-4.8": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 83.1,
          "note": "Rigorous, well-structured; Ripple/LBRY citations accurate but context slightly stretched.",
          "rank": 4
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 137.1,
          "note": "Exceptional depth; minor truncation at end; case citations appear verifiable.",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 24.0,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 140.2,
          "note": "Strong US/EU analysis; Singapore/Brazil/Switzerland sections incomplete or truncated.",
          "rank": 9
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 21.8,
          "note": "Rigorous, well-structured; minor risk on 2027 regulatory evolution assumptions.",
          "rank": 3
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.3,
          "note": "Accurate, well-structured, jurisdiction-correct; Telegram cite verifiable, no hallucinations.",
          "rank": 1
        },
        "mistral-large": {
          "total": 25.8,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 51.9,
          "note": "MAS v. OneCoin 2017 and Atlas Quantum CVM citations appear fabricated/misattributed",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 17.0,
          "quality": 3.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 6.9,
          "note": "Superficial analysis; step-format unprofessional; conflicts underdeveloped; boxed answer absurd",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 45.4,
          "note": "Solid framework; SEC v. Telegram citation slightly imprecise but not fabricated.",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 135.2,
          "note": "Telegram cite accurate; Howey cite correct; MiCA exclusion analysis strong.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 37.1,
          "note": "Landreth and MiCA Art.2(4)(a) correctly cited; structuring advice actionable.",
          "rank": 2
        }
      }
    },
    {
      "id": 101,
      "category": "Arbitration",
      "use_case": "state_entity_ppa_renegotiation_enforcement",
      "models": {
        "claude-opus-4.8": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 65.8,
          "note": "Rigorous dual-track analysis; BIT caveat appropriate; case citations appear verifiable.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 134.5,
          "note": "Dalmia/Westacre citations misapplied; Rome I retained-law analysis overstated post-Brexit",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 160.2,
          "note": "Solid framework but answer cuts off; BIT details and NY Convention enforcement underexplored.",
          "rank": 5
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 18.4,
          "note": "",
          "rank": 9
        },
        "grok-4.3": {
          "total": 29.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 6.1,
          "note": "Solid structure; Nigerian Sovereign Immunity Act citation unverified but plausible.",
          "rank": 3
        },
        "mistral-large": {
          "total": 3.2,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.2,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 61.1,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 17.6,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 6.1,
          "note": "Step-by-step format unprofessional; shallow analysis, missing BIT specifics and key risks",
          "rank": 8
        },
        "deepseek-v3.2": {
          "total": 25.8,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 62.1,
          "note": "Shell v NNPC cite unverified; Fedax/CMS citations plausible but context stretched",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 24.9,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 125.8,
          "note": "Ralli Bros misapplied; Libyan/Actimon cite unverifiable; Soleimany correctly cited",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 42.3,
          "note": "Ralli Bros and Haugesund correctly cited; Binnenbeziehung analysis sophisticated and accurate.",
          "rank": 1
        }
      }
    },
    {
      "id": 102,
      "category": "Regulatory Compliance",
      "use_case": "health_data_clinical_trial_platform",
      "models": {
        "claude-opus-4.8": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 55.5,
          "note": "Rigorous multi-jurisdictional map; caveats appropriate; no hallucinated citations detected.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 126.6,
          "note": "Exceptional multi-jurisdictional depth; accurate citations; missing Part IV Area 3 and governance mechanisms",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 27.4,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 121.6,
          "note": "Rigorous multi-jurisdiction map; answer truncated before cross-border transfers and HIPAA gaps",
          "rank": 8
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 16.0,
          "note": "Rigorous multi-jurisdictional map; DPDP status caveat appropriately noted.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 30.6,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.4,
          "note": "Fashion ID analogy is reasonable; Lebanon analysis appropriately caveated; DPDP transfer rules noted as pending.",
          "rank": 4
        },
        "mistral-large": {
          "total": 26.8,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 66.6,
          "note": "Solid structure; UK-US Data Bridge health exclusion claim needs verification",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 21.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.8,
          "note": "Superficial analysis; lacks depth on CTR intersection and Lebanon Law 81/2018 specifics.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 89.7,
          "note": "Solid GDPR-plus framework; DISHA flagged as draft; Lebanon law correctly cited.",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 104.8,
          "note": "Rigorous, well-structured; minor gap on Lebanon Law 81 specifics and DPDP rules.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 32.9,
          "note": "Technically rigorous, Lebanon Law 81/2018 article citations need verification",
          "rank": 3
        }
      }
    },
    {
      "id": 103,
      "category": "Criminal/White Collar",
      "use_case": "cross_border_bribery_joint_venture",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 62.8,
          "note": "Esquenazi and Airbus cites verified; two divergences precisely identified.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 129.0,
          "note": "Exceptional depth; Esquenazi and Hoskins citations accurate; minor Egyptian law gaps",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 161.4,
          "note": "Solid multi-jurisdictional framework; answer truncated before completing key comparative analysis.",
          "rank": 6
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 30.1,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 30.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.1,
          "note": "Solid comparative memo; Airbus cite accurate; Egyptian/Qatari law appropriately hedged.",
          "rank": 3
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 44.6,
          "note": "SEC v. Oracle 2012, Cass. Crim. cite, Qatar Law 22/2015 unverified",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 19.6,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 8.7,
          "note": "Egyptian/Qatari statute citations unverifiable; shallow analysis throughout; misses key risks",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 64.9,
          "note": "Solid comparative memo; Egyptian Law No. 184/2020 citation needs verification.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 29.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 104.2,
          "note": "Exceptional multi-jurisdictional analysis; accurate statutes; two divergences clearly identified.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 39.3,
          "note": "Airbus cite accurate; FCPA/UKBA divergences precisely identified and well-argued.",
          "rank": 2
        }
      }
    },
    {
      "id": 104,
      "category": "Immigration",
      "use_case": "distributed_founder_residency_and_exit",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 57.1,
          "note": "Simeio cite verified; ILR 180-day rule accurate; thorough cross-jurisdictional analysis",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 141.9,
          "note": "Exceptional cross-jurisdictional depth; statutory cites verifiable; table cut off",
          "rank": 7
        },
        "gpt-5.5": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 136.9,
          "note": "Rigorous, jurisdiction-correct, cites verified; Simeio and IRPA references accurate.",
          "rank": 4
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 17.9,
          "note": "Simeio Solutions cite verified; UAE Cabinet Resolution plausible but unverified.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.0,
          "note": "Accurate, well-structured, actionable; cites real statutes; two blind spots well-chosen.",
          "rank": 1
        },
        "mistral-large": {
          "total": 25.3,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 64.7,
          "note": "Raj and Knoll cite unverifiable; Matter of Hira misapplied; Simeio plausible",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 17.0,
          "quality": 3.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 6.7,
          "note": "Superficial analysis; misses portability rules, treaty visas, UAE free zone nuances",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.2,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 56.6,
          "note": "Solid, jurisdiction-correct brief; H-1B portability nuance well-handled.",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 100.1,
          "note": "Thorough, jurisdiction-accurate brief; FEMA/EOSG/FEIE cross-issues notably strong.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 41.3,
          "note": "Solid framework; minor inaccuracies on TN scope and H-1B portability nuance.",
          "rank": 8
        }
      }
    },
    {
      "id": 105,
      "category": "Bankruptcy & Insolvency",
      "use_case": "cross_border_saas_vendor_insolvency",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 71.8,
          "note": "Rigorous, jurisdiction-specific; Catapult/Footstar circuit split accurately flagged.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 130.3,
          "note": "Re Ascentra 2023 citation needs verification; core analysis is excellent",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 157.4,
          "note": "Rigorous, well-cited analysis; truncated before India/Singapore/ipso facto sections.",
          "rank": 7
        },
        "o3": {
          "total": 28.1,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 23.5,
          "note": "Re Avanti [2023] and Re Flightlease citations appear fabricated or misattributed.",
          "rank": 5
        },
        "grok-4.3": {
          "total": 30.6,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.3,
          "note": "Rubin v Eurofinance cited cautiously; Cambridge Gas reference slightly imprecise but defensible.",
          "rank": 1
        },
        "mistral-large": {
          "total": 3.2,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.2,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 53.4,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 6.7,
          "note": "Superficial analysis; wrong Singapore cite; India IBC 234/235 not yet operative",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 76.1,
          "note": "British Eagle citation tangential; UK ipso facto rule slightly overstated but solid overall.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 108.4,
          "note": "Rubin v Eurofinance and A.H. Robins citations appear verifiable and correctly applied.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 28.7,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 36.9,
          "note": "Re PT Visi Media Asia Tbk [2023] SGHC 234 appears fabricated/unverifiable",
          "rank": 3
        }
      }
    },
    {
      "id": 106,
      "category": "International Trade",
      "use_case": "export_controls_ai_chip_resellers",
      "models": {
        "claude-opus-4.8": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 88.3,
          "note": "Thorough multi-jurisdictional memo; FDP/de minimis nuances well-handled; local law caveated appropriately.",
          "rank": 4
        },
        "claude-sonnet-4.6": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 124.3,
          "note": "Exceptional multi-jurisdictional depth; two EAR-gap points well-articulated and original.",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 162.6,
          "note": "Memo truncated mid-sentence; Singapore section incomplete but otherwise rigorous",
          "rank": 8
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 21.7,
          "note": "Comprehensive multi-jurisdictional analysis; minor citation precision issues but no hallucinated case law",
          "rank": 1
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.5,
          "note": "Solid multi-jurisdictional analysis; UAE and EU sections could be deeper.",
          "rank": 2
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 56.7,
          "note": "CJEU C-405/16P cite misapplied; EU Reg 821/2021 numbering wrong (2021/821)",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 22.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.8,
          "note": "Superficial analysis; misses deemed exports, FDP rule, specific ECCNs, OFAC overlap",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.8,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 67.8,
          "note": "Solid multi-jurisdictional analysis; UAE framework citation slightly imprecise but not hallucinated.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 107.4,
          "note": "Thorough multi-jurisdictional analysis; UAE Cabinet Decision citation needs verification.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 28.7,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 40.8,
          "note": "Solid multi-jurisdictional analysis; FDPR/SGCA treatment accurate and actionable.",
          "rank": 6
        }
      }
    },
    {
      "id": 107,
      "category": "Environmental/ESG",
      "use_case": "carbon_credit_derivatives_misreporting",
      "models": {
        "claude-opus-4.8": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 75.7,
          "note": "Rigorous, well-caveated, correct case law, strong CoL analysis.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 142.9,
          "note": "Exceptional depth; minor truncation; Lei 15.042/2024 and ISA 2024 impressively current.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 24.0,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 178.2,
          "note": "Strong EU/UK analysis but Brazil, Nigeria, and conflict-of-laws sections missing entirely.",
          "rank": 10
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 24.1,
          "note": "Comprehensive, well-structured; minor risk CVM cite unverifiable but plausible",
          "rank": 1
        },
        "grok-4.3": {
          "total": 30.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.1,
          "note": "Solid multi-jurisdictional analysis; COL issues well-framed; no hallucinated cites.",
          "rank": 3
        },
        "mistral-large": {
          "total": 24.2,
          "quality": 7.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 43.4,
          "note": "Basic v Levinson misapplied; Spector/CDC citations questionable; CMA v BMW unverified",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 24.6,
          "quality": 6.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 7.2,
          "note": "Solid framework but shallow on EMIR, greenwashing rules, and enforcement gaps.",
          "rank": 8
        },
        "deepseek-v3.2": {
          "total": 25.8,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 68.2,
          "note": "Basic Inc. v. Levinson cited inappropriately; US precedent misapplied here",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 26.9,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 90.3,
          "note": "Solid cross-jurisdictional analysis; Rome II and MAR application well-handled.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 33.0,
          "note": "Solid framework; MAR venue-trading caveat well-handled; Rome II analysis competent.",
          "rank": 5
        }
      }
    },
    {
      "id": 108,
      "category": "Construction",
      "use_case": "mega_hospital_epc_multilaw_risk",
      "models": {
        "claude-opus-4.8": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 85.1,
          "note": "Rigorous multi-jurisdictional analysis; Belokon cite plausible but verify.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 141.8,
          "note": "Larsen & Toubro Delhi HC 2012 and Belokon citations unverified/likely hallucinated",
          "rank": 7
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 177.1,
          "note": "Thorough, well-structured; French seat section truncated but otherwise production-ready.",
          "rank": 5
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 20.2,
          "note": "Shri Lal Mahal citation misapplied; Decree 2023-357 unverified; core analysis strong",
          "rank": 2
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.2,
          "note": "Solid multi-jurisdictional memo; Qatari article citations need verification.",
          "rank": 4
        },
        "mistral-large": {
          "total": 22.2,
          "quality": 7.0,
          "accuracy": 3.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 45.0,
          "note": "Multiple fabricated cases; Al Attiyah, Qatar Foundation, SNF citations unverifiable",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 20.0,
          "quality": 5.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 6.3,
          "note": "Article 34 Law 24/2016 unverified; shallow OHADA/Indian law analysis throughout",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 24.8,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 65.9,
          "note": "Saw Pipes misapplied; Qatar Civil Code article numbers unverifiable; SNF cite plausible",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 120.3,
          "note": "Kailash Nath citation plausible; Alstom/Belokon citations misapplied or hallucinated",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 40.7,
          "note": "Technically strong, jurisdiction-specific, actionable; minor risk on Article 266 framing.",
          "rank": 1
        }
      }
    },
    {
      "id": 109,
      "category": "Insurance",
      "use_case": "cyber_attack_cross_border_coverage",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 66.4,
          "note": "Safeway v Twigger cited cautiously and correctly flagged as analogous only.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 139.5,
          "note": "Exceptional depth; Goldfarb cite accurate; minor uncertainty on some English authorities",
          "rank": 2
        },
        "gpt-5.5": {
          "total": 25.0,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 203.6,
          "note": "Solid structure but answer cuts off mid-sentence; missing Japan/NY/Swiss fine analysis",
          "rank": 6
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 22.0,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 27.6,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 8.8,
          "note": "Les Laboratoires Servier misapplied; Hartford cite strained but plausible",
          "rank": 3
        },
        "mistral-large": {
          "total": 23.2,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 44.4,
          "note": "Multiple hallucinated/misapplied cases; Solo Capital, AIG, Ace citations misused",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 5.0,
          "accuracy": 2.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 6.5,
          "note": "Multiple hallucinated cases; Les Fils Dreyfus, Richardson v Mellish misapplied, Reliance fabricated",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 24.3,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 70.6,
          "note": "Multiple hallucinated/misapplied citations: Servier, Burr, IRB-Brasil, EIOPA Opinion",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 26.9,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 102.4,
          "note": "Safeway v Twigger misapplied; Home Ins NY cite needs verification",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 26.7,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 32.5,
          "note": "Safeway v Twigger misapplied; BGE 143 II 8 unverified; Shearson plausible",
          "rank": 5
        }
      }
    },
    {
      "id": 110,
      "category": "Trusts & Estates",
      "use_case": "cross_border_digital_assets_estate",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 70.9,
          "note": "Rigorous multi-jurisdictional analysis; minor uncertainty on Singapore Hague status.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 131.9,
          "note": "CLM v CLN cite verified; Pflichtteil reform figures slightly imprecise but minor",
          "rank": 7
        },
        "gpt-5.5": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 184.7,
          "note": "Solid multi-jurisdictional analysis; answer truncated before completing forced heirship section.",
          "rank": 8
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 20.5,
          "note": "Rigorous, well-structured; minor uncertainty on DIFC Art.24 citation specificity.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 13.1,
          "note": "Solid framework; minor statutory imprecisions; forced heirship quantum slightly overstated.",
          "rank": 6
        },
        "mistral-large": {
          "total": 23.7,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 33.2,
          "note": "BGE 144 III 189 and In re Coinbase citations appear fabricated or misapplied",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 18.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.4,
          "note": "Superficial analysis; misses Singapore situs, FEMA specifics, tokenization mechanics",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 92.9,
          "note": "Rigorous multi-jurisdictional analysis; California Hague Convention claim needs verification.",
          "rank": 4
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 114.7,
          "note": "Rigorous cross-border analysis; DIFC Trust Law article citation unverified but plausible.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 34.0,
          "note": "Rigorous, jurisdiction-correct analysis; DIFC Law No.2/2024 reference plausible but unverified.",
          "rank": 1
        }
      }
    },
    {
      "id": 111,
      "category": "Government Contracts",
      "use_case": "defense_software_offset_localization",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 77.2,
          "note": "Exceptional multi-jurisdiction analysis; correct citations; actionable structuring recommendations throughout.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 140.2,
          "note": "Exceptionally thorough, accurate multi-jurisdiction analysis; truncated at Conflict 2 resolution.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 171.7,
          "note": "Thorough, accurate, well-structured but answer cuts off mid-sentence in Section 3.",
          "rank": 8
        },
        "o3": {
          "total": 27.1,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 22.0,
          "note": "CJEU C-187/16 citation unverifiable; DAP 2020 section numbers likely fabricated",
          "rank": 7
        },
        "grok-4.3": {
          "total": 30.6,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 13.0,
          "note": "Technically precise, well-structured, no hallucinated cites, actionable conflicts identified.",
          "rank": 3
        },
        "mistral-large": {
          "total": 25.2,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 60.6,
          "note": "Union of India v. Cessna AIR 1990 Del 18 appears fabricated/unverifiable",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 21.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 6.9,
          "note": "Superficial treatment; misses TAA/MLA nuance, KSA ITAR restrictions, EU Directive 2009/43/EC",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 81.2,
          "note": "Technically strong, jurisdiction-accurate, actionable; minor USML category imprecision noted.",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 113.8,
          "note": "Technically rigorous, well-structured, no hallucinated cites, actionable structuring strategies.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 45.5,
          "note": "Technically precise, modular IP structuring advice is actionable and non-obvious.",
          "rank": 2
        }
      }
    },
    {
      "id": 112,
      "category": "AI/Tech Regulation",
      "use_case": "foundation_model_risk_policy_multi_regime",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 46.2,
          "note": "Precise citations, actionable architecture, strong conflict analysis, no hallucinations detected.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 61.5,
          "note": "Rigorous multi-regime analysis; minor EO 14110 status caveat appropriately flagged.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 89.0,
          "note": "Precise citations, strong architecture, minor gap on UK statutory developments",
          "rank": 6
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 17.8,
          "note": "Modular architecture well-executed; minor EU Act article numbering imprecision noted.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 29.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.4,
          "note": "PDPA 3-day breach notice slightly mischaracterized; otherwise solid cross-regime analysis.",
          "rank": 5
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 42.4,
          "note": "Bolger v. Amazon and FTC v. Weight Watchers misapplied; article citations imprecise",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 18.6,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 9.1,
          "note": "Cal. Bus. & Prof. Code §17941 on AI disclosure is fabricated; shallow analysis throughout",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.9,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 70.8,
          "note": "Solid modular architecture; EO 14110 details slightly overstated but no fabricated caselaw.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 25.4,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 86.0,
          "note": "Solid architecture; CIRCIA scope overstated; EO 14110 superseded by EO 14179.",
          "rank": 9
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 33.5,
          "note": "Rigorous Core-and-Spoke architecture; EO 14110 compute threshold slightly imprecise.",
          "rank": 3
        }
      }
    },
    {
      "id": 113,
      "category": "Corporate Governance",
      "use_case": "dual_listed_corporate_opportunity_conflict",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 67.3,
          "note": "Rigorous dual-jurisdiction analysis; MFW, §122(17), s.175 all correctly applied.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 1.4,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 1.4,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 132.6,
          "note": "",
          "rank": 10
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 149.4,
          "note": "Thorough, well-cited; answer truncated before completing NY/forum analysis.",
          "rank": 4
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 17.8,
          "note": "",
          "rank": 8
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.7,
          "note": "Guth and Broz correctly cited; dual-board process well-structured and actionable.",
          "rank": 1
        },
        "mistral-large": {
          "total": 3.2,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.2,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 66.5,
          "note": "",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 5.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 7.2,
          "note": "Toner v. Baltimore Envelope is hallucinated; analysis is superficial throughout",
          "rank": 7
        },
        "deepseek-v3.2": {
          "total": 27.3,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 78.8,
          "note": "Guth and Broz correctly cited; solid dual-board analysis; NY law underdeveloped",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 105.8,
          "note": "Excellent cross-jurisdictional analysis; MFW cite accurate; Edgar v MITE slightly misapplied",
          "rank": 3
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 37.5,
          "note": "Solid analysis; VantagePoint cite slightly misapplied but cases otherwise accurate.",
          "rank": 5
        }
      }
    },
    {
      "id": 114,
      "category": "Contract & Commercial",
      "use_case": "multi_tier_dispute_resolution_clauses_alignment",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 72.2,
          "note": "Exceptional cross-jurisdictional depth; Enka/Kabab-Ji/Dalico citations verified and accurate.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 119.5,
          "note": "Banyan Tree/DNB citations plausible but Enka UKSC 38 ratio slightly mischaracterized",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 152.5,
          "note": "Strong clause; commentary truncated mid-sentence, missing OHADA/French mandatory rules analysis",
          "rank": 8
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 17.0,
          "note": "Enka v Chubb and Emirates Trading cited correctly; OHADA analysis sophisticated.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.2,
          "note": "Solid clause; OHADA-CCJA conflict and UAE Federal Law citation need deeper analysis.",
          "rank": 5
        },
        "mistral-large": {
          "total": 25.7,
          "quality": 7.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 38.7,
          "note": "CIArb 2023 Protocol citation unverifiable; SIAC Rules should cite 2025 edition",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.5,
          "note": "Shallow commentary; missing OHADA NY Convention gaps, DIFC-onshore bridge, separability drafting",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.3,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 74.4,
          "note": "Solid clause; commentary covers key traps but OHADA analysis could deepen.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 116.3,
          "note": "Exceptional cross-jurisdictional depth; Kabab-Ji and Fiona Trust citations verified correct.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 41.0,
          "note": "Peterson Farms, Enka, Kabab-Ji, Anupam Mittal all verifiable; strong cross-jurisdictional analysis.",
          "rank": 3
        }
      }
    },
    {
      "id": 115,
      "category": "Banking/Finance",
      "use_case": "cross_border_security_package_priority_map",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 89.8,
          "note": "Exceptional cross-border analysis; UCC §9-307(c) BVI/DC filing point is precise.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 126.1,
          "note": "Exceptional multi-jurisdictional depth; answer truncated before completing Parts B and C",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 154.4,
          "note": "Technically strong, truncated before Nigeria/OHADA/intercreditor undermining analysis",
          "rank": 8
        },
        "o3": {
          "total": 29.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 18.4,
          "note": "STJ REsp 1.340.553/SP and Re: Intercontractors citations unverified/likely hallucinated",
          "rank": 5
        },
        "grok-4.3": {
          "total": 30.6,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 10.9,
          "note": "Solid multi-jurisdictional analysis; STMAA 2017 and OHADA citations verified.",
          "rank": 2
        },
        "mistral-large": {
          "total": 23.2,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 56.9,
          "note": "Multiple fabricated case citations across all jurisdictions undermine otherwise solid framework",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 21.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 8.3,
          "note": "Superficial analysis; misses CAMA 2020, RCCM details, intercreditor depth",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 115.7,
          "note": "Technically strong, jurisdiction-accurate, minor gaps in STMA control perfection detail.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 120.3,
          "note": "Rigorous multi-jurisdictional analysis; pactum commissorium and registry conflicts expertly flagged.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 38.0,
          "note": "203 N. LaSalle cited tangentially but not directly on point; otherwise solid.",
          "rank": 3
        }
      }
    },
    {
      "id": 116,
      "category": "Data Privacy",
      "use_case": "clinical_trial_data_sharing_conflict_matrix",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 87.2,
          "note": "Exceptional depth; ANPD Resolução 19/2024 and DPF extension verified.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 134.4,
          "note": "Exceptional depth; truncated LGPD section; Breyer cite accurate and relevant",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 22.0,
          "quality": 6.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 154.0,
          "note": "Solid GDPR analysis but answer is incomplete; missing UK, Canada, Japan, LGPD sections",
          "rank": 9
        },
        "o3": {
          "total": 32.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 24.5,
          "note": "Thorough, jurisdiction-accurate, actionable; federated learning contingency adds real value.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.8,
          "note": "Solid matrix; minor LGPD Art.5§3 localization claim overstated slightly.",
          "rank": 4
        },
        "mistral-large": {
          "total": 25.7,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 44.9,
          "note": "Multiple hallucinated citations: P-25-01, ANPD v. Telefônica, UK-US Bridge health exclusion",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 22.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 8.7,
          "note": "Superficial analysis; misses genomic-specific rules, APPI 2022 reforms, LGPD gaps.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 25.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 99.2,
          "note": "Canada GDPR adequacy claim overstated; LGPD transfer mechanisms slightly underdeveloped",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 128.6,
          "note": "ANPD Resolution 15/2024 unverified; Gymrek cite plausible but peripheral",
          "rank": 3
        },
        "gemini-3.1-pro": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 50.6,
          "note": "Solid matrix; ANPD Resolution 19/2024 needs verification; PHIPA nuances strong.",
          "rank": 6
        }
      }
    },
    {
      "id": 117,
      "category": "M&A",
      "use_case": "public_private_cross_border_takeover_structuring",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 86.2,
          "note": "Thorough, well-structured; minor gaps in UAE PDPL detail and truncated Scenario 3",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 135.0,
          "note": "Rigorous multi-jurisdictional analysis; SEC/earn-out scenarios incomplete but strong overall",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 149.7,
          "note": "Solid multi-jurisdictional analysis; memo truncated mid-sentence on FIRB section.",
          "rank": 8
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 22.0,
          "note": "Rigorous multi-jurisdictional memo; minor ASIC item 14 framing imprecision",
          "rank": 1
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 11.3,
          "note": "Solid multi-jurisdictional memo; FIRB thresholds and UAE reforms accurately flagged.",
          "rank": 3
        },
        "mistral-large": {
          "total": 25.2,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 74.0,
          "note": "Mengniu/CK Asset FIRB cases misapplied; Sun Pharma-Ranbaxy scheme cite questionable",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 22.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 8.8,
          "note": "Superficial analysis; SEBI open offer inapplicable to unlisted target; incomplete scenarios",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 116.9,
          "note": "Solid multi-jurisdictional memo; minor gaps in SEBI open offer threshold precision.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 120.1,
          "note": "Thorough, well-structured; minor gaps on s611 item 14 applicability nuance.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 55.2,
          "note": "Solid structure; FEMA 25%/18-month earn-out rule needs verification for 2027.",
          "rank": 5
        }
      }
    },
    {
      "id": 118,
      "category": "Employment Law",
      "use_case": "remote_executive_multi_regime_termination_package",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 80.2,
          "note": "Exceptional multi-regime analysis; CO Art.340c(2) insight is decisive and correct.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 135.9,
          "note": "Exceptional depth; CO Art.340c(2) insight outstanding; truncated before Singapore/conflicts conclusion.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 168.7,
          "note": "Rigorous multi-jurisdictional analysis; answer truncated mid-sentence but otherwise excellent",
          "rank": 2
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 21.4,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.7,
          "note": "Solid multi-regime analysis; Rome I retained-UK citation slightly imprecise but defensible.",
          "rank": 3
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 73.7,
          "note": "BGE citations unverifiable; Office Angels/Sullivan applications overstated; solid framework overall",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 6.9,
          "note": "Superficial analysis; step-list format unprofessional; misses key conflicts-of-law depth",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 24.3,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 93.5,
          "note": "Workers Act 2023 misapplied; Rome I retained EU law framing post-Brexit oversimplified",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 112.3,
          "note": "Vertex Data Science v Lynn and Micklefield citations appear fabricated or misattributed.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 38.6,
          "note": "Solid multi-regime analysis; California carve-out and whistleblower safe harbor well-handled.",
          "rank": 6
        }
      }
    },
    {
      "id": 119,
      "category": "IP/Tech Law",
      "use_case": "multijurisdiction_generative_music_rights_clearance",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 84.9,
          "note": "Rigorous, jurisdiction-accurate, flags unsettled law; Gaylord cite slightly strained",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 135.2,
          "note": "Rigorous, jurisdiction-specific, correctly flags hallucination risks; truncated at output licensing.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 157.3,
          "note": "Solid multi-jurisdiction analysis; truncated before Japan/framework sections completed",
          "rank": 8
        },
        "o3": {
          "total": 27.1,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 17.0,
          "note": "Skidmore v. Led Zeppelin conflated; Blurred Lines misattributed; NYT case pending",
          "rank": 6
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 11.9,
          "note": "Solid framework; s.29A TDM scope slightly misstated, minor gaps on DMCA 512.",
          "rank": 3
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 51.4,
          "note": "TuneIn v. Warner misapplied; Viacom/Ziggo citations strained for AI context",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.0,
          "note": "Superficial analysis; misattributes pastiche exception article; lacks actionable specifics",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 25.3,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 87.4,
          "note": "Thomson Reuters v. Ross cite misapplied; Williams v. Bridgeport citation inaccurate",
          "rank": 9
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 100.6,
          "note": "Rigorous, well-structured; minor gap on Japan moral rights and Brazil enforcement nuance.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 28.7,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 41.8,
          "note": "Solid multi-jurisdictional analysis; Pelham citation accurate; actionable framework delivered.",
          "rank": 2
        }
      }
    },
    {
      "id": 120,
      "category": "Arbitration",
      "use_case": "state_entity_renewables_tariff_rebalancing",
      "models": {
        "claude-opus-4.8": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 68.8,
          "note": "Rigorous, well-structured; BIT verification caveats appropriately flagged throughout.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 129.3,
          "note": "Tecmed/Metalclad/Pantechniki cited plausibly but unverified; Achmea framing correct",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 149.7,
          "note": "Rigorous BIT/ICSID analysis, verified citations, strong fork-in-the-road treatment.",
          "rank": 4
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 20.6,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.5,
          "note": "Solid analysis, correct citations, actionable steps, no hallucinated case law.",
          "rank": 1
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 54.2,
          "note": "Salini, Vivendi, Tecmed cited plausibly but article numbers/BIT details unverified",
          "rank": 6
        },
        "llama-4-maverick": {
          "total": 22.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 6.6,
          "note": "Superficial analysis; misses MFN clauses, umbrella clauses, and treaty shopping depth.",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 25.8,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 67.5,
          "note": "Micula/SGS/Pantechniki citations plausible but unverified; NL-Egypt BIT details assumed",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 24.9,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 119.4,
          "note": "Trust Risk Group cite misapplied; Philip Morris/Pac Rim citations plausible but context strained",
          "rank": 8
        },
        "gemini-3.1-pro": {
          "total": 28.7,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 30.3,
          "note": "Strong analysis; Pantechniki/SGS/Ampal citations real but applied selectively",
          "rank": 3
        }
      }
    },
    {
      "id": 121,
      "category": "Regulatory Compliance",
      "use_case": "multi_license_global_stablecoin_regime_comparison",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 86.6,
          "note": "Rigorous, well-caveated memo; citations verifiable; conflicts insightful and actionable.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 25.4,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 128.5,
          "note": "Solid framework; truncated at Conflict 2; Howey cite verified and accurate.",
          "rank": 8
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 157.8,
          "note": "Thorough, well-cited memo; Howey cite accurate; MiCA Art references verified.",
          "rank": 5
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 23.6,
          "note": "Rigorous, well-structured memo; minor uncertainty on NYDFS Dec 2023 guidance specifics.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 11.6,
          "note": "Solid enacted-law grounding; conflicts analysis practical; uncertainty flags appropriate.",
          "rank": 3
        },
        "mistral-large": {
          "total": 24.2,
          "quality": 7.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 63.3,
          "note": "FCA v OneCoin, ESMA guideline cite, MAS v Binance likely hallucinated",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 21.0,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 9.3,
          "note": "Superficial analysis; misclassifies USD-peg as ART not EMT; conflicts section weak",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 115.3,
          "note": "MiCA ART vs EMT classification error; USD-peg likely EMT not ART",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 114.7,
          "note": "Rigorous, well-structured; Tether CFTC cite slightly imprecise but not fabricated.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 26.7,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 47.5,
          "note": "CFTC v. Tether cited as precedent; no such finalized case exists",
          "rank": 6
        }
      }
    },
    {
      "id": 122,
      "category": "Tax",
      "use_case": "remote_saas_permanent_establishment_matrix",
      "models": {
        "claude-opus-4.8": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 74.8,
          "note": "Exceptional depth; MLI/US-treaty point and Nigeria 2025 Acts well-flagged.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 139.7,
          "note": "Exceptional depth; LLC treaty access flag, SEP, service PE, Nigeria gap outstanding.",
          "rank": 2
        },
        "gpt-5.5": {
          "total": 25.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 171.9,
          "note": "Strong India/Germany analysis; truncated before Nigeria/Canada mitigation strategies completed",
          "rank": 6
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 24.6,
          "note": "",
          "rank": 9
        },
        "grok-4.3": {
          "total": 27.6,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 11.1,
          "note": "eBay India, Samsung Electronics, Prévost Car citations misapplied or unverifiable here",
          "rank": 3
        },
        "mistral-large": {
          "total": 2.8,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 2.8,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 65.6,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 16.0,
          "quality": 3.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.3,
          "note": "Superficial analysis; lacks treaty specifics, Nigeria detail, and actionable mitigation depth.",
          "rank": 8
        },
        "deepseek-v3.2": {
          "total": 22.2,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 51.7,
          "note": "Dudney 2000 and BFH 2002 citations unverifiable; Morgan Stanley misapplied to home office PE",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 121.6,
          "note": "Strong structure; Nigeria SEP threshold and US-India treaty nuances slightly oversimplified.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 26.7,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 49.2,
          "note": "Mastercard AAR cite unverifiable; BFH III R 35/20 requires verification",
          "rank": 4
        }
      }
    },
    {
      "id": 123,
      "category": "Bankruptcy & Insolvency",
      "use_case": "crypto_exchange_group_insolvency_waterfall",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 86.1,
          "note": "Exceptional cross-jurisdictional depth; Swiss DLT Act and Celsius analysis precise.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 139.4,
          "note": "Exceptional depth; Part III truncated; citations appear verifiable and jurisdiction-correct",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 152.9,
          "note": "Solid, well-cited analysis cut off mid-sentence; incomplete but legally sound.",
          "rank": 8
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 24.8,
          "note": "Cred Inc. cite unverifiable; Celsius citation slightly misrendered; otherwise strong.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.7,
          "note": "Celsius cite is real; Swiss Model Law gap correctly flagged; solid cross-border analysis",
          "rank": 3
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 57.2,
          "note": "Bear Stearns/Stanford citations misapplied; In re Lehman Swiss cite unverifiable",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 15.0,
          "quality": 3.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 1.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 6.9,
          "note": "Superficial analysis, poor formatting, boxed number ending is inappropriate",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 25.3,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 87.9,
          "note": "Re CA Pacific and Quantum Materials citations are questionable/misapplied hallucinations",
          "rank": 9
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 128.6,
          "note": "Celsius 2d Cir cite and Tulip Trading characterization are inaccurate/hallucinated",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 45.6,
          "note": "Bear Stearns cite strained; ByBit and Celsius correctly applied; solid framework",
          "rank": 6
        }
      }
    },
    {
      "id": 124,
      "category": "Real Estate",
      "use_case": "sharia_compliant_cross_border_hotel_finance",
      "models": {
        "claude-opus-4.8": {
          "total": 29.5,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.5,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 72.8,
          "note": "Rigorous multi-jurisdictional structure; AAOIFI/OHADA citations verifiable; caveats appropriate.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 27.5,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.5,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 138.1,
          "note": "Marriott v. Eden Roc cite unverifiable; AAOIFI/OHADA analysis otherwise strong",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 151.7,
          "note": "Solid structure but conflicts analysis incomplete; OHADA enforcement gaps underexplored.",
          "rank": 8
        },
        "o3": {
          "total": 30.0,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 24.3,
          "note": "Board of Grievances case numbers appear fabricated; AAOIFI/IIFM refs solid",
          "rank": 1
        },
        "grok-4.3": {
          "total": 29.5,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.5,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.7,
          "note": "Solid AAOIFI/OHADA grounding; waterfall and step-in conflicts well-articulated.",
          "rank": 3
        },
        "mistral-large": {
          "total": 26.5,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.5,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 39.9,
          "note": "UAE Federal Law No. 14/2020 on Mortgages and No. 4/2020 citations unverifiable",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 8.3,
          "note": "Superficial analysis; misses OHADA SPV IP issues, KSA foreign ownership limits",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 28.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 3.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 69.1,
          "note": "Solid structure, accurate citations, good conflict analysis, mitigants somewhat generic",
          "rank": 5
        },
        "qwen3.7-max": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 112.6,
          "note": "Sophisticated multi-jurisdictional structure; KSA Civil Transactions Law cite needs verification.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "status": "error",
          "err": "empty completion (reasoning consumed max_tokens)"
        }
      }
    },
    {
      "id": 125,
      "category": "Securities",
      "use_case": "dual_listing_spac_de_spac_disclosure",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 81.7,
          "note": "Rigorous multi-regime analysis; PSLRA SPAC safe-harbor removal characterization slightly imprecise.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 128.8,
          "note": "Technically strong, well-structured; minor uncertainty on some 2024 rule specifics",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 23.0,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 159.4,
          "note": "Solid framework but answer truncates mid-sentence; misses Part III requirements entirely",
          "rank": 9
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 24.4,
          "note": "Thorough cross-border analysis; minor risk on 2022 SEC rule finalization assumptions.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 11.4,
          "note": "Release 33-11265 citation requires verification; LR 5.6 reference may be imprecise.",
          "rank": 3
        },
        "mistral-large": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 49.3,
          "note": "Solid framework; minor inaccuracies in LR citations and SRD II thresholds.",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 17.0,
          "quality": 3.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 6.5,
          "note": "Superficial framework overview; lacks concrete disclosures, specific rules, actionable analysis.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 25.3,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 110.6,
          "note": "SEC v. AT&T cited incorrectly; ESMA document number unverifiable; otherwise solid",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 129.4,
          "note": "Technically strong; minor PSLRA nuance but no hallucinated citations detected.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 46.8,
          "note": "SEC Release 33-11265 and Item 1609 citations require verification; MultiPlan cite plausible",
          "rank": 6
        }
      }
    },
    {
      "id": 126,
      "category": "Criminal/White Collar",
      "use_case": "cross_border_beneficial_ownership_investigation",
      "models": {
        "claude-opus-4.8": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 91.4,
          "note": "Comprehensive, well-structured; minor truncation at end; citations verifiable.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 131.3,
          "note": "Exceptional depth; Hoskins citation accurate; UAE/Swiss analysis sophisticated and current",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 24.0,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 202.9,
          "note": "Solid framework but memo truncated; missing Swiss, double-jeopardy, and strategy sections.",
          "rank": 8
        },
        "o3": {
          "total": 28.1,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 24.1,
          "note": "Control Components/Hoskins citations misapplied; Bundesgericht cite unverifiable; otherwise strong",
          "rank": 5
        },
        "grok-4.3": {
          "total": 27.6,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 10.5,
          "note": "ENRC v SFO citation inaccurate; Hoskins holding misstated; DOJ-SFO MOU unverified",
          "rank": 6
        },
        "mistral-large": {
          "total": 3.2,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.2,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 59.2,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 6.7,
          "note": "UAE Federal Law No.24/2009 and No.2/2008 citations appear fabricated/misattributed",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 26.8,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 73.5,
          "note": "UAE statute citations imprecise; ENRC case citation accurate but context slightly off",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 108.9,
          "note": "Exceptional depth; ENRC/Upjohn/Airbus cites verifiable; Swiss ML analysis precise.",
          "rank": 3
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 36.2,
          "note": "Solid memo; SFO v ENRC and Three Rivers citations are real and accurate.",
          "rank": 1
        }
      }
    },
    {
      "id": 127,
      "category": "International Trade",
      "use_case": "dual_use_semiconductor_tool_distribution_split",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 78.6,
          "note": "Rigorous multi-regime analysis; FDP/de minimis cumulative framing is excellent.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 130.6,
          "note": "Exceptionally thorough, jurisdiction-correct, no hallucinated cites; answer cut off mid-scenario",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 151.8,
          "note": "Thorough multi-jurisdictional analysis; memo cut off before completing UAE/scenarios sections",
          "rank": 9
        },
        "o3": {
          "total": 28.1,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 17.0,
          "note": "Solid multi-regime analysis; minor ECCN/FDPR imprecisions but no fabricated cites.",
          "rank": 5
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.5,
          "note": "Solid multi-jurisdictional analysis; FDPR cite slightly imprecise but no hallucinations.",
          "rank": 3
        },
        "mistral-large": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 61.2,
          "note": "UAE Group D:5 terrorism-list framing overstated; FDP analysis solid overall",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 6.6,
          "note": "Superficial analysis; misses FDP rule specifics, Entity List, MENA red flags",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.3,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 96.3,
          "note": "Solid multi-jurisdictional analysis; FDP rule citations accurate; UAE D:1 correct.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 118.2,
          "note": "Technically strong, FDPR/de minimis analysis precise, agency model recommendation practical.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 54.7,
          "note": "Strong multi-regime analysis; ITAR 0% de minimis framing slightly imprecise",
          "rank": 2
        }
      }
    },
    {
      "id": 128,
      "category": "Environmental/ESG",
      "use_case": "supply_chain_deforestation_due_diligence_vs_local_law",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 71.2,
          "note": "Rigorous, well-caveated cross-jurisdictional analysis with appropriate local-counsel disclaimers.",
          "rank": 4
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 134.2,
          "note": "Exceptional depth; Williams v. Gerber cite verifiable; minor truncation at end",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 144.2,
          "note": "Thorough, well-cited, production-ready; answer truncated before governance framework recommendations.",
          "rank": 7
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 22.9,
          "note": "Supreme Court Decision 291/PK/TUN/2017 and FUNAI Ordinance 14/2021 unverifiable.",
          "rank": 3
        },
        "grok-4.3": {
          "total": 32.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 9.2,
          "note": "Precise citations, actionable framework, strong cross-jurisdictional tension analysis throughout.",
          "rank": 1
        },
        "mistral-large": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 64.0,
          "note": "Solid framework; Decree 10,222/2020 scope overstated but no fabricated cases.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 6.5,
          "note": "Indonesian Forestry Law No.41/1999 cited as 4/1996; shallow analysis throughout",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.3,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 72.6,
          "note": "Solid cross-jurisdictional analysis; accurate citations; actionable framework; minor gaps in depth.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 99.8,
          "note": "Exceptional cross-jurisdictional analysis; data clean room and anti-greenwashing firewall are standout recommendations.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 33.6,
          "note": "Rigorous cross-jurisdictional analysis; bifurcated marketing strategy particularly actionable.",
          "rank": 2
        }
      }
    },
    {
      "id": 129,
      "category": "Insurance",
      "use_case": "pandemic_and_cyber_rider_reinsurer_dispute",
      "models": {
        "claude-opus-4.8": {
          "total": 2.3,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 2.3,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 94.6,
          "note": "",
          "rank": 9
        },
        "claude-sonnet-4.6": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 133.8,
          "note": "Exceptional depth; case citations appear accurate; answer cut off mid-sentence",
          "rank": 1
        },
        "gpt-5.5": {
          "total": 25.4,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 149.0,
          "note": "Solid comparative framework; answer truncated before divergences and dispute strategy sections.",
          "rank": 5
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 28.8,
          "note": "",
          "rank": 7
        },
        "grok-4.3": {
          "total": 25.6,
          "quality": 7.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 9.8,
          "note": "West v Zurich unverified; OR Art.33 misapplied; MIA analogy overstated",
          "rank": 4
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 63.1,
          "note": "Several citations unverifiable or misapplied; Roman Catholic Diocese case mischaracterized.",
          "rank": 3
        },
        "llama-4-maverick": {
          "total": 17.0,
          "quality": 4.0,
          "accuracy": 3.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 7.7,
          "note": "Yingkou cite fabricated; Swiss law shallow; strategy generic, lacks arbitral coordination depth",
          "rank": 6
        },
        "deepseek-v3.2": {
          "total": 2.8,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 2.8,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 93.2,
          "note": "",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 1.0,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 1.0,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 158.5,
          "note": "",
          "rank": 10
        },
        "gemini-3.1-pro": {
          "total": 26.7,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 43.2,
          "note": "Jaw the Pointe TX case misattributed; Album Realty citation misapplied to ACC doctrine",
          "rank": 2
        }
      }
    },
    {
      "id": 130,
      "category": "Healthcare/Pharma",
      "use_case": "cross_border_telemedicine_platform_risk",
      "models": {
        "claude-opus-4.8": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 78.1,
          "note": "Comprehensive, well-cited, actionable; UAE localization and AI Act coverage excellent.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 132.9,
          "note": "Discover Bank arbitration cite misapplied; Engalla and Nedlloyd broadly accurate",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 25.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 176.2,
          "note": "Solid US/Canada/India analysis; UAE/conflicts-of-law/data sections incomplete/truncated.",
          "rank": 7
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 30.3,
          "note": "",
          "rank": 9
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.5,
          "note": "Solid, well-structured analysis; citations verifiable; UAE detail slightly thin.",
          "rank": 3
        },
        "mistral-large": {
          "total": 2.3,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 2.3,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 91.3,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 24.0,
          "quality": 6.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 7.4,
          "note": "Adequate coverage but shallow analysis; misses UAE DHCC/DHA specifics and EU AI Act status",
          "rank": 8
        },
        "deepseek-v3.2": {
          "total": 27.8,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 85.9,
          "note": "Solid, well-structured analysis; citations verifiable; minor gaps in UAE/Canada depth.",
          "rank": 5
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 102.6,
          "note": "Thorough, well-structured; M/S Bremen cite slightly misapplied but not hallucinated.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 37.8,
          "note": "Thorough, well-structured; minor citation risks on UAE Federal Law No. 42/2022.",
          "rank": 2
        }
      }
    },
    {
      "id": 131,
      "category": "Government Contracts",
      "use_case": "defense_cloud_sovereignty_multistandard_bid",
      "models": {
        "claude-opus-4.8": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 94.5,
          "note": "Exceptional cross-jurisdictional analysis; Schrems II citation accurate and well-applied.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 139.6,
          "note": "Seagate BIS cite unverifiable; otherwise exceptional multi-jurisdictional depth and structure.",
          "rank": 8
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 169.0,
          "note": "Rigorous multi-jurisdiction analysis; Schrems II cite accurate; memo cut off mid-sentence.",
          "rank": 6
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 34.1,
          "note": "Comprehensive, well-structured; minor risk on unverified NATO TA citation number.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 11.5,
          "note": "Technically sound; Microsoft Ireland reference slightly imprecise but not hallucinated.",
          "rank": 1
        },
        "mistral-large": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 53.7,
          "note": "Solid framework; DoD 5220.22-M misapplied as incident rule, not sanitization.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.7,
          "note": "Superficial analysis; misses CMMC, SecNumCloud, DISA IL levels, ITAR-EAR conflicts",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.8,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 77.6,
          "note": "Solid framework, accurate citations, but Gulf analysis lacks depth and nuance.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 118.7,
          "note": "Technically rigorous, cites real frameworks, Schrems II correctly applied, actionable structure",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 41.5,
          "note": "Technically rigorous, actionable federated JV model; minor FOCI nuance gaps.",
          "rank": 4
        }
      }
    },
    {
      "id": 132,
      "category": "Construction",
      "use_case": "mega_dam_project_risk_allocation_multilaw",
      "models": {
        "claude-opus-4.8": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 91.0,
          "note": "Exceptional multi-jurisdictional depth; World Duty Free and Metal-Tech citations verifiable.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 144.0,
          "note": "Glencore/RBRG cases likely real; Hemofarm/CCJA Getma citations need verification",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 161.0,
          "note": "Rigorous multi-jurisdictional analysis; correctly flags OHADA procurement gap; no hallucinations.",
          "rank": 4
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 19.2,
          "note": "GKN v XIC 2016 and Kolmar v Jiangsu 2022 citations unverified/likely hallucinated",
          "rank": 1
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.0,
          "note": "Solid framework; CCJA exequatur mechanics slightly oversimplified but no hallucinations.",
          "rank": 3
        },
        "mistral-large": {
          "total": 22.2,
          "quality": 7.0,
          "accuracy": 3.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 59.4,
          "note": "Multiple hallucinated instruments: OHADA Environmental/Anti-Corruption Uniform Acts don't exist; fake CCJA cases cited",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 16.0,
          "quality": 3.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.8,
          "note": "Superficial analysis; no concrete drafting; ends absurdly with boxed number",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.8,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 59.5,
          "note": "Solid framework; OHADA lacks a standalone anti-corruption Uniform Act as cited.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 100.5,
          "note": "Cavendish Square cite accurate; OHADA geography flag adds real value.",
          "rank": 8
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 53.9,
          "note": "Strong structure; DRC assumption unwarranted; OHADA East Africa coverage overstated",
          "rank": 5
        }
      }
    },
    {
      "id": 133,
      "category": "Trusts & Estates",
      "use_case": "cross_border_digital_and_real_assets_succession",
      "models": {
        "claude-opus-4.8": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 94.7,
          "note": "Rigorous multi-jurisdictional analysis, correct citations, no hallucinated case law.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 143.9,
          "note": "Exceptional depth; case citations plausible but some unverifiable; minor truncation at end",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 25.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 171.5,
          "note": "Solid framework, truncated before crypto/Lebanon/global-will pitfalls fully addressed",
          "rank": 8
        },
        "o3": {
          "total": 26.1,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 24.1,
          "note": "Estate of Emirzian (1998) appears fabricated; Clayton v Clayton misapplied jurisdiction",
          "rank": 6
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 11.7,
          "note": "Re Fuld citation accurate; DIFC Rules slightly imprecise but no hallucinations.",
          "rank": 4
        },
        "mistral-large": {
          "total": 23.2,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 48.2,
          "note": "Multiple hallucinated cases and Lebanese statute citations undermine otherwise solid framework.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 16.0,
          "quality": 3.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 8.6,
          "note": "Superficial analysis, missing DIFC Will Service specifics, crypto situs, Swiss PIL rules",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 25.8,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 64.3,
          "note": "DIFC Article citations unverified; Bell v Kennedy real but misapplied here",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 29.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 140.2,
          "note": "Udny and Bullock correctly cited; Swiss PILA, DIFC law accurately applied.",
          "rank": 3
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 36.2,
          "note": "Sophisticated, well-structured analysis; minor Swiss PILA nuances slightly oversimplified.",
          "rank": 1
        }
      }
    },
    {
      "id": 134,
      "category": "Immigration",
      "use_case": "itinerant_founder_residence_and_corporate_control",
      "models": {
        "claude-opus-4.8": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 97.6,
          "note": "Exceptional cross-jurisdictional depth; POEM/CMC/ESTA risks precisely identified; answer truncated.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 142.1,
          "note": "Rigorous multi-jurisdictional analysis; truncated before completing Recommendation 1B onward",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 201.2,
          "note": "Strong analysis, correct citations, but answer appears truncated mid-sentence.",
          "rank": 5
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 29.9,
          "note": "Thorough, well-structured; De Beers and Unit Construction citations are real.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 26.6,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 7.9,
          "note": "Solid framework but thin on PE risk, treaty tie-breakers, and India FEMA angles.",
          "rank": 6
        },
        "mistral-large": {
          "total": 24.2,
          "quality": 7.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 53.3,
          "note": "Matter of Hira and Kaur citations unverified; POEM/Vodafone misapplied.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 16.0,
          "quality": 3.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 6.7,
          "note": "Superficial step-by-step format; lacks POEM depth, ESTA specifics, actionable structuring.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 24.8,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 79.1,
          "note": "X v. MCI 2018 FC 517 appears fabricated; deduction applied",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 120.1,
          "note": "Solid framework; Thibodeau cite tangential but not fabricated; POEM analysis strong.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 40.1,
          "note": "Fundy Settlement and De Beers are real; IRPR citation slightly imprecise.",
          "rank": 4
        }
      }
    },
    {
      "id": 135,
      "category": "AI/Tech Regulation",
      "use_case": "foundation_model_dual_regime_risk_assessment",
      "models": {
        "claude-opus-4.8": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 50.7,
          "note": "Rigorous, deadline-anchored, cross-regime conflicts precisely identified and actionable.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 61.4,
          "note": "Exceptionally structured; minor FTC policy statement citation needs verification",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 102.5,
          "note": "Rite Aid and Chamber of Commerce citations are real; FLOP threshold accurate.",
          "rank": 8
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 29.3,
          "note": "Solid memo; minor EU AI Act article numbering imprecision but no hallucinated cases.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 29.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 7.5,
          "note": "FTC 2023 Policy Statement and CFPB Circular 2023-03 citations require verification",
          "rank": 5
        },
        "mistral-large": {
          "total": 24.8,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 54.6,
          "note": "FTC v. Everalbum and CFPB v. Townstone cited inaccurately; Townstone not AI case",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 17.6,
          "quality": 4.0,
          "accuracy": 4.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 9.4,
          "note": "Hallucinated FCA 2020 guidance, CFPB 2022 action; shallow cross-jurisdictional analysis",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 88.9,
          "note": "Everalbum cite accurate; strong tripartite analysis with actionable phased workstreams.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 95.8,
          "note": "Everalbum cite verified; CFPB Circular 2022-03 accurate; bifurcation strategy excellent.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 40.4,
          "note": "NYT v. OpenAI cited accurately as pending; CFPB Circular 2022-03 verifiable.",
          "rank": 3
        }
      }
    },
    {
      "id": 136,
      "category": "Corporate Governance",
      "use_case": "dual_board_duties_ch_conflict_of_laws",
      "models": {
        "claude-opus-4.8": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 86.5,
          "note": "Truncated at 10b-5 extraterritoriality; otherwise exceptional multi-jurisdictional analysis.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 130.9,
          "note": "Exceptional multi-jurisdictional analysis; answer cut off mid-sentence at Santa Fe",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 187.3,
          "note": "Rigorous multi-jurisdictional analysis; all cited cases verifiable and correctly applied.",
          "rank": 4
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 28.1,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 25.6,
          "quality": 7.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 7.5,
          "note": "Guth v Loft real; Rome II Art 1(2)(d) misapplied; Lugano post-Brexit nuanced",
          "rank": 8
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 55.2,
          "note": "Credit Lyonnais and Brussels Recast post-Brexit analysis contain material errors",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 5.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 7.4,
          "note": "Brussels I Recast inapplicable post-Brexit; Rome II exclusion missed; shallow Swiss/Delaware analysis",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 26.8,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 77.5,
          "note": "Gheewalla and Sinclair cited correctly; Article 717a CO slightly anachronistic but defensible.",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 106.6,
          "note": "Rigorous tri-jurisdictional analysis; Aronson overruled 2023 but cited correctly historically.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 44.5,
          "note": "Rigorous conflict-of-laws analysis; Gheewalla and Morrison applied correctly.",
          "rank": 1
        }
      }
    },
    {
      "id": 137,
      "category": "M&A",
      "use_case": "carve_out_de_tx_uae_difc_split_signing_closing",
      "models": {
        "claude-opus-4.8": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 44.6,
          "note": "Exceptional cross-jurisdictional depth; DIFC-LCIA Decree 34 flag is sophisticated.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 52.6,
          "note": "Thorough, well-structured; UAE law citations plausible but unverified; no hallucinated cases",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 129.6,
          "note": "Excellent cross-jurisdictional depth; DIFC-LCIA abolition and Enka trap correctly flagged.",
          "rank": 6
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 28.5,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.9,
          "note": "Solid structure; Civil Code article citations need verification; UBO reg correct",
          "rank": 4
        },
        "mistral-large": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 56.5,
          "note": "Solid structure; UAE CCL citation slightly outdated but no fabricated case law.",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 21.0,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 7.4,
          "note": "Article 19(1) and 24(1)(a) DIFC-LCIA citations appear fabricated or misattributed",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 64.5,
          "note": "Thorough, well-structured; kafala/EOSG/agency traps add real value.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 94.1,
          "note": "Excellent DIFC-LCIA abolition flag; ADNOC/DET nuance adds real value.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 34.6,
          "note": "Excellent DIFC-LCIA abolition flag; Dubai Decree 34/2021 correctly cited throughout.",
          "rank": 2
        }
      }
    },
    {
      "id": 138,
      "category": "Banking/Finance",
      "use_case": "mas_pdpa_mifid_cross_border_sales_memo",
      "models": {
        "claude-opus-4.8": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 49.6,
          "note": "Rigorous, jurisdiction-balanced memo; SCCs, IDTA, Schrems II correctly cited.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 55.3,
          "note": "Rigorous multi-jurisdictional analysis; SCCs, OPE, ESMA opinions correctly cited.",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 96.3,
          "note": "Schrems II cite accurate; all statutory references verifiable and jurisdiction-correct.",
          "rank": 5
        },
        "o3": {
          "total": 29.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 30.4,
          "note": "Fortrade Final Notice date/details and PDPA Part iiiA date unverifiable; risk deduction applied",
          "rank": 4
        },
        "grok-4.3": {
          "total": 29.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 7.9,
          "note": "BaFin 12/2018 and AMF 2019-03 citations appear fabricated or unverifiable.",
          "rank": 6
        },
        "mistral-large": {
          "total": 2.8,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 2.8,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 52.1,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 19.6,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 8.6,
          "note": "Fabricated Article 3(1) Delegated Reg cite; shallow ESMA/FCA third-country analysis",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 88.1,
          "note": "Rigorous, jurisdiction-balanced memo; minor gap on ESMA CFD intervention specifics.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 93.8,
          "note": "Technically strong; minor RAO Article 85 imprecision but no hallucinated case law.",
          "rank": 8
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 41.6,
          "note": "Schrems II cited correctly; Tied Agents analysis nuanced and accurate.",
          "rank": 1
        }
      }
    },
    {
      "id": 139,
      "category": "Data Privacy",
      "use_case": "healthtech_platform_gdpr_lgpd_pipeda_pdpa_matrix",
      "models": {
        "claude-opus-4.8": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 52.6,
          "note": "Rigorous, jurisdiction-accurate matrix; ANPD Resolution cites plausible but unverified.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 61.9,
          "note": "Rigorous, jurisdiction-accurate matrix with actionable operational guidance and no hallucinations.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 96.8,
          "note": "Schrems II cite accurate; ANPD Resolution 15/2024 plausible but verify.",
          "rank": 6
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 36.6,
          "note": "SGPDPC 22 citation unverifiable; ANPD Guidance 05/2022 timeline claim needs verification",
          "rank": 3
        },
        "grok-4.3": {
          "total": 29.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 8.5,
          "note": "Solid matrix; PDPA health-data nuance and PIPEDA breach threshold slightly underexplored.",
          "rank": 5
        },
        "mistral-large": {
          "total": 23.9,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 88.0,
          "note": "Multiple hallucinated cases: SCHUFA C-634/21 misapplied, PDPC v. Grab fabricated",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 23.6,
          "quality": 6.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 10.5,
          "note": "Competent overview but thin on PDPA 2021 amendments, LGPD timelines, and PIPEDA nuance.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 108.4,
          "note": "Rigorous matrix, accurate legal bases, actionable ops changes, no hallucinations.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 25.3,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 82.3,
          "note": "PIPEDA Report of Findings #2019-002 citation unverifiable; likely hallucinated.",
          "rank": 8
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 38.8,
          "note": "Accurate, well-structured matrix; ANPD Resolution cite plausible but unverified.",
          "rank": 1
        }
      }
    },
    {
      "id": 140,
      "category": "Employment Law",
      "use_case": "remote_engineer_multi_forum_classification",
      "models": {
        "claude-opus-4.8": {
          "total": 29.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 42.4,
          "note": "Rigorous multi-jurisdiction analysis; Sushilaben cite tangential but not fabricated.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 45.9,
          "note": "Technically strong; ONGC v Saw Pipes misapplied but cites otherwise verifiable.",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 114.7,
          "note": "Comprehensive, jurisdiction-accurate, well-cited; all case references verifiable and correct.",
          "rank": 6
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 20.4,
          "note": "Solid multi-jurisdiction analysis; Viking River citation slightly imprecise but defensible.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 23.6,
          "quality": 7.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 7.6,
          "note": "Ram Singh 2004 and Bengal Nagpur Cotton Mills citations appear fabricated for Indian law",
          "rank": 8
        },
        "mistral-large": {
          "total": 2.3,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 2.3,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 47.6,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 22.0,
          "quality": 6.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 7.3,
          "note": "Hindustan Lever Delhi HC cite unverifiable; Heller citation wrong court/year",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 25.9,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 71.6,
          "note": "Uber v CCI and Cal Corp 409 stock options cite appear fabricated/misapplied",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 73.8,
          "note": "Accurate citations, strong EOR recommendation, excellent statutory cross-referencing across jurisdictions.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 28.7,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 29.1,
          "note": "Deliveroo UKSC 43 cite is incorrect; Deliveroo lost Supreme Court appeal differently",
          "rank": 4
        }
      }
    },
    {
      "id": 141,
      "category": "Arbitration",
      "use_case": "multi_tier_arbitration_fidic_ohada_enforcement",
      "models": {
        "claude-opus-4.8": {
          "total": 29.2,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 48.4,
          "note": "BTS v WSP 2025 and NNPC v Klifco unverifiable; Dalico/Enka solid",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 25.8,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 60.3,
          "note": "CCJA Arrêt 045/2008 and Persero CA citation unverifiable; Statoil cite plausible",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 26.0,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 115.6,
          "note": "C v D [2023] HKCFA 16 appears fabricated; Sierra Leone cite plausible but verify",
          "rank": 3
        },
        "o3": {
          "total": 29.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 23.2,
          "note": "Several case citations appear fabricated or misattributed; core framework sound.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 23.0,
          "quality": 6.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 7.4,
          "note": "Mekwunye/Imoukhuede misapplied; Krajina line hallucinated; ACA citation outdated",
          "rank": 7
        },
        "mistral-large": {
          "total": 22.3,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 66.5,
          "note": "Multiple unverifiable CCJA case citations; Tulip/AIC cases likely hallucinated",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 22.6,
          "quality": 6.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 9.2,
          "note": "State Immunity Act 2004 Nigeria hallucinated; OHADA Art.13 immunity claim inaccurate",
          "rank": 8
        },
        "deepseek-v3.2": {
          "total": 24.9,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 85.4,
          "note": "IPCO v NNPC 2017 UKSC 16 misattributed; CCJA Advisory Opinion cite unverified",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 1.4,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 1.4,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 97.6,
          "note": "",
          "rank": 10
        },
        "gemini-3.1-pro": {
          "total": 25.7,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 43.5,
          "note": "Kramer Italo and Conakry airport cases appear fabricated; Sonnar citation questionable",
          "rank": 5
        }
      }
    },
    {
      "id": 142,
      "category": "Securities",
      "use_case": "dual_listed_esg_disclosure_regimes_conflict",
      "models": {
        "claude-opus-4.8": {
          "total": 31.5,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.5,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 43.7,
          "note": "Rigorous, well-structured; case citations verified; PSLRA and s.90A correctly applied.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 50.5,
          "note": "Liberty Energy cite unverified but plausible; Omnicare application slightly stretched",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 99.4,
          "note": "Accurate cites, strong architecture, liability flashpoints well-identified, production-ready.",
          "rank": 6
        },
        "o3": {
          "total": 31.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 18.9,
          "note": "Rigorous, actionable briefing; SEC rule citation reflects proposed not final rule.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 29.5,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.5,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.1,
          "note": "Solid, well-structured briefing; SEC rule cite slightly premature but defensible.",
          "rank": 3
        },
        "mistral-large": {
          "total": 24.5,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.5,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 59.0,
          "note": "SEC climate rule stayed by courts; Article 40a citation questionable; case law misapplied",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 25.0,
          "quality": 6.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 7.0,
          "note": "Competent overview but lacks depth on ADR-specific liability and greenwashing risks.",
          "rank": 8
        },
        "deepseek-v3.2": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 72.6,
          "note": "Basic Inc. v. Levinson cited correctly; PSLRA safe harbour application sound.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.5,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.5,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 81.6,
          "note": "Omnicare cite accurate; PSLRA/10b-5 analysis sophisticated and jurisdiction-correct.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "status": "error",
          "err": "empty completion (reasoning consumed max_tokens)"
        }
      }
    },
    {
      "id": 143,
      "category": "Regulatory Compliance",
      "use_case": "dora_mica_crypto_custody_overlap",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 42.4,
          "note": "Art.271 SCC conflict and dual-reporting tension are genuinely non-obvious, well-executed.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 49.4,
          "note": "Precise article citations, dual-track reporting design, and ring-fencing rationale are production-ready.",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 67.6,
          "note": "Precise DORA/MiCA article citations, strong Swiss-EU conflict analysis, actionable changes.",
          "rank": 7
        },
        "o3": {
          "total": 32.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 19.1,
          "note": "Precise citations, actionable gaps, strong Swiss-EU conflict analysis throughout.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.5,
          "note": "Precise citations, actionable gaps, strong DORA/MiCA/Swiss law triangulation.",
          "rank": 2
        },
        "mistral-large": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 34.3,
          "note": "CSSF Circular 22/804 and EBA/GL/2021/05 citations require verification",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 21.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 8.5,
          "note": "Superficial analysis; repetitive structure; misses DORA RTS, CASP passporting nuances",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 52.3,
          "note": "Solid DORA/MiCA analysis; Art. 20(2) 4-hour deadline needs verification.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 64.8,
          "note": "Precise MiCA/DORA citations, Swiss law integration, and ring-fencing rationale are exemplary.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 30.9,
          "note": "Technically precise, DORA Art.2(1)(a) cite slightly imprecise but defensible.",
          "rank": 4
        }
      }
    },
    {
      "id": 144,
      "category": "IP/Tech Law",
      "use_case": "japan_us_eu_text_data_mining_ip_exceptions",
      "models": {
        "claude-opus-4.8": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 40.1,
          "note": "Thomson Reuters v. Ross 2025 ruling characterization slightly overstated but defensible.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 49.8,
          "note": "Rigorous, production-ready; hiQ CFAA framing slightly imprecise but defensible",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 91.0,
          "note": "Rigorous, well-cited, actionable; Warhol application slightly stretched but defensible.",
          "rank": 6
        },
        "o3": {
          "total": 29.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 18.1,
          "note": "MDY v. Blizzard misapplied; GS Media citation strained but cases real",
          "rank": 5
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.5,
          "note": "Accurate citations, strong structure, actionable mitigations, minor US fair-use nuance gaps.",
          "rank": 1
        },
        "mistral-large": {
          "total": 24.8,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 48.0,
          "note": "Tokyo District Court 2021 case and GDPR Article 83 penalty framing are hallucinated.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 5.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 8.9,
          "note": "Article 47-5/47-7 conflation; DSM Art.17 misapplied; shallow mitigation measures",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 58.9,
          "note": "hiQ/CFAA framing slightly off; Art.47-7 citation imprecise but defensible",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 88.4,
          "note": "Warhol citation accurate; AI litigation cites plausible; sui generis DB right correctly applied.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 35.2,
          "note": "Infopaq citation accurate; Japan lawful-access claim slightly overstated but defensible.",
          "rank": 3
        }
      }
    },
    {
      "id": 145,
      "category": "Tax",
      "use_case": "pe_digital_services_india_nigeria_oecd_pillar",
      "models": {
        "claude-opus-4.8": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 48.8,
          "note": "Excellent depth; EL abolition timing noted; Nigerian no-treaty point well flagged.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 67.7,
          "note": "Technically precise, well-structured, verifiable citations, strong cross-jurisdictional analysis.",
          "rank": 1
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 130.0,
          "note": "Accurate, well-structured; EL abolition noted; case law verifiable; actionable options.",
          "rank": 5
        },
        "o3": {
          "total": 27.1,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 28.7,
          "note": "MakeMyTrip Delhi HC cite (447 ITR 202) appears fabricated; draft dates speculative",
          "rank": 6
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 11.6,
          "note": "Solid memo; accurate statutory cites, clear structure, actionable recommendations given constraints.",
          "rank": 3
        },
        "mistral-large": {
          "total": 2.3,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 2.3,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 77.0,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.7,
          "note": "Superficial analysis; structural options lack depth, pros/cons underdeveloped throughout.",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 22.9,
          "quality": 8.0,
          "accuracy": 4.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 77.3,
          "note": "Wardwizard case hallucinated; Morgan Stanley citation misapplied to PE commissionaire context",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 26.4,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 93.5,
          "note": "Technically sound, correct statutes cited, strong Pillar Two ETR-dilution analysis.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 29.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 53.0,
          "note": "Technically strong; EL abolition note accurate; Nigeria DTAA existence assumed without verification.",
          "rank": 4
        }
      }
    },
    {
      "id": 146,
      "category": "Criminal/White Collar",
      "use_case": "multi_jurisdiction_bribery_internal_controls",
      "models": {
        "claude-opus-4.8": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 53.3,
          "note": "Rigorous, well-structured; Rolls-Royce DPA cite is legitimate and accurate.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 67.1,
          "note": "Precise statutory citations, three genuine regime conflicts, actionable sequencing throughout.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 125.7,
          "note": "Esquenazi cite verified; statutes correct; divergence points well-articulated.",
          "rank": 8
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 26.0,
          "note": "ENRC 2018 CA citation accurate; CGU Portaria 19/2023 unverified but plausible",
          "rank": 3
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 14.0,
          "note": "Precise statutory citations, strong divergence analysis, actionable phased sequencing throughout.",
          "rank": 1
        },
        "mistral-large": {
          "total": 25.8,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 55.5,
          "note": "Hoskins citation misapplied; Siemens/World Bank cite fabricated; Skansen accurate",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 20.0,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 8.3,
          "note": "Upjohn correct but Derby cite misapplied; shallow divergence analysis; generic remediation",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 109.7,
          "note": "Thorough, well-structured, legally accurate; privilege conflict analysis particularly strong.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 99.1,
          "note": "Esquenazi, Three Rivers, ENRC, Upjohn all verifiable; strong cross-jurisdictional conflict analysis",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 45.0,
          "note": "SFO v ENRC citation accurate; NDPA 2023/LGPD conflict point sophisticated and correct.",
          "rank": 5
        }
      }
    },
    {
      "id": 147,
      "category": "Real Estate",
      "use_case": "gcc_data_center_free_zone_structuring",
      "models": {
        "claude-opus-4.8": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 44.3,
          "note": "Precise, actionable, well-cited; minor KSA real estate law currency caveat noted",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 50.3,
          "note": "Thorough, well-structured; minor risk on some statutory citation precision",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 114.9,
          "note": "Thorough, well-structured, jurisdiction-specific; minor risk of unverified decree details.",
          "rank": 8
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 22.9,
          "note": "Comprehensive, well-structured; minor citation precision issues but no hallucinated case law",
          "rank": 1
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 11.5,
          "note": "Solid, well-structured note with verifiable citations and actionable jurisdiction-specific risks.",
          "rank": 3
        },
        "mistral-large": {
          "total": 25.8,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 48.8,
          "note": "Several specific article numbers and decree citations appear fabricated or unverifiable.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 20.0,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.9,
          "note": "Superficial analysis; misses NEOM, QFC, usufruct specifics, and Sharia security issues",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 58.4,
          "note": "Technically strong, locally compliant risks well-identified, minor Saudi ownership nuances missed",
          "rank": 5
        },
        "qwen3.7-max": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 76.1,
          "note": "Technically strong, cites plausible but some unverified KSA decree references.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 37.2,
          "note": "Solid structure; minor citation gaps; three risks clearly articulated and actionable.",
          "rank": 6
        }
      }
    },
    {
      "id": 148,
      "category": "Immigration",
      "use_case": "founder_relocation_sg_uk_ca_pr_strategy",
      "models": {
        "claude-opus-4.8": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 43.2,
          "note": "Tech Nation note slightly outdated; otherwise rigorous, actionable, well-structured memo.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 78.0,
          "note": "Exceptional depth; Tech Nation caveat and FIG reform show current awareness.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 94.1,
          "note": "Solid memo; De Beers and Fundy citations accurate and well-applied.",
          "rank": 9
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 23.5,
          "note": "Comprehensive, well-structured; minor Tech Nation closure nuance needs verification",
          "rank": 1
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.2,
          "note": "Tech@SG branding slightly off; NOC codes plausible; solid cross-jurisdictional structuring analysis",
          "rank": 5
        },
        "mistral-large": {
          "total": 27.8,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 51.9,
          "note": "De Beers v Howe misapplied; Tech Nation dissolved 2023; otherwise strong memo",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 5.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.6,
          "note": "Tech Nation defunct; GIP criteria wrong; shallow tax/corporate structuring analysis",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 69.7,
          "note": "Tech Nation dissolved 2023; Global Talent endorser now UKRI, minor inaccuracy",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 83.8,
          "note": "De Beers cite is real; COMPASS, Tech.Pass, SUV details accurate and actionable.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 36.2,
          "note": "Rigorous cross-jurisdictional memo; Tech Nation dissolved 2023, minor inaccuracy.",
          "rank": 2
        }
      }
    },
    {
      "id": 149,
      "category": "International Trade",
      "use_case": "dual_use_chip_export_controls_jp_us_eu",
      "models": {
        "claude-opus-4.8": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 49.9,
          "note": "Rigorous tri-jurisdictional analysis; FDP/de minimis mechanics correctly applied throughout.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 55.6,
          "note": "Precise multi-regime analysis; FDPR/deemed export tensions well-articulated; actionable.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 126.6,
          "note": "Rigorous tri-jurisdictional analysis with correct citations and actionable stop-ship patterns.",
          "rank": 6
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 25.9,
          "note": "Thorough tri-regime analysis; minor ECCN threshold details warrant verification.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 24.6,
          "quality": 6.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 10.2,
          "note": "Solid framework but thin on UAE specifics, deemed-export depth, and inter-regime tensions",
          "rank": 9
        },
        "mistral-large": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 56.3,
          "note": "Solid multi-regime analysis; UAE Group B classification and FDP specifics need verification.",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.0,
          "note": "Superficial analysis; misses FDP rule nuances, Entity List, deemed export specifics",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 112.6,
          "note": "Thorough, well-structured; minor FDP rule oversimplification but no hallucinations.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 94.2,
          "note": "Technically precise, well-structured, correct FDP/de minimis/deemed export analysis throughout.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 54.0,
          "note": "Technically precise, well-structured, correctly applies FDPR/de minimis and EUC tensions.",
          "rank": 3
        }
      }
    },
    {
      "id": 150,
      "category": "Bankruptcy & Insolvency",
      "use_case": "cross_border_insolvency_ca_uk_india_scheme_vs_chapter15",
      "models": {
        "claude-opus-4.8": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 53.0,
          "note": "Rigorous, well-structured; Gategroup/Avanti citations verifiable; Indian IBC gaps correctly flagged.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 27.3,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 62.4,
          "note": "Avanti Communications Chapter 15 cite unverifiable; Smile Telecoms jurisdiction questionable",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 1.0,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 1.0,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 125.3,
          "note": "",
          "rank": 10
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 24.1,
          "note": "",
          "rank": 6
        },
        "grok-4.3": {
          "total": 4.6,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.6,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 10.0,
          "note": "",
          "rank": 5
        },
        "mistral-large": {
          "total": 2.8,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 2.8,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 60.6,
          "note": "",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 5.0,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 5.0,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 8.2,
          "note": "",
          "rank": 4
        },
        "deepseek-v3.2": {
          "total": 1.4,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 1.4,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 118.1,
          "note": "",
          "rank": 9
        },
        "qwen3.7-max": {
          "total": 1.9,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 1.9,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 67.9,
          "note": "",
          "rank": 8
        },
        "gemini-3.1-pro": {
          "total": 31.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 46.9,
          "note": "Sophisticated, accurate cross-border analysis with verifiable citations and practical solutions.",
          "rank": 1
        }
      }
    },
    {
      "id": 151,
      "category": "Environmental/ESG",
      "use_case": "transnational_supply_chain_deforestation_due_diligence",
      "models": {
        "claude-opus-4.8": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 39.9,
          "note": "Exceptional cross-jurisdictional depth; EUDR delay date, LGPD, CSDDD all correctly cited.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 44.4,
          "note": "Technically precise, CAR limitation flagged, SISBOV/MAPA detail commendable.",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 72.6,
          "note": "Thorough, jurisdiction-correct, actionable; cattle laundering and CAR caveats notable.",
          "rank": 7
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 17.8,
          "note": "Precise EUDR citations, strong contractual steps, accurate Brazilian law integration.",
          "rank": 3
        },
        "grok-4.3": {
          "total": 30.6,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.1,
          "note": "FCA CP23/20 citation unverified but not case law; otherwise solid.",
          "rank": 4
        },
        "mistral-large": {
          "total": 28.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 42.2,
          "note": "Exceeds 1000-word limit significantly; EUDR delay to 2025 not noted",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 21.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 5.8,
          "note": "Superficial treatment; misattributes Forest Code number; weak cross-jurisdictional analysis",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 72.7,
          "note": "Precise citations, strong Brazilian law nexus, actionable four-step contractual framework.",
          "rank": 9
        },
        "qwen3.7-max": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 80.9,
          "note": "Exceptional cross-jurisdictional analysis; CAR/IBAMA/GTA references accurate and actionable.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 31.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 28.8,
          "note": "Exceptional cross-jurisdictional analysis; CAR/GTA/Lista Suja details add real value.",
          "rank": 1
        }
      }
    },
    {
      "id": 152,
      "category": "Government Contracts",
      "use_case": "defense_cloud_uae_uk_us_it_security_conflicts",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 53.9,
          "note": "Rigorous three-enclave model, precise ITAR/FedRAMP/UAE conflicts, actionable missteps identified.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 55.5,
          "note": "Exceptional tripartite analysis; ITAR deemed export misstep particularly non-obvious and actionable.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 29.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 72.5,
          "note": "Rigorous three-domain architecture, correct ITAR/FedRAMP distinction, five missteps identified.",
          "rank": 5
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 22.4,
          "note": "Exceptional tri-regime analysis; minor risk on DDTC FAQ citation specificity.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 29.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.8,
          "note": "Solid structure; UAE cite plausible but unverified; ITAR analysis accurate.",
          "rank": 6
        },
        "mistral-large": {
          "total": 25.2,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 53.5,
          "note": "FLIR $30M, Airbus $3.9B, UK contractor £200K citations unverifiable/misattributed.",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 18.6,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 8.6,
          "note": "Superficial analysis; generic advice lacking ITAR specifics, TAA, and MOD JSP standards.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 25.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 81.8,
          "note": "Solid framework; ISF/NFE references imprecise but no fabricated case law.",
          "rank": 9
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 80.0,
          "note": "Rigorous tri-node architecture; ITAR/JSP440/FedRAMP citations verifiable and accurate.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 40.4,
          "note": "Technically rigorous, well-structured, no hallucinated cites, minor ITAR cite imprecision",
          "rank": 3
        }
      }
    },
    {
      "id": 153,
      "category": "Construction",
      "use_case": "mega_mall_ppp_ksa_ohada_english_law_epc_clash",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 43.5,
          "note": "Edward Owen and Cavendish citations accurate; AUS articles correctly referenced throughout.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 27.3,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 54.5,
          "note": "Cavendish cite accurate; Meritz cite plausible but Article 107 CTL numbering unverified",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 118.3,
          "note": "Cavendish Square cite accurate; Saudi Civil Transactions Law 2023 plausible but verify.",
          "rank": 4
        },
        "o3": {
          "total": 25.1,
          "quality": 8.0,
          "accuracy": 4.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 19.8,
          "note": "Multiple unverifiable citations: SAMA Circular 469/2016, Board of Grievances 208/1435H, Riyadh Enforcement Court 3953/20",
          "rank": 8
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 7.7,
          "note": "OHADA Uniform Act articles cited without verified accuracy; Saudi CTL Art.205 unverified",
          "rank": 3
        },
        "mistral-large": {
          "total": 23.2,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 40.0,
          "note": "Multiple fabricated article numbers and circular references undermine otherwise strong structure.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 20.0,
          "quality": 5.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 7.4,
          "note": "Fabricated OHADA articles, Saudi Civil Transactions Law cites, and PPP decree details.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 25.4,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 85.4,
          "note": "Wood Hall case misapplied; Saudi CTL articles need verification; OHADA cites plausible",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 26.9,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 66.1,
          "note": "CTL Art numbers and PPP Decree citation require verification; Edward Owen correct",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 38.0,
          "note": "Cavendish cite accurate; CTL/OHADA references credible and jurisdiction-correct throughout.",
          "rank": 2
        }
      }
    },
    {
      "id": 154,
      "category": "Fintech/Crypto",
      "use_case": "stablecoin_issuer_multi_regime_reserve_and_redemption",
      "models": {
        "claude-opus-4.8": {
          "total": 31.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 46.3,
          "note": "Thorough, accurate, well-structured; GENIUS Act citation plausible but unverified.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 66.9,
          "note": "Exceptional multi-regime analysis; Howey cite accurate; conflicts highly actionable.",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 123.7,
          "note": "Accurate, well-structured, four concrete conflicts identified, cites verifiable.",
          "rank": 6
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 27.9,
          "note": "Terraform cite plausible but imprecise; Reves application slightly stretched here.",
          "rank": 4
        },
        "grok-4.3": {
          "total": 31.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.0,
          "note": "Solid multi-regime analysis; Howey cite accurate; conflicts well-identified and concrete.",
          "rank": 2
        },
        "mistral-large": {
          "total": 25.8,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 66.5,
          "note": "SEC v. Kik cite inaccurate; Ripple citation misused; Howey application oversimplified",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 22.6,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 10.0,
          "note": "Superficial analysis; conflicts underdeveloped; misses FinCEN, FCA specifics, BVI nexus",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 89.8,
          "note": "Ripple/Terraform citations used cautiously and accurately; MiCA ART classification well-reasoned.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 105.1,
          "note": "Howey cite verified; MAS PSN02 and FCA CP23/30 references plausible but unverified.",
          "rank": 8
        },
        "gemini-3.1-pro": {
          "total": 29.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 54.9,
          "note": "Reves cite accurate; CP23/29 reference plausible but unverified FCA consultation.",
          "rank": 5
        }
      }
    },
    {
      "id": 155,
      "category": "Trusts & Estates",
      "use_case": "cross_border_sharia_will_uk_uae_india_conflicts",
      "models": {
        "claude-opus-4.8": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 40.7,
          "note": "Excellent multi-jurisdictional analysis; DIFC Muslim eligibility caveat appropriately flagged.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 27.8,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 48.0,
          "note": "Shukla v Shukla unverifiable; Udny correct; Kale cite plausible but context strained",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 128.9,
          "note": "Accurate, well-structured, practical multi-jurisdiction advice with no hallucinated citations.",
          "rank": 2
        },
        "o3": {
          "total": 27.6,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 21.7,
          "note": "Re Ahmed [2011] EWHC 2933 unverified; DIFC Muslim restriction overstated",
          "rank": 6
        },
        "grok-4.3": {
          "total": 29.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 6.6,
          "note": "Solid framework; DIFC WPR scope and Jebel Ali situs treatment slightly oversimplified.",
          "rank": 3
        },
        "mistral-large": {
          "total": 26.7,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 46.4,
          "note": "Brussels IV/2020 Regs misapplied post-Brexit; Re Annesley cited correctly but Brussels IV inapplicable UK",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 20.2,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 48.0,
          "note": "Superficial analysis; misses HUF ancestral property rules, DIFC registry nuances, domicile issues",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 77.2,
          "note": "DIFC Law No.10/2021 Article 7(2) and EU Reg Article citations unverified/potentially hallucinated",
          "rank": 9
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 82.8,
          "note": "Agulian v Cyganik correctly cited; DIFC Muslim bar accurately flagged; excellent structuring.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 49.6,
          "note": "Strong structure; DIFC Foundation firewall claim overstated; domicile analysis solid.",
          "rank": 8
        }
      }
    },
    {
      "id": 156,
      "category": "Insurance",
      "use_case": "cyber_insurance_ransom_payment_sanctions_multijurisdiction",
      "models": {
        "claude-opus-4.8": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 39.0,
          "note": "Rigorous multi-regime analysis; EU Blocking Statute conflict-of-laws risk astutely flagged.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.8,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 46.7,
          "note": "Welsbach and Con Ed citations misapplied; EO 13757 attribution questionable but plausible",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 26.4,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 80.8,
          "note": "IRB-Brasil and Belt Painting citations appear hallucinated or misapplied; Seaboard plausible",
          "rank": 6
        },
        "o3": {
          "total": 28.1,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 18.2,
          "note": "J.P. Morgan cite misapplied; Export-Import Bank case mischaracterized; otherwise strong.",
          "rank": 5
        },
        "grok-4.3": {
          "total": 24.6,
          "quality": 7.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 8.2,
          "note": "PBM Nutritionals cite unverifiable; EU 2024/1485 regulation unconfirmed hallucination",
          "rank": 8
        },
        "mistral-large": {
          "total": 22.3,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 54.2,
          "note": "Multiple hallucinated cases: M&A Research, DHL Global, Mbasogo misapplied, Pro Swing misapplied",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 18.0,
          "quality": 5.0,
          "accuracy": 3.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 6.7,
          "note": "Finucane cite misapplied; Rome I inapplicable post-Brexit UK; thin analysis throughout",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 25.9,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 73.9,
          "note": "Certain Underwriters v. BCS cited incorrectly; unrelated to NY sanctions exclusion.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 84.9,
          "note": "Excellent cross-jurisdictional analysis; EU Blocking Statute conflict is non-obvious and accurate.",
          "rank": 3
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 30.2,
          "note": "Accurate, well-structured; Rome I Article 9 conflict analysis is particularly strong.",
          "rank": 2
        }
      }
    },
    {
      "id": 157,
      "category": "Healthcare/Pharma",
      "use_case": "cross_border_clinical_trial_consent_and_data_reuse",
      "models": {
        "claude-opus-4.8": {
          "total": 28.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 52.3,
          "note": "Rigorous, well-structured; minor APPI nuance gaps; no hallucinated citations.",
          "rank": 4
        },
        "claude-sonnet-4.6": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 66.8,
          "note": "Rigorous, well-structured memo; minor APPI article citations need verification.",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 119.1,
          "note": "Rigorous, well-cited, jurisdiction-specific; actionable recommendations; no hallucinated law.",
          "rank": 6
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 24.5,
          "note": "Thorough, well-structured; minor APPI article numbering imprecision but no hallucinated cases.",
          "rank": 3
        },
        "grok-4.3": {
          "total": 32.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.6,
          "note": "Rigorous, jurisdiction-specific, actionable; minor gap on Egypt enforcement detail.",
          "rank": 1
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 52.1,
          "note": "Planet49 misapplied; CNIL délibération citation unverifiable; Schrems II case number wrong",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 19.6,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 12.7,
          "note": "Superficial analysis; misses APPI 2023 amendments, HREC, and genetic data specifics.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 115.0,
          "note": "Solid multi-jurisdictional analysis; Egypt treatment appropriately cautious; minor APPI nuances.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 26.9,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 100.5,
          "note": "Solid federated learning proposal; Egypt regulatory limbo point is well-observed.",
          "rank": 8
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 34.6,
          "note": "Excellent federated learning proposal; EDPB cite slightly imprecise but not hallucinated.",
          "rank": 2
        }
      }
    },
    {
      "id": 158,
      "category": "AI/Tech Regulation",
      "use_case": "ai_act_foundation_model_marketing_stack_conflict",
      "models": {
        "claude-opus-4.8": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 61.7,
          "note": "Rite Aid/Everalbum cited accurately; FTC guidance reference slightly imprecise but defensible",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 80.0,
          "note": "FTC v. Amazon 2023 cited imprecisely; FTC AI Policy Statement characterization overstated",
          "rank": 2
        },
        "gpt-5.5": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 116.3,
          "note": "SCHUFA cite verified; thorough multi-regime analysis with actionable mitigations.",
          "rank": 3
        },
        "o3": {
          "total": 28.1,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 26.2,
          "note": "GPAI article numbering speculative; FTC Jan 2024 GenAI guidance citation unverified",
          "rank": 6
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 17.1,
          "note": "Solid memo; CCPA §1798.185 draft rule citation slightly speculative but not fabricated.",
          "rank": 5
        },
        "mistral-large": {
          "total": 21.8,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 75.0,
          "note": "Multiple hallucinated cases: Nowak misapplied, Schufa C-634/21 wrong, FTC cites fabricated",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 14.0,
          "note": "Superficial analysis; misses GPAI rules, Article 50, and concrete architecture.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 138.7,
          "note": "Strong memo; AI Act article citations accurate; minor Annex III categorisation overreach",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 25.4,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 132.4,
          "note": "Clearview AI cite unverified but plausible; Art.111 date accurate; solid analysis",
          "rank": 8
        },
        "gemini-3.1-pro": {
          "total": 28.7,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 46.9,
          "note": "Solid memo; minor FLOPs threshold error, strong conflict mitigations throughout.",
          "rank": 4
        }
      }
    },
    {
      "id": 159,
      "category": "Contract & Commercial",
      "use_case": "long_term_supply_escalation_multi_law_redline",
      "models": {
        "claude-opus-4.8": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 38.2,
          "note": "Kel Kim cite verified; L.112-1 flagged; Rome I art.9 correctly identified",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 84.0,
          "note": "Kel Kim cite misapplied; Unamar characterisation slightly strained but defensible",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 92.6,
          "note": "Kel Kim cite accurate; Rome I, art.1195, BCEAO references all correct.",
          "rank": 2
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 18.8,
          "note": "",
          "rank": 8
        },
        "grok-4.3": {
          "total": 5.0,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 5.0,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 5.9,
          "note": "",
          "rank": 7
        },
        "mistral-large": {
          "total": 2.8,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 2.8,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 41.4,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 20.6,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 6.6,
          "note": "Splits governing law mid-contract illogically; misses OHADA Acte Uniforme hardship rules",
          "rank": 5
        },
        "deepseek-v3.2": {
          "total": 19.3,
          "quality": 6.0,
          "accuracy": 4.0,
          "speed": 2.3,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 50.7,
          "note": "H.S. Perlin cite appears hallucinated; OHADA analysis superficial",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 24.0,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 103.8,
          "note": "Kel Kim cite accurate; AUDCG characterization oversimplified but directionally correct",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 3.7,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.7,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 36.6,
          "note": "",
          "rank": 9
        }
      }
    },
    {
      "id": 160,
      "category": "Data Privacy",
      "use_case": "gdpr_lgpd_pipeda_telemetry_risk_map",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 46.8,
          "note": "Schrems II, Breyer, TTDSG, Act 25 citations all verifiable and correct.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 62.1,
          "note": "Rigorous, jurisdiction-accurate, actionable; SCCs/TIA/DPIA guidance precise and current.",
          "rank": 2
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 113.4,
          "note": "Thorough, jurisdiction-accurate matrix; Schrems II cite correct; no hallucinations.",
          "rank": 8
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 18.6,
          "note": "Breyer cite accurate; ANPD guidelines referenced plausibly; strong cross-regime analysis",
          "rank": 3
        },
        "grok-4.3": {
          "total": 30.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.5,
          "note": "Solid multi-jurisdiction matrix; minor gap on Quebec adequacy nuance.",
          "rank": 4
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 45.8,
          "note": "Planet49 correct; Royal Bank v. Trang misapplied; Tim Hortons cite questionable",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 20.6,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 10.7,
          "note": "Superficial analysis; misses profiling rules, ad-network ID risks, and ANPD guidance.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 98.1,
          "note": "Thorough, jurisdiction-accurate matrix; strong remedial steps; no hallucinated cites.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 29.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 91.3,
          "note": "Schrems II cite accurate; regional siloing recommendation is non-obvious and actionable.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 40.9,
          "note": "Solid matrix; ANPD SCC status slightly overstated, adequacy decision 2002 minor error",
          "rank": 6
        }
      }
    },
    {
      "id": 161,
      "category": "Corporate Governance",
      "use_case": "dual_listed_energy_board_climate_duties_trap",
      "models": {
        "claude-opus-4.8": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 47.9,
          "note": "Rigorous cross-jurisdictional analysis; ASIC v Mercer cite needs verification",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 26.8,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 60.8,
          "note": "ASIC v DP World [2024] FCA 1312 and Mercer [2024] FCA 1 appear fabricated",
          "rank": 7
        },
        "gpt-5.5": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 89.9,
          "note": "Rigorous multi-jurisdictional analysis; CSRD branch timing caveat appropriately flagged.",
          "rank": 6
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 21.8,
          "note": "Marchand, Stone v Ritter, ClientEarth, Cassimatis all verifiable; CSRD articles correct",
          "rank": 1
        },
        "grok-4.3": {
          "total": 30.0,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 12.3,
          "note": "McGarrigle cite appears fabricated; ClientEarth/Shell citation slightly imprecise but acceptable",
          "rank": 3
        },
        "mistral-large": {
          "total": 25.3,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 67.6,
          "note": "Sharma climate duty of care framing and Vocation climate link are misapplied/stretched",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 19.6,
          "quality": 5.0,
          "accuracy": 3.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 12.3,
          "note": "ASIC v. Francalancia is hallucinated; Citigroup climate link overstated",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 23.0,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 102.0,
          "note": "Abrahams v CBA and Boeing citations are hallucinated or misrepresented cases",
          "rank": 9
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 82.6,
          "note": "ClientEarth v Shell citation accurate; Cassimatis used appropriately; CSRD analysis strong.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 39.1,
          "note": "Caremark, ClientEarth, Cassimatis, Hutley opinions all verifiable and correctly applied.",
          "rank": 4
        }
      }
    },
    {
      "id": 162,
      "category": "M&A",
      "use_case": "carve_out_spin_uae_difc_de_tx_tax_structuring",
      "models": {
        "claude-opus-4.8": {
          "total": 31.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 59.3,
          "note": "Exceptional cross-jurisdictional analysis; §355(e) trap and nominee risk well-flagged.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 67.7,
          "note": "Exceptional cross-jurisdictional depth; UAE CT, UBO, concession risks precisely flagged.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 141.7,
          "note": "Rigorous multi-jurisdictional analysis; cites verifiable UAE/DIFC/US statutes throughout.",
          "rank": 5
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 20.6,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 32.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.7,
          "note": "Accurate, well-structured, cites real laws; TUPE/labour point adds real value.",
          "rank": 1
        },
        "mistral-large": {
          "total": 23.3,
          "quality": 8.0,
          "accuracy": 4.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 71.2,
          "note": "Multiple fabricated case citations severely undermine otherwise strong structural analysis",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 18.6,
          "quality": 5.0,
          "accuracy": 4.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 10.8,
          "note": "Superficial analysis; wrong/unverifiable citations; misses Section 355 PFIC/CFC depth",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 26.9,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 95.7,
          "note": "Solid cross-jurisdictional analysis; §355 device risk and DIFC enforcement well-flagged.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 97.2,
          "note": "Exceptional cross-jurisdictional depth; DIFC Law No. 6/2004 citation needs verification.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 62.4,
          "note": "Exceptional cross-jurisdictional depth; ESG trap and concession COC risks outstanding.",
          "rank": 3
        }
      }
    },
    {
      "id": 163,
      "category": "Banking/Finance",
      "use_case": "cross_border_margin_lending_ppa_security_priority",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 43.9,
          "note": "Exceptional multi-jurisdictional analysis; FISA, FCARs, PRIMA correctly applied throughout.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 48.7,
          "note": "Technically precise, jurisdiction-correct, actionable; no hallucinated citations detected.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 141.4,
          "note": "Exceptional cross-jurisdictional analysis; five assumptions identified versus required three.",
          "rank": 6
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 25.3,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 30.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.1,
          "note": "Solid multi-jurisdictional analysis; FISA, FCARs, PILA citations accurate and actionable.",
          "rank": 2
        },
        "mistral-large": {
          "total": 28.7,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 31.5,
          "note": "Solid multi-jurisdictional analysis; FISA/FCARs/HSC correctly applied throughout.",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 21.6,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 9.3,
          "note": "Superficial treatment; misses CREST, rehypothecation, Hague Convention nuances",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 91.3,
          "note": "Exceptional multi-jurisdictional analysis; FISA, FCARs, Hague Convention correctly applied.",
          "rank": 3
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 85.7,
          "note": "PRIIPs Reg cited as 1286/2012 but correct number is 1286/2014",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 29.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 40.8,
          "note": "Technically rigorous, FISA/FCARs/HSC correctly applied, PRIIPs enforcement insight strong.",
          "rank": 5
        }
      }
    },
    {
      "id": 164,
      "category": "Regulatory Compliance",
      "use_case": "global_online_lending_kafala_pdpa_udaap",
      "models": {
        "claude-opus-4.8": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 53.5,
          "note": "Precise, actionable, jurisdiction-correct; no hallucinated cites; excellent cross-border synthesis.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 65.9,
          "note": "Madden cite accurate; Saudi Civil Transactions Law 2021 reference slightly imprecise",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 132.1,
          "note": "Precise citations, actionable revisions, strong cross-jurisdictional analysis throughout.",
          "rank": 4
        },
        "o3": {
          "total": 26.1,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 24.6,
          "note": "Multiple unverifiable citations: Board of Grievances 514/1439, CFPB v. Think Finance, Hemanth Finance",
          "rank": 8
        },
        "grok-4.3": {
          "total": 30.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 12.2,
          "note": "Solid roadmap; citations verifiable; Kafala doctrine nuance could be deeper.",
          "rank": 2
        },
        "mistral-large": {
          "total": 24.3,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 69.6,
          "note": "Saudi Supreme Court case and RBI v. Sahayata citations appear fabricated.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 18.6,
          "quality": 5.0,
          "accuracy": 4.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 12.9,
          "note": "Superficial analysis; SAMA circular citation unverifiable; Kafala red flags underdeveloped",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 115.4,
          "note": "Thorough, well-structured roadmap; minor gaps on SAMA fintech sandbox and DPDPA timelines.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 93.4,
          "note": "Madden cite contextually stretched but not fabricated; strong actionable output.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 40.7,
          "note": "Thorough, well-structured; KSA Civil Transactions Law citation needs verification.",
          "rank": 3
        }
      }
    },
    {
      "id": 165,
      "category": "Employment Law",
      "use_case": "remote_work_policy_multi_jurisdiction_trap",
      "models": {
        "claude-opus-4.8": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 45.3,
          "note": "Precise citations, tiered framework, three clear cross-jurisdictional examples, production-ready.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 54.3,
          "note": "Exceptional depth; verified statutes; tiered framework actionable and jurisdiction-precise.",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 87.3,
          "note": "Thorough, well-cited, tiered structure excellent; CCOO case citation verified correct.",
          "rank": 7
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 20.2,
          "note": "BAG and BFH citations unverifiable; core legal analysis otherwise strong and actionable.",
          "rank": 5
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.9,
          "note": "Accurate statutes, strong tiered structure, solid cross-jurisdictional analysis throughout.",
          "rank": 1
        },
        "mistral-large": {
          "total": 26.3,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 62.2,
          "note": "BAB case cite and some statutory details unverifiable; core analysis solid",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 26.0,
          "quality": 7.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 8.2,
          "note": "Solid structure, correct citations, but lacks PE tax depth and visa specifics.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 97.6,
          "note": "Thorough, well-structured; minor Flexi-Gesetz citation imprecision but no hallucinations.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 112.7,
          "note": "Accurate citations, strong tiered structure, solid cross-jurisdictional analysis throughout.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 31.8,
          "note": "Accurate citations, strong tiered structure, solid cross-jurisdictional analysis throughout.",
          "rank": 4
        }
      }
    },
    {
      "id": 166,
      "category": "Dispute Resolution",
      "use_case": "multi_tier_construction_dispute_difc_ohada_enforcement",
      "models": {
        "claude-opus-4.8": {
          "total": 29.2,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 41.5,
          "note": "Baker Hughes/SL Mining citations unverified or misapplied; OHADA/DIFC analysis strong",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 54.0,
          "note": "Meydan/Banyan Tree cite unverified; Sulamerica applied outside jurisdiction correctly flagged",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 109.4,
          "note": "Exceptional cross-jurisdictional analysis; DIFC-LCIA abolition point particularly sharp.",
          "rank": 2
        },
        "o3": {
          "total": 26.1,
          "quality": 8.0,
          "accuracy": 4.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 17.6,
          "note": "Multiple fabricated case citations severely undermine otherwise sophisticated cross-jurisdictional analysis",
          "rank": 7
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.4,
          "note": "Solid cross-jurisdictional analysis; minor citation imprecisions but no fabricated case law",
          "rank": 5
        },
        "mistral-large": {
          "total": 22.8,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 44.0,
          "note": "Multiple fabricated citations: Pearl Petroleum, Trafigura Abidjan, CCJA 041/2018, Dubai 156/2013",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.1,
          "note": "Superficial analysis; misses OHADA CCJA, NY Convention gaps, asset-tracing strategy",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 23.4,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 101.2,
          "note": "Cable & Wireless misapplied; DIFC CFI 006/2015 and Dubai Cassation 293/2016 unverified",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 94.5,
          "note": "Sierra Leone v SL Mining cite verified; DIFC-LCIA abolition correctly flagged throughout.",
          "rank": 3
        },
        "gemini-3.1-pro": {
          "total": 28.7,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 34.9,
          "note": "Baker Hughes 2023 US case citation appears fabricated; DIFC-LCIA analysis otherwise strong",
          "rank": 4
        }
      }
    },
    {
      "id": 167,
      "category": "IP/Tech Law",
      "use_case": "saas_text_image_model_ip_warranty_matrix",
      "models": {
        "claude-opus-4.8": {
          "total": 29.2,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 42.5,
          "note": "D.C. Cir. 2025 Thaler affirmance and LAION v. Kneschke unverified",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 57.1,
          "note": "Thaler v. Perlmutter accurate; Zarya guidance correct; Itar-Tass valid cite.",
          "rank": 2
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 113.0,
          "note": "Exceptional multi-jurisdictional analysis; citations verified; clauses production-ready.",
          "rank": 4
        },
        "o3": {
          "total": 32.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 17.7,
          "note": "Accurate citations, precise clause drafting, strong cross-jurisdictional analysis throughout.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 10.9,
          "note": "Thaler v. Perlmutter real but outcome overstated; clauses practical and well-scoped",
          "rank": 5
        },
        "mistral-large": {
          "total": 24.8,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 43.3,
          "note": "Tokyo AI art case 2020 and Nagano TBS case appear fabricated citations",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 21.0,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 9.3,
          "note": "Japan TDM cite wrong; EU moral rights framing inaccurate; clauses too thin",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 85.3,
          "note": "Thaler cite accurate; UK TDM reform overstated; solid clause drafting overall",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 86.8,
          "note": "Thaler cite accurate; Infopaq citation correct; clauses are production-ready.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 40.2,
          "note": "Thaler cite accurate; Access Copyright misapplied but not fabricated; clauses actionable",
          "rank": 7
        }
      }
    },
    {
      "id": 168,
      "category": "Real Estate",
      "use_case": "cross_border_reit_investment_sharia_and_tax",
      "models": {
        "claude-opus-4.8": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 43.0,
          "note": "Precise statutory cites, three distinct misleading-logic points clearly identified.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 54.9,
          "note": "Exceptional cross-jurisdictional depth; three US-REIT error traps clearly identified.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 121.4,
          "note": "Precise citations, three-jurisdiction depth, five cross-border pitfalls clearly identified.",
          "rank": 6
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 21.2,
          "note": "Technically rigorous, jurisdiction-correct, actionable; minor treaty citation gaps only.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 30.6,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 11.0,
          "note": "Technically strong, minor FIRPTA rate errors, three distinctions well-executed",
          "rank": 3
        },
        "mistral-large": {
          "total": 23.3,
          "quality": 7.0,
          "accuracy": 6.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 60.4,
          "note": "Solid structure but US-Saudi treaty date and PAIF leverage rules need verification",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 20.0,
          "quality": 5.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 9.8,
          "note": "Fabricated US-Saudi treaty 2010, UK-Saudi 2007, UAE-Saudi 2017; shallow Sharia analysis",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 114.0,
          "note": "Technically strong; minor error on US-Saudi treaty vintage and FIRPTA rate",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 94.9,
          "note": "Three WHT traps well-executed; PAIF/REIT distinction correctly flagged upfront.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 37.7,
          "note": "Rigorous, well-structured; minor gaps on PAIF vs REIT distinctions and NRCGT nuance.",
          "rank": 5
        }
      }
    },
    {
      "id": 169,
      "category": "Tax",
      "use_case": "digital_services_vat_gst_us_sales_tax_conflict",
      "models": {
        "claude-opus-4.8": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 43.9,
          "note": "Accurate, well-structured, cites verified law; Wayfair citation correct.",
          "rank": 4
        },
        "claude-sonnet-4.6": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 51.0,
          "note": "Highly accurate, well-structured; minor issue with Contractual Disclosure Facility characterization.",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 108.1,
          "note": "Precise citations, four clear misclassification scenarios, actionable remediation plan.",
          "rank": 7
        },
        "o3": {
          "total": 32.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 22.2,
          "note": "Exceptional depth, verified citations, actionable phased plan, five strong scenarios.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.0,
          "note": "Accurate citations, four clear misclassification scenarios, actionable remediation plan.",
          "rank": 2
        },
        "mistral-large": {
          "total": 28.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 50.2,
          "note": "Solid comparative analysis; minor threshold inaccuracies for AU/CA non-residents.",
          "rank": 5
        },
        "llama-4-maverick": {
          "total": 20.0,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 8.8,
          "note": "MOSS replaced by OSS 2021; UK regs citation unverifiable; scenarios shallow",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 24.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 78.8,
          "note": "Article 14a cite misapplied; VAT Amendment No.2 2019 unverified; otherwise solid",
          "rank": 9
        },
        "qwen3.7-max": {
          "total": 25.4,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 81.6,
          "note": "WTL C-452/03 citation is hallucinated; core framework otherwise solid.",
          "rank": 8
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 39.9,
          "note": "Technically precise, well-structured; Welmory cite accurate and relevant.",
          "rank": 3
        }
      }
    },
    {
      "id": 170,
      "category": "Immigration",
      "use_case": "founder_mobility_canada_uk_sg_uae_startup",
      "models": {
        "claude-opus-4.8": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 31.4,
          "note": "Accurate, well-structured, strong PR-distinction analysis, good US-side flag.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 57.7,
          "note": "Thorough, well-structured; minor IRPR citation imprecision but no hallucinated case law",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 98.8,
          "note": "Accurate, well-structured, cites real regulations, strong non-uniformity analysis.",
          "rank": 6
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 16.3,
          "note": "Solid, well-structured memo; minor salary threshold and PR timeline imprecisions.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 12.5,
          "note": "Solid comparative memo; Tech Nation dissolved 2023, minor inaccuracy noted.",
          "rank": 5
        },
        "mistral-large": {
          "total": 26.8,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 35.8,
          "note": "Solid structure; minor errors: UK-EU mobility claim incorrect post-Brexit.",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 20.0,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 8.9,
          "note": "Superficial analysis; misses nationality-specific barriers, OPT status risks, tax residency conflicts.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 59.8,
          "note": "Solid comparative memo; misses passport-specific visa nuances and US tax implications.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 25.4,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 75.9,
          "note": "Solid memo; minor inaccuracies on UAE/Singapore rules but no hallucinated caselaw.",
          "rank": 9
        },
        "gemini-3.1-pro": {
          "total": 28.7,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 28.3,
          "note": "Solid comparative memo; citations verifiable; Singapore PR quota detail slightly speculative.",
          "rank": 4
        }
      }
    },
    {
      "id": 171,
      "category": "Criminal/White Collar",
      "use_case": "multi_jurisdiction_sanctions_evasion_internal_report",
      "models": {
        "claude-opus-4.8": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 56.0,
          "note": "Rigorous multi-regime analysis; FDP/0% de minimis trap particularly sharp.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 63.8,
          "note": "Rigorous multi-regime analysis; three traps well-articulated; no hallucinated citations.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 117.8,
          "note": "Thorough, citation-accurate, multi-regime analysis with strong analytical traps section.",
          "rank": 5
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 17.5,
          "note": "Rigorous multi-regime analysis; minor EAR de-minimis threshold nuance needed.",
          "rank": 3
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 10.1,
          "note": "Precise citations, strong multi-regime analysis, three traps well-articulated.",
          "rank": 1
        },
        "mistral-large": {
          "total": 24.3,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 79.2,
          "note": "Epsilon Electronics cite wrong; Banki misapplied; BNP Paribas cite fabricated",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 22.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 8.8,
          "note": "Superficial analysis; thin self-disclosure treatment; weak analytical traps; under 1200 words",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 95.8,
          "note": "Rigorous multi-regime analysis; three traps well-articulated; actionable and production-ready.",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 100.5,
          "note": "Rigorous multi-regime analysis; three traps well-articulated; minor FDPR nuance gaps.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 26.7,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 40.3,
          "note": "Transcat/Cobalt Holdings citations unverifiable; 31 CFR 587.201 reference suspect.",
          "rank": 8
        }
      }
    },
    {
      "id": 172,
      "category": "International Trade",
      "use_case": "rules_of_origin_restructuring_asia_americas",
      "models": {
        "claude-opus-4.8": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 41.8,
          "note": "Rigorous multi-agreement matrix; correct CPTPP cumulation and USMCA RVC analysis.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 46.1,
          "note": "Strong cross-jurisdictional analysis; minor EVFTA diagonal cumulation oversimplification noted.",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 114.4,
          "note": "Rigorous multi-agreement analysis with accurate cumulation distinctions and actionable destination-specific guidance.",
          "rank": 6
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 18.5,
          "note": "Rigorous multi-regime analysis; minor EU-Mexico FTA tariff rate imprecision noted.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 11.9,
          "note": "Solid framework; EVFTA full cumulation claim overstated, some RVC figures imprecise.",
          "rank": 5
        },
        "mistral-large": {
          "total": 25.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 47.7,
          "note": "Solid framework; RVC thresholds cited as illustrative, not always verified.",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 23.0,
          "quality": 6.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 7.8,
          "note": "Several article citations unverifiable or incorrect; cumulation analysis oversimplified",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 24.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 77.6,
          "note": "Solid framework; CPTPP cumulation overstated as 'full'; some RVC figures unverified",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 24.4,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 94.5,
          "note": "Anheuser-Busch/Energetic cited as WTO origin cases—hallucinated references.",
          "rank": 9
        },
        "gemini-3.1-pro": {
          "total": 28.7,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 35.4,
          "note": "Solid analysis; CPTPP Art. 3.10 cumulation mechanics slightly oversimplified but accurate.",
          "rank": 4
        }
      }
    },
    {
      "id": 173,
      "category": "Environmental/ESG",
      "use_case": "csrd_scope3_supply_chain_africa_conflict",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 39.7,
          "note": "Legally precise, actionable, cross-jurisdictional nuance excellent, no hallucinations detected.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 55.2,
          "note": "Rigorous, jurisdiction-aware, actionable; minor CS3D article citation unverified",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 94.6,
          "note": "Comprehensive, legally precise, cross-jurisdictional, actionable; no hallucinated citations detected.",
          "rank": 5
        },
        "o3": {
          "total": 28.1,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 17.7,
          "note": "Sturgeon C-104/03 misapplied; unrelated to proportionality in supply contracts",
          "rank": 6
        },
        "grok-4.3": {
          "total": 30.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.7,
          "note": "Solid, actionable CSRD/ESRS analysis with realistic Africa-specific caveats flagged.",
          "rank": 3
        },
        "mistral-large": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 34.3,
          "note": "Solid framework; OHADA Common Court cite misapplied, CSDDD timeline slightly off.",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 20.6,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 8.8,
          "note": "OHADA has no Uniform Act on Labour Law; shallow cross-jurisdictional analysis",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 74.9,
          "note": "Legally sound, well-structured, realistic cross-jurisdictional analysis with actionable framework.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 26.4,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 80.9,
          "note": "Solid cross-jurisdictional analysis; phased data approach and arbitration pivot are actionable.",
          "rank": 9
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 32.5,
          "note": "Rigorous, jurisdiction-aware, actionable; NDPA and rupture brutale flags excellent.",
          "rank": 2
        }
      }
    },
    {
      "id": 174,
      "category": "Arbitration",
      "use_case": "sports_broadcasting_rights_multi_forum_selection",
      "models": {
        "claude-opus-4.8": {
          "total": 29.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 36.1,
          "note": "Fiona Trust and Dutco citations verified; Arbitration Act 2024 slightly premature",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 25.8,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 40.4,
          "note": "ATF 138 III 29 and Arbitration Act 2025 require verification; CAS analysis strong",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 107.4,
          "note": "Thorough, jurisdiction-accurate, production-ready clause with well-identified pitfalls.",
          "rank": 2
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 15.8,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 27.6,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 13.3,
          "note": "Fabricated Qatari media-rights awards post-2020 and unverifiable Article R41/R42 analogy claim",
          "rank": 4
        },
        "mistral-large": {
          "total": 23.3,
          "quality": 8.0,
          "accuracy": 4.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 45.9,
          "note": "Multiple fabricated citations: Case 132/2020, FIFA CAS 4490, Case 14/2019 unverifiable",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 21.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 8.3,
          "note": "Superficial analysis; misses UAE Federal Law No.6/2018, DIFC-LCIA nuances, QFC specifics",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 59.2,
          "note": "Solid structure; QICDRC characterization slightly imprecise, CAS carve-out well-handled.",
          "rank": 5
        },
        "qwen3.7-max": {
          "total": 23.4,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 103.1,
          "note": "First Abu Dhabi Bank v QIC cite appears fabricated; Bauhinia misapplied",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 31.1,
          "note": "Enka cite accurate; CAS Rule R28 seat characterization slightly oversimplified",
          "rank": 3
        }
      }
    },
    {
      "id": 175,
      "category": "Insurance",
      "use_case": "cyber_policy_global_ransomware_endorsement",
      "models": {
        "claude-opus-4.8": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 43.8,
          "note": "Rigorous clause drafting; Patel v Mirza and OFAC citations verifiable and apt.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 128.4,
          "note": "Exceptional cross-jurisdictional depth; case citations appear verifiable and jurisdiction-correct.",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 29.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 78.1,
          "note": "Rigorous multi-jurisdictional endorsement; Home Ins. Co. cite verifiable and accurate.",
          "rank": 3
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 26.5,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 29.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 6.4,
          "note": "Patel v Mirza cited correctly; OFAC case law vague but not fabricated.",
          "rank": 4
        },
        "mistral-large": {
          "total": 22.8,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 55.7,
          "note": "TIG Insurance, JP Morgan, Aziz citations misapplied or fabricated; FCPA/POCA analysis weak",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 15.6,
          "quality": 4.0,
          "accuracy": 2.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 6.5,
          "note": "Multiple hallucinated cases; clause lacks specificity on jurisdiction-by-jurisdiction illegality triggers",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 58.5,
          "note": "Technically sound, jurisdiction-aware; OSFI/DFAT minor inaccuracies noted.",
          "rank": 5
        },
        "qwen3.7-max": {
          "total": 24.4,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 97.5,
          "note": "Home Ins. Co. v. Am. Home Prods. citation misapplied; Patel v Mirza accurate",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 34.5,
          "note": "Safeway v Twigger cited correctly; LOPMI and OFAC advisory accurate.",
          "rank": 1
        }
      }
    },
    {
      "id": 176,
      "category": "Healthcare/Pharma",
      "use_case": "telehealth_cross_border_doctor_patient_regulatory_mismatch",
      "models": {
        "claude-opus-4.8": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 47.0,
          "note": "Thorough, well-structured; case citations verifiable; four errors clearly identified.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 27.8,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 51.5,
          "note": "Hiser v. Randolph cited incorrectly; Arizona case misapplied to CA/NY analysis",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 135.0,
          "note": "Thorough, well-cited, actionable; minor gaps in Gulf telehealth specifics.",
          "rank": 7
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 23.1,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 28.0,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 9.1,
          "note": "Indian Medical Association v. V.P. Shantha misapplied; telemedicine extension unverified.",
          "rank": 4
        },
        "mistral-large": {
          "total": 24.3,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 74.5,
          "note": "Multiple hallucinated cases and statutes; DISHA, Rogers v. Troutman, Arsenault unverifiable",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 17.6,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 9.4,
          "note": "Superficial analysis; step-by-step format unprofessional; misses Gulf-specific licensing detail",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 103.4,
          "note": "Thorough, well-structured; statutory citations plausible but some details unverified.",
          "rank": 5
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 107.1,
          "note": "Excellent cross-jurisdictional analysis; VP Shantha cite is real and accurate.",
          "rank": 3
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 37.4,
          "note": "Tunkl cite verified; UAE/Qatar criminal exposure well-flagged; actionable and thorough.",
          "rank": 2
        }
      }
    },
    {
      "id": 177,
      "category": "Bankruptcy & Insolvency",
      "use_case": "tech_startup_insolvency_data_and_ip_assets",
      "models": {
        "claude-opus-4.8": {
          "total": 31.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 39.6,
          "note": "Rubin v Eurofinance and Linden Gardens correctly cited; India gap well-flagged.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 46.4,
          "note": "Linden Gardens cite accurate; IBC/PPSA/CBIR analysis solid and actionable.",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 98.1,
          "note": "Comprehensive, jurisdiction-accurate, cites verified; Gibbs rule correctly applied.",
          "rank": 4
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 17.8,
          "note": "",
          "rank": 9
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 15.3,
          "note": "Accurate, well-structured, actionable; correctly flags GDPR, IBC, CBIR gaps.",
          "rank": 2
        },
        "mistral-large": {
          "total": 2.3,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 2.3,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 67.7,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 17.0,
          "quality": 3.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.2,
          "note": "Superficial step-by-step format; lacks depth, specificity, and actionable bidder guidance.",
          "rank": 8
        },
        "deepseek-v3.2": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 89.9,
          "note": "Re Pan Ocean cite is questionable; Linden Gardens citation accurate but context stretched",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 110.6,
          "note": "Linden Gardens correctly cited; IBC, CBIR, PIPEDA, CIGA all accurate.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 44.0,
          "note": "Gibbs rule, Jet Airways, CCAA, IBC citations all verifiable and correctly applied.",
          "rank": 3
        }
      }
    },
    {
      "id": 178,
      "category": "Securities",
      "use_case": "dual_listing_spac_mica_crypto_securities_offer",
      "models": {
        "claude-opus-4.8": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 44.2,
          "note": "Reves and Howey correctly cited; MiCA/MiFID sequencing analysis is sophisticated.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 58.5,
          "note": "SEC Release 33-11265 date/details need verification; otherwise rigorous and production-ready.",
          "rank": 7
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 114.8,
          "note": "Comprehensive, jurisdiction-accurate, well-structured; Howey cite correct and verifiable.",
          "rank": 5
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 24.0,
          "note": "FCA PS24/1, IFRS IC March 2024, ASC 840-10-15 staking citations unverifiable",
          "rank": 3
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 10.3,
          "note": "Solid multi-jurisdictional analysis; SAB 121 and FG 23/1 citations accurate.",
          "rank": 4
        },
        "mistral-large": {
          "total": 24.3,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 58.6,
          "note": "Quoine v B2C2 misapplied; PS23/6 citation unverified; Mills cited incorrectly",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 9.2,
          "note": "SEC Release 33-10574 fabricated; analysis superficial, lacks staking-specific depth",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 98.5,
          "note": "Coinbase admin proceeding citation details appear fabricated; Howey cite accurate",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 88.2,
          "note": "Technically rigorous, well-structured; SEC v. Coinbase citation slightly premature but defensible.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 47.6,
          "note": "Exceptional cross-jurisdictional depth; SAB 121 vs MiCA custody paradox is outstanding.",
          "rank": 2
        }
      }
    },
    {
      "id": 179,
      "category": "Construction",
      "use_case": "fidic_turnkey_difc_sg_np_conflict_clause_drafting",
      "models": {
        "claude-opus-4.8": {
          "total": 31.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 39.0,
          "note": "Technically precise, Decree 34 correctly flagged, actionable cross-jurisdictional architecture.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 53.0,
          "note": "Dubai Court of Cassation 2018-2020 DAB decisions cited without verifiable references",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 107.1,
          "note": "Precise drafting, strong cross-jurisdictional analysis, verifiable citations, minor gaps.",
          "rank": 4
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 15.1,
          "note": "",
          "rank": 9
        },
        "grok-4.3": {
          "total": 28.0,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 8.3,
          "note": "DIFC CA 001/2014 'A v B' citation unverifiable; likely hallucinated case reference.",
          "rank": 5
        },
        "mistral-large": {
          "total": 2.8,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 2.8,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 49.5,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 19.6,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 9.1,
          "note": "Cites non-existent UAE Civil Procedure Law articles; shallow analysis, generic recommendations",
          "rank": 8
        },
        "deepseek-v3.2": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 65.4,
          "note": "Precise drafting, strong UAE/DIFC analysis, minor gaps on India enforcement",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 86.6,
          "note": "Vijay Karia cite is real; UAL Art.4 authority point is accurate and actionable.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 29.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 44.1,
          "note": "Precise, actionable drafting with accurate UAE/DIFC law citations and SOPA nuance.",
          "rank": 2
        }
      }
    },
    {
      "id": 180,
      "category": "Fintech/Crypto",
      "use_case": "stablecoin_global_reserve_and_redemption_framework",
      "models": {
        "claude-opus-4.8": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 51.3,
          "note": "Precise, well-structured, four divergences clearly articulated, Howey cite accurate.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 70.1,
          "note": "Comprehensive, well-structured; minor uncertainty on unenacted US legislation cited as near-final.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 113.5,
          "note": "Comprehensive, jurisdiction-accurate, cites real instruments, four divergences clearly articulated.",
          "rank": 6
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 24.9,
          "note": "Thorough, well-structured; minor inaccuracies on UK timeline and MAS rules.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 30.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 10.3,
          "note": "Solid multi-jurisdictional analysis; minor gaps in Brazil BCB specifics.",
          "rank": 3
        },
        "mistral-large": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 73.2,
          "note": "Solid framework; minor inaccuracies on MiCA fee rules and MAS buffer specifics.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 18.6,
          "quality": 5.0,
          "accuracy": 4.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 11.1,
          "note": "Fabricated MiCA articles, MAS framework name, CVM instruction; shallow analysis throughout",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 117.8,
          "note": "Thorough, well-structured; minor MiCA ART vs EMT classification nuance missed.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 98.7,
          "note": "Rigorous, well-structured; minor gaps in Brazil BCB specifics and VARA details.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 39.2,
          "note": "Rigorous, well-structured; minor uncertainty on NYDFS T+2 SLA specificity.",
          "rank": 5
        }
      }
    },
    {
      "id": 181,
      "category": "AI/Tech Regulation",
      "use_case": "ai_foundation_model_open_source_dual_licensing_gdpr_ai_act_pdpa",
      "models": {
        "claude-opus-4.8": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 43.4,
          "note": "Clearview reference imprecise but not fabricated; FLOP threshold correctly cited.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 54.3,
          "note": "Re Grab and EDPB Opinion 28/2024 citations unverifiable; core analysis strong",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 101.5,
          "note": "Thorough cross-jurisdictional analysis; FLOP threshold framing slightly imprecise but defensible.",
          "rank": 4
        },
        "o3": {
          "total": 29.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 16.1,
          "note": "Re Aviva Ltd [2022] PDPC citation unverifiable; Art 52 GPAI reference slightly off",
          "rank": 2
        },
        "grok-4.3": {
          "total": 26.6,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 7.2,
          "note": "Solid framework but thin on PDPA deemed consent nuance and licensee structuring depth.",
          "rank": 6
        },
        "mistral-large": {
          "total": 24.8,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 45.2,
          "note": "Hallucinated EU case citations undermine otherwise strong cross-jurisdictional analysis",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 6.4,
          "note": "Superficial analysis; misses GPAI tiers, Art.53, PDPA deemed consent nuances, indemnity specifics",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 61.7,
          "note": "Solid analysis; PDPA Schedule 2 citation slightly imprecise but no fabricated case law.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 25.4,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 95.9,
          "note": "Clearview AI cited as precedent is imprecise but not fabricated; solid overall.",
          "rank": 8
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 36.1,
          "note": "Solid analysis; AI Act article citations mostly accurate but some nuance missing.",
          "rank": 5
        }
      }
    },
    {
      "id": 182,
      "category": "Contract & Commercial",
      "use_case": "cross_border_saas_reseller_governing_law_payment_and_tax_allocation",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 73.5,
          "note": "Exceptional cross-jurisdictional analysis; case citations verifiable; local-law caveats appropriate.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 127.1,
          "note": "Exceptional depth; Caterpillar cite plausible but unverified; answer truncated.",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 21.0,
          "quality": 6.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 173.1,
          "note": "Answer cuts off mid-sentence; missing LGPD/NDPR analysis and revised clause framework",
          "rank": 9
        },
        "o3": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 62.6,
          "note": "Comprehensive, well-structured; minor uncertainty on ANPD Resolution citation specifics.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.8,
          "note": "Solid cross-jurisdictional analysis; revised clauses practical and internally consistent.",
          "rank": 4
        },
        "mistral-large": {
          "total": 25.2,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 62.9,
          "note": "Splendid Sunrise and Aeroflot citations misapplied; Beximco tangential but real.",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 17.0,
          "quality": 4.0,
          "accuracy": 3.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 8.6,
          "note": "Vitol cite unverifiable/misapplied; shallow analysis; vague revised clauses",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 25.3,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 89.2,
          "note": "Bumper Development citation misapplied; Rome I post-Brexit inapplicable to English courts",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 104.0,
          "note": "Excellent cross-jurisdictional analysis; hub-and-spoke pivot is commercially astute.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 46.1,
          "note": "Accurate, well-structured, actionable; buy-sell restructure advice is excellent.",
          "rank": 1
        }
      }
    },
    {
      "id": 183,
      "category": "Data Privacy",
      "use_case": "health_research_data_reuse_gdpr_csrd_pipeda_clash",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 70.3,
          "note": "Rigorous multi-jurisdictional analysis; Hague consumer exclusion and OSS gap well-spotted.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 133.1,
          "note": "Exceptional depth; Douez and Van Breda correctly applied; Swiss HCCCA non-ratification accurate.",
          "rank": 2
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 159.2,
          "note": "Thorough, citation-accurate analysis; answer truncated before completing US transfer and forum sections.",
          "rank": 5
        },
        "o3": {
          "total": 27.1,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 19.4,
          "note": "C-64/21 'A v Volvo' appears fabricated; Douez/Pompey citations plausible but verify",
          "rank": 4
        },
        "grok-4.3": {
          "total": 27.6,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 9.0,
          "note": "Fashion ID cited incorrectly; adequacy decision 2000/518/EC is wrong reference",
          "rank": 3
        },
        "mistral-large": {
          "total": 26.7,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 45.5,
          "note": "Schrems II cite misapplied to Switzerland; Rigas satiksme mischaracterized; Rome II inapplicable",
          "rank": 6
        },
        "llama-4-maverick": {
          "total": 15.0,
          "quality": 3.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 1.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 5.4,
          "note": "Superficial step-by-step format; ends absurdly with boxed '1'; lacks depth",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 91.0,
          "note": "Douez v Facebook cited correctly; CSRD-GDPR interaction well-handled; transfer analysis solid",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 24.9,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 110.4,
          "note": "Switzerland not HCCCA party is correct; Jones v Tsige cited inaccurately re standing",
          "rank": 9
        },
        "gemini-3.1-pro": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 45.7,
          "note": "RBC v. Trang misapplied; Schrems C-498/16 citation inaccurate/mischaracterized",
          "rank": 8
        }
      }
    },
    {
      "id": 184,
      "category": "Corporate Governance",
      "use_case": "dual_listed_board_duties_climate_and_controlling_shareholder_ksa_uk",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 80.1,
          "note": "Sophisticated dual-regime analysis; UKLR 2024 transition handled with appropriate nuance.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 134.9,
          "note": "Exceptional dual-jurisdiction analysis; case citations verifiable; UKLR 2024 correctly applied.",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 143.3,
          "note": "Thorough dual-jurisdiction analysis; answer truncated mid-sentence, reducing completeness score.",
          "rank": 7
        },
        "o3": {
          "total": 27.1,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 18.2,
          "note": "Re Drax Group plc [2004] EWHC 2743 appears hallucinated; LR numbering imprecise",
          "rank": 6
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.2,
          "note": "Solid dual-jurisdiction analysis; case citations plausible but peripheral to core duties.",
          "rank": 2
        },
        "mistral-large": {
          "total": 25.2,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 56.9,
          "note": "CMA decisions, FCA v Carillion, Rome I Art.1(2)(f) misapplied; BTI stretched",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 18.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 9.1,
          "note": "Superficial analysis; step-list format unprofessional; lacks depth on IFRS S2, LR conflicts",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 86.3,
          "note": "Rigorous lex societatis framing; SCL article citations need independent verification.",
          "rank": 4
        },
        "qwen3.7-max": {
          "total": 24.9,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 119.1,
          "note": "Base Metal Trading and Konamaneni citations misapplied; UKLR categorization imprecise",
          "rank": 9
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 35.2,
          "note": "Solid dual-jurisdiction analysis; UKLR 2024 restructuring characterization slightly imprecise.",
          "rank": 5
        }
      }
    },
    {
      "id": 185,
      "category": "M&A",
      "use_case": "carve_out_eu_ai_vendor_acquisition_data_and_regulatory_continuity",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 90.0,
          "note": "Exceptional cross-jurisdictional depth; Schrems II and DPF risk correctly flagged.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 138.6,
          "note": "Exceptional depth; answer truncated mid-sentence on German FDI section",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 178.3,
          "note": "Exceptional cross-jurisdictional depth; AI Act article citations verified and accurate.",
          "rank": 4
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 22.2,
          "note": "Technically strong, minor AI Act citation imprecision, DPDPA enforcement date uncertain",
          "rank": 3
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 11.4,
          "note": "Precise citations, actionable SPA drafting, strong cross-jurisdictional sequencing analysis.",
          "rank": 1
        },
        "mistral-large": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 58.6,
          "note": "Solid structure; minor inaccuracies in AI Act article citations and HSR thresholds",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 9.7,
          "note": "Superficial treatment; misses CE-mark non-transferability, SCCs novation, DPDPA gaps",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 102.1,
          "note": "Strong structure; AI Act Art.113 dates slightly imprecise; HSR thresholds approximate but flagged",
          "rank": 9
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 126.9,
          "note": "Exceptional cross-jurisdictional depth; minor AI Act article citations need verification.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 49.1,
          "note": "Solid memo; DPDPA 'fully operational' overstated; HSR threshold slightly off",
          "rank": 7
        }
      }
    },
    {
      "id": 186,
      "category": "Banking/Finance",
      "use_case": "cross_border_syndicated_loan_security_priority_ohada_ny_uae",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 87.8,
          "note": "Exceptional cross-jurisdictional analysis; minor uncertainty on UAE 2024 law details.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 130.7,
          "note": "Exceptional depth; OHADA Article 5/87 analysis and parallel debt risks well-handled.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 147.8,
          "note": "Solid OHADA/UAE analysis; UK IP section entirely missing; parallel debt underexplored.",
          "rank": 6
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 26.7,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 29.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.5,
          "note": "Solid multi-jurisdictional analysis; UAE decree numbers need verification before reliance.",
          "rank": 2
        },
        "mistral-large": {
          "total": 21.2,
          "quality": 6.0,
          "accuracy": 3.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 46.3,
          "note": "Multiple fabricated citations: Tribunal d'Abidjan case, wrong UAE statute articles",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 17.6,
          "quality": 4.0,
          "accuracy": 4.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 8.8,
          "note": "Superficial analysis; misses parallel debt, RCCM nuances, UAE 2020 Movables Law",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 92.8,
          "note": "Solid framework; UAE Federal Law No.20/2016 citation slightly misapplied but no fabrications.",
          "rank": 5
        },
        "qwen3.7-max": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 118.0,
          "note": "Macmillan cite accurate; OHADA AUS articles plausible but unverified precisely",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 28.7,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 31.7,
          "note": "Solid multi-jurisdictional analysis; UAE Pledge Law citation and OHADA AUS accurate.",
          "rank": 3
        }
      }
    },
    {
      "id": 187,
      "category": "Dispute Resolution",
      "use_case": "multi_forum_distribution_ip_and_antitrust_litigation_strategy",
      "models": {
        "claude-opus-4.8": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 84.2,
          "note": "Rigorous multi-forum analysis; Eco Swiss, Mitsubishi, Leegin all correctly cited.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 132.7,
          "note": "Exceptional multi-forum analysis; BGer cite unverified but plausible; memo truncated.",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 24.0,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 175.9,
          "note": "General Investment Co. cite misapplied; Marrese usage strained for removal argument",
          "rank": 7
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 25.7,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.8,
          "note": "China Trade cite is real but misapplied; Lugano/Brussels Ia analysis solid.",
          "rank": 2
        },
        "mistral-large": {
          "total": 23.7,
          "quality": 7.0,
          "accuracy": 5.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 43.6,
          "note": "BGE cite plausible but unverified; In re Chevron misapplied; PILA Art.112 wrong",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 17.0,
          "quality": 4.0,
          "accuracy": 3.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 6.8,
          "note": "Quaak cite misapplied; Brussels Ia inapplicable to US judgments; shallow analysis",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 25.8,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 81.6,
          "note": "Judgment 4A_233/2019 citation unverified; likely hallucinated Swiss FSC case.",
          "rank": 5
        },
        "qwen3.7-max": {
          "total": 24.9,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 118.4,
          "note": "VBER cited as 2022/726 (wrong number); DFT/SFSC cites unverifiable",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 28.2,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 45.7,
          "note": "Solid multi-forum analysis; Brussels Ia inapplicability correctly flagged; no hallucinations.",
          "rank": 4
        }
      }
    },
    {
      "id": 188,
      "category": "IP/Tech Law",
      "use_case": "cross_border_text_and_data_mining_uae_uk_jp_brazil",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 69.2,
          "note": "Getty v Stability AI characterisation slightly imprecise but no hallucinated citations.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 129.3,
          "note": "Minor issue: Apis-Hristovich cite misapplied but no fabricated law",
          "rank": 7
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 147.1,
          "note": "Exceptionally thorough, jurisdiction-correct, commercially actionable; no hallucinated citations detected.",
          "rank": 5
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 29.9,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.6,
          "note": "Accurate, well-structured; correctly flags UK commercial TDM gap and Japan Art 30-4.",
          "rank": 1
        },
        "mistral-large": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 42.5,
          "note": "Solid analysis; minor inaccuracies on UAE law article citations and MEXT guidance.",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 21.0,
          "quality": 6.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 9.2,
          "note": "Section 29B hallucinated; UK commercial TDM exception was rejected post-consultation",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 108.3,
          "note": "Accurate, well-structured, jurisdiction-correct analysis with actionable licensing recommendations.",
          "rank": 4
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 111.7,
          "note": "Clearview AI ICO cite accurate; UAE PDPL transfer adequacy framing slightly speculative.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 33.3,
          "note": "Getty v Stability AI cite is real; PDPL legitimate interest gap well-spotted.",
          "rank": 3
        }
      }
    },
    {
      "id": 189,
      "category": "Employment Law",
      "use_case": "remote_first_equity_compensation_and_misclassification_sg_ca_tx_fr",
      "models": {
        "claude-opus-4.8": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 88.4,
          "note": "Dynamex, AB5, Uber Cass.soc. 2020, Rome I all verifiable and correct.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 138.5,
          "note": "Exceptional depth; Dynamex, Borello, Viking River citations verified and accurate.",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 163.1,
          "note": "Thorough, well-structured; case citations appear verifiable; memo cuts off mid-France section.",
          "rank": 8
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 25.8,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 10.2,
          "note": "Cass. soc. 2021 cite unverifiable; Limestone case plausible but unconfirmed",
          "rank": 4
        },
        "mistral-large": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 53.1,
          "note": "Solid memo; Nedlloyd cite accurate; BSPCE/ESOP distinction slightly imprecise.",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 20.0,
          "quality": 5.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 7.9,
          "note": "Limestone Products cite misapplied; shallow ESOP/social-charge analysis; generic recommendations",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 28.8,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 84.4,
          "note": "Dynamex cite accurate; PE risk flagged; France URSSAF detail solid.",
          "rank": 3
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 122.7,
          "note": "Newspapers Inc cite unverified but plausible; Société Générale cite credible.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 52.5,
          "note": "Thorough, jurisdiction-accurate memo; Rome I citation technically post-Brexit but contextually valid.",
          "rank": 1
        }
      }
    },
    {
      "id": 190,
      "category": "Tax",
      "use_case": "cross_border_saas_vat_gst_withholding_and_pe_risk",
      "models": {
        "claude-opus-4.8": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 89.4,
          "note": "Engineering Analysis cite verified; EL 2.0 abolition date flagged appropriately as verify-needed.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 26.4,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 137.8,
          "note": "Citrix ITAT cite unverifiable; Engineering Analysis citation details need verification",
          "rank": 7
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 157.5,
          "note": "Thorough, well-cited analysis; answer appears cut off before completing PE section",
          "rank": 5
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 20.1,
          "note": "Thorough, well-structured; minor EL mechanics and OSS scope imprecisions noted.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 30.6,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 11.7,
          "note": "Thorough, well-cited analysis; minor EL sunset risk and treaty nuance gaps.",
          "rank": 1
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 55.3,
          "note": "Germany VAT rate cited as 20% (correct is 19%); solid overall structure",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 18.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 6.9,
          "note": "Superficial analysis, wrong EL rate, no actionable PE/withholding depth",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 25.8,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 86.9,
          "note": "Nigeria-US DTAA cited as existing but no such treaty is in force",
          "rank": 9
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 118.6,
          "note": "Engineering Analysis cite verified; Nigeria SEP threshold and treaty gap correctly flagged.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 26.7,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 38.0,
          "note": "Mastercard AAR cite unverified; Finance Act 2024 EL abolition needs verification",
          "rank": 6
        }
      }
    },
    {
      "id": 191,
      "category": "Regulatory Compliance",
      "use_case": "global_digital_health_app_multiregime_authorization_and_marketing",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 64.0,
          "note": "Exceptional depth, accurate citations, UAE localization flagged, actionable sequencing provided.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 133.7,
          "note": "Exceptionally thorough; truncated at India DPDPA; DEN180044 verifiable.",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 143.5,
          "note": "Solid framework but answer truncated; India/UAE/privacy sections incomplete",
          "rank": 9
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 20.7,
          "note": "Comprehensive, well-structured roadmap; minor risk on UAE Class III specificity.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.8,
          "note": "Solid roadmap; UAE localization and India Class C nuance could be deeper.",
          "rank": 3
        },
        "mistral-large": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 54.0,
          "note": "Solid roadmap; some UAE/India citations imprecise but no outright hallucinations.",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 18.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.4,
          "note": "Superficial analysis; missing EUDAMED, DPDPA specifics, UAE HAAD/DOH, SaMD rules",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 76.0,
          "note": "Solid roadmap; minor gaps in UAE ESMA role and India SaMD nuances.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 105.7,
          "note": "Comprehensive, well-structured; minor UAE law citation imprecision but no hallucinations.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 35.6,
          "note": "Solid roadmap; UK CE Mark transition timeline slightly oversimplified; DPDP nuances thin.",
          "rank": 6
        }
      }
    },
    {
      "id": 192,
      "category": "Real Estate",
      "use_case": "cross_border_propco_opco_structure_with_sharia_and_foreign_ownership",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 67.3,
          "note": "Exceptional multi-jurisdictional structure; correct citations; honest enforcement caveats throughout.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 140.1,
          "note": "Exceptional multi-jurisdictional depth; parallel debt caveat appropriately flagged; no hallucinations.",
          "rank": 7
        },
        "gpt-5.5": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 138.8,
          "note": "Exceptional multi-jurisdictional depth; all cited laws appear verifiable and jurisdiction-correct.",
          "rank": 4
        },
        "o3": {
          "total": 29.1,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 20.0,
          "note": "Impressive structure but several case citations appear fabricated or unverifiable.",
          "rank": 5
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 10.2,
          "note": "Robust multi-jurisdictional structure; citations verifiable; nominee trust nuance well-handled.",
          "rank": 1
        },
        "mistral-large": {
          "total": 25.2,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 47.9,
          "note": "DIFC CA-005-2016 and DHCC Authority Law No. 9/2011 appear fabricated",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 20.0,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.2,
          "note": "Superficial treatment; misses DIFC/ADGM structuring, Murabaha debt, healthcare licensing nuances.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 95.5,
          "note": "Solid structure; nominee agreement enforceability risk and DIFC eligibility understated.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 100.4,
          "note": "Exceptional cross-jurisdictional depth; minor uncertainty on 100% foreign healthcare ownership.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 40.8,
          "note": "Robust structure; UAE mortgage licensing nuance slightly oversimplified but citations verifiable.",
          "rank": 3
        }
      }
    },
    {
      "id": 193,
      "category": "Immigration",
      "use_case": "distributed_founders_residency_and_investor_visa_strategy",
      "models": {
        "claude-opus-4.8": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 125.1,
          "note": "Exceptional cross-jurisdictional depth; Harding cite plausible but verify current status.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 142.5,
          "note": "Exceptional depth; minor gaps in CFC/PFIC analysis and US tax treaty coverage",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 25.0,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 172.7,
          "note": "Solid framework but answer truncated; COMPASS, CFC, phased plan incomplete",
          "rank": 9
        },
        "o3": {
          "total": 28.1,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 28.1,
          "note": "Solid framework; De Beers cite valid but NOR scheme details need verification.",
          "rank": 5
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 11.4,
          "note": "Solid comparative analysis; minor gaps on E-2 treaty eligibility nuances.",
          "rank": 2
        },
        "mistral-large": {
          "total": 25.2,
          "quality": 7.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 73.6,
          "note": "Solid framework but EntrePass/Tech.Pass eligibility details oversimplified; CFC analysis thin",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 9.0,
          "note": "Superficial analysis; misses CFC specifics, PFIC risks, and substance requirements.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 100.6,
          "note": "Solid framework; minor inaccuracies on EntrePass shareholding and Canada SUV thresholds.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 95.4,
          "note": "Exceptional cross-jurisdictional analysis; POEM/CFC/departure tax risks well-flagged.",
          "rank": 1
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 50.9,
          "note": "Solid framework; minor inaccuracies on EntrePass thresholds and NIV status.",
          "rank": 6
        }
      }
    },
    {
      "id": 194,
      "category": "Criminal/White Collar",
      "use_case": "multijurisdiction_bribery_investigation_vendor_chain",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 91.4,
          "note": "Exceptional depth; SFO v ENRC citation accurate; French blocking statute correctly flagged.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 139.6,
          "note": "Exceptional depth; Three Rivers citation accurate; answer truncated before UK labor law completion",
          "rank": 7
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 140.0,
          "note": "Thorough, well-structured, accurate; answer appears cut off mid-sentence",
          "rank": 8
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 22.3,
          "note": "Comprehensive, well-structured; Three Rivers cite slightly imprecise but defensible.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.8,
          "note": "ANAC/CADE misapplied; CGU/AGU correct; Airbus/Rolls-Royce cites valid.",
          "rank": 6
        },
        "mistral-large": {
          "total": 25.3,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 98.1,
          "note": "SFO v. Guralp 2021 conviction claim and Safran 2021 cite unverifiable",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 21.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.8,
          "note": "Superficial treatment; missing sequencing detail, privilege conflicts, and labor law specifics.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 29.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 86.2,
          "note": "Solid multi-jurisdictional plan; Decree 11,129/2022 citation needs verification.",
          "rank": 4
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 103.6,
          "note": "Exceptional cross-jurisdictional depth; Kozeny cite slightly misapplied but not hallucinated.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 56.3,
          "note": "ENRC citation accurate; LGPD/NDPA/Sapin II/FCPA analysis solid and actionable.",
          "rank": 1
        }
      }
    },
    {
      "id": 195,
      "category": "International Trade",
      "use_case": "sanctions_export_controls_and_parallel_imports_of_chipsets",
      "models": {
        "claude-opus-4.8": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 73.0,
          "note": "Rigorous multi-jurisdictional analysis; FDP rules, Art.12g, VSD incentives correctly cited.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 140.2,
          "note": "Lamesa v Cynergy cited incorrectly as DIFC precedent; otherwise strong analysis",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 24.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 161.5,
          "note": "Answer truncated mid-sentence; Epsilon Electronics cite is real but tangential",
          "rank": 9
        },
        "o3": {
          "total": 28.1,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 21.2,
          "note": "Lakatamia DIFC cite and Istanbul 2020/5986 appear fabricated/unverifiable",
          "rank": 4
        },
        "grok-4.3": {
          "total": 30.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.6,
          "note": "Solid extraterritorial analysis; retroactive remediation and UAE/Turkey sections well-handled.",
          "rank": 3
        },
        "mistral-large": {
          "total": 24.8,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 62.4,
          "note": "Dubai Court No.142/2018 and Yargıtay E.2016/12345 appear fabricated citations",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 21.6,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 7.9,
          "note": "Superficial analysis; misses FDP rule, re-export controls, CJEU nuances, and Japan FEFTA.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 59.5,
          "note": "United States v. Iraj Lavi 2d Cir. 1992 citation unverifiable/likely hallucinated",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 98.8,
          "note": "Thorough, well-structured; minor gap on DIFC vs onshore UAE nuance.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 52.3,
          "note": "Accurate, well-structured, cites real regulations, strong practical remediation guidance.",
          "rank": 1
        }
      }
    },
    {
      "id": 196,
      "category": "Environmental/ESG",
      "use_case": "cross_border_supply_chain_deforestation_and_esg_reporting",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 81.3,
          "note": "Rigorous, well-caveated memo; EUDR/CSRD flux appropriately flagged throughout.",
          "rank": 4
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 138.0,
          "note": "Exceptional multi-jurisdictional depth; Part III on contracts/LGPD missing entirely.",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 161.3,
          "note": "Rigorous, jurisdiction-correct, LGPD section missing due to truncation",
          "rank": 5
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 25.3,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 30.6,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 11.1,
          "note": "REsp 1.367.923 and In re Vale citations require verification before reliance.",
          "rank": 2
        },
        "mistral-large": {
          "total": 25.2,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 76.3,
          "note": "Multiple hallucinated cases: Vale SEC 2022, Tesla ESG, Hodsdon Mars, Arch Cru FCA.",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 6.8,
          "note": "Superficial analysis; wrong SEC release numbers; Brazilian Civil Code cite misapplied",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 113.5,
          "note": "STJ REsp cite unverified but plausible; Vale litigation reference accurate enough.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 99.0,
          "note": "Vedanta, Okpabi, Nestlé v Doe all correctly cited and applied.",
          "rank": 3
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 50.6,
          "note": "Accurate case law, correct EUDR cotton exclusion, strong LGPD-CAR integration.",
          "rank": 1
        }
      }
    },
    {
      "id": 197,
      "category": "Arbitration",
      "use_case": "investment_treaty_arbitration_vs_commercial_arbitration_firb_and_ohada",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 77.9,
          "note": "Rigorous multi-regime analysis; BIT conditionality caveat appropriately flagged throughout.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 136.1,
          "note": "Sophisticated multi-forum analysis; truncated at ICSID Art.54 enforcement section.",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 25.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 154.8,
          "note": "Strong framework; answer truncated mid-table, missing FIRB and OHADA enforcement sections",
          "rank": 8
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 21.6,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 10.1,
          "note": "Solid analysis; CMS/Toto citations used cautiously as general approach only.",
          "rank": 2
        },
        "mistral-large": {
          "total": 25.2,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 53.0,
          "note": "Multiple case citations unverifiable or misattributed; ICSID Art.55 mischaracterized",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 5.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 8.2,
          "note": "Switzerland-Philippines BIT cited irrelevantly; OHADA AUA analysis superficial and incomplete",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 96.4,
          "note": "Pantechniki and Aguas del Tunari citations are real and correctly applied.",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 98.0,
          "note": "Solid dual-track analysis; CMS/Hochtief citations accurate; OHADA enforcement nuanced.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 49.3,
          "note": "Vivendi/Lauder citations accurate; FIRB analysis thin but directionally correct.",
          "rank": 5
        }
      }
    },
    {
      "id": 198,
      "category": "Insurance",
      "use_case": "parametric_climate_insurance_payout_dispute_multilaw",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 74.6,
          "note": "Stephens cite slightly off but flagged; Safety National accurate; strong federated protocol.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 131.4,
          "note": "CDI Bulletin CB-2022-04 and some case citations unverifiable; strong overall framework",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 155.8,
          "note": "Thorough multilaw analysis; Nigerian AMA 2023 and all cites appear verifiable.",
          "rank": 4
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 17.5,
          "note": "",
          "rank": 9
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 11.0,
          "note": "McGill cite accurate; solid multi-jurisdictional framework with actionable steps throughout.",
          "rank": 2
        },
        "mistral-large": {
          "total": 23.7,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 38.5,
          "note": "Cal. Ins. Code §1758.90 and McGill misapplied; Nigerian ACA cite questionable",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 18.0,
          "quality": 5.0,
          "accuracy": 3.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 7.3,
          "note": "Montrose citation misapplied; Lee v State Farm and Oluwole unverifiable/hallucinated",
          "rank": 8
        },
        "deepseek-v3.2": {
          "total": 2.3,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 2.3,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 80.3,
          "note": "",
          "rank": 10
        },
        "qwen3.7-max": {
          "total": 24.9,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 99.8,
          "note": "MHR Capital Partners misapplied; Unigard cite plausible but context strained.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 28.2,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 51.1,
          "note": "Solid multi-jurisdictional analysis; bifurcated protocol practical and well-structured.",
          "rank": 3
        }
      }
    },
    {
      "id": 199,
      "category": "Healthcare/Pharma",
      "use_case": "cross_border_cell_therapy_trial_contracting_and_biobank_rights",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 88.9,
          "note": "Moore cited accurately with caveat; Egypt CT-law and PDPL flagged appropriately.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 136.4,
          "note": "BGH NJW 1994,127 and Cass. 1re civ. 2003 citations unverified/likely hallucinated",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 162.5,
          "note": "Thorough, jurisdiction-accurate, well-structured; truncated before Lebanon/Egypt consent sections.",
          "rank": 4
        },
        "o3": {
          "total": 27.1,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 21.2,
          "note": "BGH 'Knochen' case and RIKEN/Egypt CAR-T citations unverifiable; core framework solid",
          "rank": 6
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 10.4,
          "note": "Solid framework; minor risk of overstating Nagoya Protocol applicability to human genetics.",
          "rank": 2
        },
        "mistral-large": {
          "total": 3.2,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.2,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 59.3,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 16.0,
          "quality": 3.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 6.5,
          "note": "Case C-377/98 misapplied; output is generic outline, not actionable analysis",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 89.2,
          "note": "Thorough, jurisdiction-specific, actionable; minor APPI article citation imprecision noted.",
          "rank": 3
        },
        "qwen3.7-max": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 102.0,
          "note": "Yearworth citation accurate; Nagoya Protocol human exclusion correctly flagged; solid framework.",
          "rank": 8
        },
        "gemini-3.1-pro": {
          "total": 26.7,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 52.8,
          "note": "Egypt Law 214/2020 Art.14 specifics and Moore applicability overstated",
          "rank": 7
        }
      }
    },
    {
      "id": 200,
      "category": "Bankruptcy & Insolvency",
      "use_case": "cross_border_platform_insolvency_digital_wallets_and_trusts",
      "models": {
        "claude-opus-4.8": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 89.1,
          "note": "Exceptional cross-border analysis; Ipagoo and Hodlnaut citations require local verification.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 136.1,
          "note": "Exceptional depth; Rubin v Eurofinance jurisdiction slightly mischaracterized but minor.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 25.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 167.3,
          "note": "Solid multi-jurisdiction analysis; truncated before Canada/Japan/strategy sections.",
          "rank": 5
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 27.0,
          "note": "",
          "rank": 8
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 10.1,
          "note": "Hunter v Moss cited correctly; solid cross-border analysis with actionable recommendations.",
          "rank": 2
        },
        "mistral-large": {
          "total": 3.2,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.2,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 62.1,
          "note": "",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 18.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 8.3,
          "note": "Superficial analysis; misses commingling doctrine, CASS specifics, and Japan Model Law gap",
          "rank": 7
        },
        "deepseek-v3.2": {
          "total": 2.8,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 2.8,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 72.2,
          "note": "",
          "rank": 10
        },
        "qwen3.7-max": {
          "total": 24.4,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 138.2,
          "note": "Re London Wine, Hunter v Moss misapplied; Integrated Res cite questionable; Rubin mischaracterized.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 42.2,
          "note": "Exceptional cross-border analysis; Gibbs rule and FSA ring-fencing insights outstanding.",
          "rank": 1
        }
      }
    },
    {
      "id": 201,
      "category": "Securities",
      "use_case": "tokenized_equity_offering_under_mica_mas_and_sec_risk",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 83.1,
          "note": "Exceptional multi-jurisdictional analysis; correctly flags Reg CF Swiss issuer ineligibility.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 132.4,
          "note": "Exceptional depth; Landreth cite accurate; Reg CF foreign issuer bar correctly flagged.",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 164.2,
          "note": "Landreth and Howey citations verified; MiCA/SFA/RegD analysis accurate and thorough.",
          "rank": 7
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 23.5,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.7,
          "note": "Solid multi-jurisdiction analysis; Howey and MiCA exclusion correctly applied.",
          "rank": 2
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 59.3,
          "note": "Spector Photo Group misapplied; B2C2 citation context inaccurate; Reg S categories misstated",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 21.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 8.7,
          "note": "Superficial analysis; misclassifies tokens under MiCA; lacks DLT Pilot Regime discussion",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 28.3,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 94.2,
          "note": "Solid multi-jurisdiction analysis; Howey cite accurate; FinSA gap noted appropriately.",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 118.0,
          "note": "Technically precise, MiCA exclusion correct, Howey cite verified, ERC-3643 practical.",
          "rank": 3
        },
        "gemini-3.1-pro": {
          "total": 28.7,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 51.9,
          "note": "Landreth cite accurate; Reg CF ineligibility flag is strong practical value.",
          "rank": 4
        }
      }
    },
    {
      "id": 202,
      "category": "Construction",
      "use_case": "ppp_construction_risk_allocation_and_step_in_rights_multijurisdiction",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 90.0,
          "note": "Exceptional multi-jurisdictional depth; minor uncertainty on Qatar Law No.5/2024 registry.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 134.1,
          "note": "Exceptional depth; Cavendish and IBC analysis accurate; Qatari law well-handled.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 151.2,
          "note": "Thorough, accurate multi-jurisdiction analysis; truncated before completing Section 2.",
          "rank": 3
        },
        "o3": {
          "total": 27.1,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 18.9,
          "note": "QCB Circular BSD/GOV/2022/09 and Qatar PPP Law art 18(3) unverifiable; IE Contractors citation plausible",
          "rank": 7
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 14.3,
          "note": "Cavendish Square correctly cited; IBC s.14 and Qatari law accurately applied.",
          "rank": 2
        },
        "mistral-large": {
          "total": 3.2,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.2,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 53.0,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 21.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.3,
          "note": "Superficial analysis; misses QICDRC, IBC nuances, Nigerian CAMA, and insurance gaps",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 25.3,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 107.5,
          "note": "Wood Hall v Pipeline Authority misapplied; Qatari Law No.12/2020 Article 30 unverified",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 109.7,
          "note": "Comau UK cite unverifiable/likely hallucinated; core analysis otherwise strong",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 41.9,
          "note": "Cavendish cite verified; Qatar PPP Law No.12/2020 plausible but unverified",
          "rank": 6
        }
      }
    },
    {
      "id": 203,
      "category": "Trusts & Estates",
      "use_case": "cross_border_family_trust_digital_assets_and_forced_heirship",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 82.0,
          "note": "Exceptional cross-border analysis; DIFC firewall, Swiss Pflichtteil reform, confessional flag all correct.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 133.7,
          "note": "Blount v. Blount cite unverified; BGer 5A_141/2016 needs verification",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 25.0,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 159.1,
          "note": "Solid foundation but memo truncated; PILA arts and Hague Art.15 correctly cited.",
          "rank": 7
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 25.9,
          "note": "",
          "rank": 9
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 10.6,
          "note": "Solid structure; vague Swiss case law reference but no fabricated citations.",
          "rank": 3
        },
        "mistral-large": {
          "total": 3.2,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.2,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 63.0,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 18.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.7,
          "note": "Superficial analysis; step-list format unprofessional; misses key PILA Art.149a-e nuances",
          "rank": 8
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 101.2,
          "note": "Solid structure; Lebanese Code citation imprecise; Article 15 nuance slightly oversimplified.",
          "rank": 5
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 122.5,
          "note": "Rigorous cross-border analysis; DIFC firewall and Swiss clawback treatment excellent.",
          "rank": 2
        },
        "gemini-3.1-pro": {
          "total": 25.7,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 54.0,
          "note": "In re Meyer cite appears fabricated; DIFC Law No.4/2018 Art.14 needs verification",
          "rank": 6
        }
      }
    },
    {
      "id": 204,
      "category": "AI/Tech Regulation",
      "use_case": "ai_model_as_medical_device_multiregime_qualification",
      "models": {
        "claude-opus-4.8": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 63.7,
          "note": "Rigorous multi-regime analysis; minor uncertainty on DUAA/AIDA status flagged appropriately.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 82.2,
          "note": "Thorough, well-structured; minor risk on AIDA status and 2024 AI Act citation.",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 133.2,
          "note": "Thorough, jurisdiction-accurate, actionable; minor PHIPA/PIPEDA boundary could sharpen.",
          "rank": 5
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 22.7,
          "note": "Clarifai 2023 warning letter and Aidoc K182117 predicate unverified/likely hallucinated",
          "rank": 3
        },
        "grok-4.3": {
          "total": 30.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 10.0,
          "note": "Solid multi-regime analysis; pitfalls section somewhat thin on specificity.",
          "rank": 4
        },
        "mistral-large": {
          "total": 21.8,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 65.4,
          "note": "Multiple hallucinated cases: Brain Products C-219/11, FDA v. AliveCor, IDx Warning Letter",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 18.6,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 10.1,
          "note": "Superficial analysis; misses MHRA specifics, NY SHIELD, Ontario PHIPA, MDR class rules",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 138.8,
          "note": "Solid multi-jurisdictional memo; AI Act citation number slightly uncertain but plausible.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 105.1,
          "note": "Breyer citation accurate; FLOP threshold for GPAI unverified but plausible.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 63.9,
          "note": "Exceptional cross-jurisdictional depth; PHIPA/PIPEDA distinction notably sophisticated.",
          "rank": 2
        }
      }
    },
    {
      "id": 205,
      "category": "Contract & Commercial",
      "use_case": "uae_difc_saudi_b2b_distribution_conflicting_governing_law",
      "models": {
        "claude-opus-4.8": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 76.4,
          "note": "Exceptional cross-jurisdictional analysis; Decree 34 flag and Art.8(2)(e) exclusion outstanding",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 26.9,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 118.9,
          "note": "Marka Holding v Anel Group and JT Case No. 1/2016 unverifiable; Dubai Cassation citations dubious",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 141.6,
          "note": "Rigorous multi-jurisdictional analysis; DIFC-LCIA abolition correctly flagged; redline incomplete",
          "rank": 3
        },
        "o3": {
          "total": 26.1,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 27.7,
          "note": "Multiple fabricated case citations severely undermine otherwise strong analytical framework",
          "rank": 6
        },
        "grok-4.3": {
          "total": 29.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 6.1,
          "note": "Solid analysis; KSA regulation citation slightly imprecise but no fabricated case law",
          "rank": 2
        },
        "mistral-large": {
          "total": 23.7,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 59.6,
          "note": "Multiple hallucinated cases; DIFC Law No.3/2005 articles unverified; KSA statute details suspect",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 18.6,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 9.0,
          "note": "Superficial analysis; misses MOU integration, KSA Agency Law, DIFC opt-in nuance",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 60.4,
          "note": "Solid analysis; UAE Commercial Agencies Law citation slightly imprecise but no hallucinations.",
          "rank": 4
        },
        "qwen3.7-max": {
          "total": 25.4,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 121.1,
          "note": "KSA M/134 unverified; DIFC seat/UAE law bifurcation is sound strategy",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 22.8,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 76.4,
          "note": "Guggenheim v. Gurtin DIFC ARB 004 appears fabricated; Article citations need verification",
          "rank": 9
        }
      }
    },
    {
      "id": 206,
      "category": "Data Privacy",
      "use_case": "employee_productivity_monitoring_gdpr_pdpa_pipeda_conflicts",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 68.2,
          "note": "Eastmond v CP Rail cited cautiously as analog; BAG/WP249 references accurate",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 136.1,
          "note": "BAG 2 AZR 133/22 cited for webcam/home-office ruling needs verification",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 136.8,
          "note": "Thorough multi-jurisdiction matrix; CJEU C-34/21 citation needs verification",
          "rank": 6
        },
        "o3": {
          "total": 29.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 18.7,
          "note": "BAG 2 AZR 681/16 cited but unverifiable; Schrems II misapplied to storage",
          "rank": 4
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.0,
          "note": "Solid matrix; DPDPA s.4 framing slightly oversimplified but defensible overall.",
          "rank": 3
        },
        "mistral-large": {
          "total": 3.7,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.7,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 46.3,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 25.0,
          "quality": 6.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 6.8,
          "note": "Solid framework but lacks depth on works council, DPIA, and PIPEDA nuances.",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 25.3,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 75.8,
          "note": "BAG case citation and PIPEDA case summaries unverifiable; DPDPA s.7(7) mischaracterized",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 29.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 101.9,
          "note": "Jones v. Tsige, DPF, BDSG, DPDPA all correctly cited and applied.",
          "rank": 1
        },
        "gemini-3.1-pro": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 51.5,
          "note": "BAG 2 AZR 681/16 cited but unverifiable; DPDP Sec 7(i) mischaracterized",
          "rank": 7
        }
      }
    },
    {
      "id": 207,
      "category": "M&A",
      "use_case": "de_spac_cross_border_tech_mna_control_and_filings",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 79.9,
          "note": "Rigorous multi-regime analysis; appropriate caveats; staggered closing well-structured.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 138.2,
          "note": "Exceptional cross-jurisdictional depth; truncated mid-sentence on FEMA/RBI analysis",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 154.0,
          "note": "Rigorous multi-jurisdictional analysis; correct citations; excellent cross-regime conflict identification.",
          "rank": 4
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 21.3,
          "note": "Rigorous multi-regime analysis; staggered closing and carve-out mechanisms well-structured.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.2,
          "note": "Solid cross-jurisdictional analysis; staggered closing mechanism well-structured and actionable.",
          "rank": 3
        },
        "mistral-large": {
          "total": 24.7,
          "quality": 7.0,
          "accuracy": 5.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 48.1,
          "note": "FIRB v. Northern Minerals and CFIUS v. Broadcom/Qualcomm citations are hallucinated/mischaracterized.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 17.0,
          "quality": 3.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.4,
          "note": "Superficial framework; missing SPAC-specific issues, SOCI Act, SCED, AWG §55 depth",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 102.3,
          "note": "Solid cross-jurisdictional analysis; CFIUS nexus theory slightly strained for non-US target.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 116.8,
          "note": "Solid framework; AWV thresholds slightly imprecise; CFIUS outbound analysis strong.",
          "rank": 8
        },
        "gemini-3.1-pro": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 57.3,
          "note": "Solid framework; CFIUS inbound analysis thin; SCOMET licensing nuance well-handled.",
          "rank": 6
        }
      }
    },
    {
      "id": 208,
      "category": "Banking/Finance",
      "use_case": "cross_border_secured_lending_uae_ohada_ny_priority",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 51.5,
          "note": "Exceptional cross-jurisdictional analysis; agent des sûretés distinction is production-ready.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 130.5,
          "note": "Exceptional cross-jurisdictional analysis; AUS article numbering requires local counsel verification.",
          "rank": 7
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 154.0,
          "note": "Exceptional cross-jurisdictional analysis; OHADA AUS article citations plausible but unverified.",
          "rank": 5
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 20.3,
          "note": "Technically sound, OHADA-specific, actionable; minor article-number verification needed.",
          "rank": 3
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.5,
          "note": "Solid OHADA/lex situs analysis; AUS article citations need verification but plausible.",
          "rank": 4
        },
        "mistral-large": {
          "total": 23.2,
          "quality": 7.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 41.5,
          "note": "Fabricated UAST article numbers; OHADA UAST structure misrepresented throughout.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 20.0,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.0,
          "note": "Superficial; misses RCCM filings, AUDCG share pledge formalities, parallel debt issues",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 55.0,
          "note": "Solid framework; article citations unverified but no fabricated case law",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 123.1,
          "note": "Exceptional cross-jurisdictional analysis; BCEAO/UEMOA and huissier points outstanding",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 34.2,
          "note": "Excellent OHADA/UEMOA analysis; pacte commissoire and RCCM steps actionable.",
          "rank": 2
        }
      }
    },
    {
      "id": 209,
      "category": "Corporate Governance",
      "use_case": "swiss_dual_listed_climate_risk_disclosure_vs_us_securities",
      "models": {
        "claude-opus-4.8": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 65.9,
          "note": "Exceptional cross-regime analysis; SEC rule status caveat appropriately flagged.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 137.5,
          "note": "Exceptional depth; SEC rule stay noted; Milieudefensie appeal reversal correctly flagged.",
          "rank": 7
        },
        "gpt-5.5": {
          "total": 26.4,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 135.8,
          "note": "Solid multi-regime analysis; answer truncated mid-sentence, missing disclosure strategy section",
          "rank": 8
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 21.3,
          "note": "Rigorous tri-regime analysis; SEC rule stay noted; minor CSRD timeline nuances possible",
          "rank": 2
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.2,
          "note": "Solid cross-jurisdictional analysis; SEC release numbers plausible but unverified.",
          "rank": 4
        },
        "mistral-large": {
          "total": 25.8,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 68.4,
          "note": "BGE 144 III 475 misapplied; SEC rule status overstated given court stays",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 20.0,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 7.8,
          "note": "FINMA Circular 2018/3 misapplied; SEC rules cited as proposed not final",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 112.1,
          "note": "Basic v. Levinson cited correctly; SEC rule status caveat appropriately noted.",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 120.0,
          "note": "Excellent cross-regime analysis; SEC rule litigation status appropriately caveated; minor citation risks",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 39.7,
          "note": "Excellent bifurcated materiality strategy; SEC rule litigation caveat appropriately flagged.",
          "rank": 3
        }
      }
    },
    {
      "id": 210,
      "category": "Employment Law",
      "use_case": "non_compete_and_garden_leave_multi_forum_enforceability",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 76.6,
          "note": "Rigorous multi-forum analysis; FTC rule status and case citations verified.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 130.9,
          "note": "Exceptional multi-jurisdictional depth; SB 699/Ryan LLC citations accurate and current.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 25.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 159.5,
          "note": "Strong multi-jurisdictional analysis; answer truncated before garden leave/non-solicit/re-papering sections.",
          "rank": 7
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 23.0,
          "note": "",
          "rank": 9
        },
        "grok-4.3": {
          "total": 29.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.5,
          "note": "Solid multi-forum analysis; FTC ban status correctly caveated; cites verifiable.",
          "rank": 3
        },
        "mistral-large": {
          "total": 25.2,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 50.3,
          "note": "Lansdowne/Frame/Loral citations unverifiable; NY UTSA citation wrong statute",
          "rank": 6
        },
        "llama-4-maverick": {
          "total": 18.6,
          "quality": 5.0,
          "accuracy": 3.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 9.7,
          "note": "Multiple hallucinated/misapplied citations; shallow on CA AB 2288 and Singapore nuance",
          "rank": 8
        },
        "deepseek-v3.2": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 91.7,
          "note": "Thorough multi-forum analysis; case citations appear verifiable and correctly applied.",
          "rank": 2
        },
        "qwen3.7-max": {
          "total": 1.9,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 1.9,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 129.7,
          "note": "",
          "rank": 10
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 49.4,
          "note": "Solid analysis; Tillman, AMN Healthcare, Ryan LLC citations appear accurate.",
          "rank": 5
        }
      }
    },
    {
      "id": 211,
      "category": "Intellectual Property / Tech Law",
      "use_case": "open_source_ai_training_codebase_multi_license_conflict",
      "models": {
        "claude-opus-4.8": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 47.8,
          "note": "Bartz v. Anthropic and LAION/LG Hamburg 2024 citations unverified/likely hallucinated",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 27.8,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 52.4,
          "note": "Nikon v. Seiko Epson 2003 citation unverifiable; likely hallucinated Japanese case law",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 86.5,
          "note": "Rigorous multi-jurisdictional analysis; all cited cases verifiable and correctly applied.",
          "rank": 3
        },
        "o3": {
          "total": 29.1,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 18.8,
          "note": "Oracle v. Rimini Street cite misapplied; Warhol citation incomplete but plausible",
          "rank": 2
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.7,
          "note": "Doe v. GitHub citation unverified but plausible; Apache/GPL incompatibility analysis solid.",
          "rank": 4
        },
        "mistral-large": {
          "total": 24.3,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 58.4,
          "note": "Pokémon GO Tokyo case and Google v. Copiepresse citations appear fabricated/misattributed.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 23.0,
          "quality": 6.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 9.2,
          "note": "Campbell cite is real but superficially applied; LGPD and Japan underdeveloped",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 24.4,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 86.1,
          "note": "Authors Guild v. OpenAI cite and Oracle framing partially hallucinated/misattributed",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 24.9,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 83.5,
          "note": "Thomson Reuters v. Ross citation inaccurate; Jacobsen applied correctly; Brazil analysis solid",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 48.2,
          "note": "Doe 1 v. GitHub cited cautiously; LGPD/Japan analysis solid and accurate",
          "rank": 6
        }
      }
    },
    {
      "id": 212,
      "category": "Regulatory Compliance",
      "use_case": "dora_operational_resilience_vs_uk_outsourcing_and_mas_notices",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 76.0,
          "note": "Rigorous, well-structured; minor uncertainty on PS16/24 and Notice numbering.",
          "rank": 4
        },
        "claude-sonnet-4.6": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 134.7,
          "note": "Exceptionally thorough, cut off mid-sentence; minor cite verification needed for MAS notices",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 133.4,
          "note": "Rigorous, well-structured; truncated at Control 4 MAS detail.",
          "rank": 5
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 21.7,
          "note": "Strong tri-regime analysis; minor inaccuracies in MAS Notice 655 paragraph citations.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 30.6,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.8,
          "note": "Precise, well-structured; minor risk on MAS Notice numbering accuracy.",
          "rank": 1
        },
        "mistral-large": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 55.4,
          "note": "MAS Notice 655 misidentified; actual TRM notice is MAS Notice 655 for insurers, not banks",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 8.5,
          "note": "Superficial analysis; wrong article citations; MAS Notice numbers unverified; pitfalls underdeveloped",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 107.4,
          "note": "MAS 1-hour threshold and some DORA article citations need verification.",
          "rank": 9
        },
        "qwen3.7-max": {
          "total": 29.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 132.5,
          "note": "Precise citations, actionable controls, strong CLOUD Act and CTP asymmetry insights.",
          "rank": 3
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 42.4,
          "note": "Solid framework; MAS Notice numbers and RTO figures need verification",
          "rank": 7
        }
      }
    },
    {
      "id": 213,
      "category": "Dispute Resolution",
      "use_case": "anti_suit_injunction_and_arbitration_ohada_vs_paris_seat",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 59.7,
          "note": "UniCredit v RusChemAlloy [2024] UKSC 30 appears fabricated; verify citation",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 115.3,
          "note": "Exceptional multi-forum analysis; UniCredit cite accurate; minor CCJA procedural nuances.",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 158.5,
          "note": "Solid multi-forum analysis; answer cut off before English anti-suit injunction section.",
          "rank": 6
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 17.6,
          "note": "",
          "rank": 9
        },
        "grok-4.3": {
          "total": 30.0,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 7.7,
          "note": "NIOC v Israel 2023 and Sorelec 2021 citations appear fabricated or misattributed",
          "rank": 2
        },
        "mistral-large": {
          "total": 3.2,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.2,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 44.3,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 21.6,
          "quality": 6.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 8.8,
          "note": "Superficial analysis; misses CCJA jurisdiction, kompetenz-kompetenz nuance, Brussels Recast inapplicability",
          "rank": 8
        },
        "deepseek-v3.2": {
          "total": 24.3,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 73.7,
          "note": "Several citations appear fabricated or misattributed, undermining otherwise strong analysis.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 90.1,
          "note": "Enka and Angelic Grace correctly cited; In Zone Brands plausible but verify.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 29.7,
          "note": "Deutsche Bank v RusChemAlliance correctly cited; Dalico doctrine accurate; strong multi-forum strategy.",
          "rank": 1
        }
      }
    },
    {
      "id": 214,
      "category": "Real Estate",
      "use_case": "gcc_mixed_use_tower_foreign_ownership_and_sharia_compliance",
      "models": {
        "claude-opus-4.8": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 80.5,
          "note": "Precise lex situs analysis, Qatar blockade nuance, and DLD entity-specificity excel.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 137.7,
          "note": "Rigorous multi-jurisdictional analysis; truncated Qatari section slightly undermines completeness",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 158.6,
          "note": "Precise citations, strong cross-jurisdictional nuance, correctly flags Qatari GCC ownership limits",
          "rank": 3
        },
        "o3": {
          "total": 27.1,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 21.5,
          "note": "Several citations unverifiable or fabricated; DLD Memo 321/2019, Notice 187/2019 suspect",
          "rank": 6
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.4,
          "note": "Solid cross-jurisdictional analysis; Qatari Law 13/2000 citation needs verification.",
          "rank": 2
        },
        "mistral-large": {
          "total": 20.7,
          "quality": 6.0,
          "accuracy": 3.0,
          "speed": 3.7,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 34.4,
          "note": "Multiple fabricated case citations and statute references undermine otherwise solid framework",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 17.0,
          "quality": 4.0,
          "accuracy": 3.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 7.3,
          "note": "Multiple hallucinated statutes; shallow analysis; misses DIFC/onshore enforcement nuances",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.8,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 79.5,
          "note": "Solid structure; Companies Law articles cited need verification; Qatari nuance thin",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 132.3,
          "note": "Strong structure; EMCR/JOP nuances excellent; Qatari outbound analysis thin",
          "rank": 8
        },
        "gemini-3.1-pro": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 44.9,
          "note": "Solid structure; DIFC-LCIA/DIAC conflation and minor law citation risks noted",
          "rank": 5
        }
      }
    },
    {
      "id": 215,
      "category": "Tax",
      "use_case": "pillar_two_minimum_tax_and_dst_interactions_for_saas_group",
      "models": {
        "claude-opus-4.8": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 81.9,
          "note": "Exceptional depth; India levy repeals flagged; GILTI/QDMTT stacking nuanced correctly.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 137.5,
          "note": "Nestle SA 2023 MFN cite misapplied; PPL Corp cite tangential but real",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 22.0,
          "quality": 6.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 159.4,
          "note": "Answer truncated mid-sentence; incomplete analysis limits actionability and restructuring proposals",
          "rank": 9
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 21.4,
          "note": "Technically rigorous, well-structured, minor uncertainty on QDMTT creditability status.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 11.3,
          "note": "Solid multi-regime analysis; FDII rate and UTPR mechanics slightly oversimplified.",
          "rank": 3
        },
        "mistral-large": {
          "total": 25.2,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 60.9,
          "note": "Vodafone case misapplied; SEP override claim legally incorrect re treaties",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 10.2,
          "note": "Superficial analysis; misses QDMTT, GILTI blending, DST non-creditability nuances",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.8,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 79.2,
          "note": "Solid, well-structured analysis; no hallucinated cites; minor GILTI nuance gaps.",
          "rank": 4
        },
        "qwen3.7-max": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 127.0,
          "note": "Exhaustion-of-remedies cite misapplied; FDII/UTPR analysis solid overall.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 53.1,
          "note": "Solid analysis; FDII rate post-2025 slightly imprecise, DST scope nuanced",
          "rank": 5
        }
      }
    },
    {
      "id": 216,
      "category": "Immigration",
      "use_case": "distributed_founders_visa_and_control_conflicts_canada_uae_uk",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 78.7,
          "note": "Exceptional cross-jurisdictional analysis; correctly flags OBCA 2021 reform and UAE CCL liberalisation.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 137.7,
          "note": "Thomson v MNR citation accurate; CBCA amendment framing slightly imprecise but defensible.",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 149.8,
          "note": "Thorough, well-cited, cuts off mid-sentence in recommendations section",
          "rank": 8
        },
        "o3": {
          "total": 28.1,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 17.4,
          "note": "Solid cross-jurisdictional advisory; minor ESR and SUV threshold inaccuracies noted.",
          "rank": 7
        },
        "grok-4.3": {
          "total": 31.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.2,
          "note": "Solid cross-jurisdictional analysis; PE and substance rules correctly distinguished from immigration.",
          "rank": 1
        },
        "mistral-large": {
          "total": 25.2,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 50.5,
          "note": "McClurg v Canada misapplied; Thomson v MNR citation plausible but context strained",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 20.6,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 8.8,
          "note": "Superficial analysis; misses SUV specifics, FCA substance, PE nuance deeply",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 80.1,
          "note": "Precise statutory citations, three conflation risks clearly identified, actionable recommendations throughout.",
          "rank": 4
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 96.5,
          "note": "Rigorous, jurisdiction-correct analysis with precise statutory citations and clear pitfall identification.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 45.6,
          "note": "Technically sound, well-structured; PE and conflation analysis particularly strong.",
          "rank": 3
        }
      }
    },
    {
      "id": 217,
      "category": "Criminal / White Collar",
      "use_case": "cross_border_sanctions_evasion_and_crypto_mixing_investigation",
      "models": {
        "claude-opus-4.8": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 92.3,
          "note": "Van Loon cite accurate; Tornado Cash/Storm indictment framing slightly imprecise but defensible",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 134.9,
          "note": "Exceptional depth; Tornado Cash/Circle freeze cite accurate; memo cuts off mid-sentence",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 186.6,
          "note": "Strong US analysis, memo cuts off mid-EU section, missing UAE/Singapore/extradition depth",
          "rank": 8
        },
        "o3": {
          "total": 27.1,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 22.5,
          "note": "US-Singapore extradition treaty claim and EU-UAE 2022 MLA agreement appear fabricated",
          "rank": 7
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 10.2,
          "note": "Solid framework; Tornado Cash reference imprecise but not fabricated case law.",
          "rank": 1
        },
        "mistral-large": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 57.8,
          "note": "Solid framework; minor inaccuracies on EPPO scope and DMCC penalty differentials.",
          "rank": 6
        },
        "llama-4-maverick": {
          "total": 22.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 7.7,
          "note": "Superficial analysis; misses CFTC/DOJ tools, thin MLA/extradition, weak DeFi nexus",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.8,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 82.9,
          "note": "Solid framework; Tornado Cash/BitPay cites accurate but Tether nexus overstated.",
          "rank": 9
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 104.6,
          "note": "Mints v NBT cite misapplied; US-UAE 1994 treaty unverified; otherwise excellent",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 57.7,
          "note": "Solid framework; EO 14114 citation needs verification; Tornado Cash precedent slightly overstated.",
          "rank": 5
        }
      }
    },
    {
      "id": 218,
      "category": "International Trade",
      "use_case": "dual_use_chip_export_controls_trilateral_supply_chain",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 74.7,
          "note": "Exceptional FDP/de minimis analysis; trilateral stacking clearly explained; no hallucinations.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 136.1,
          "note": "Exceptional trilateral analysis; FDPR, de minimis, re-export scenarios precisely handled.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 137.9,
          "note": "Rigorous trilateral analysis; correct de minimis/FDP distinctions; no hallucinated cites.",
          "rank": 7
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 19.0,
          "note": "Rigorous trilateral analysis; minor FEFTA guideline citation specificity concerns.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.8,
          "note": "Solid trilateral analysis; de minimis nuances mostly correct, actionable controls included.",
          "rank": 6
        },
        "mistral-large": {
          "total": 23.7,
          "quality": 6.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 27.5,
          "note": "Solid framework but incomplete roadmap; FDPR/Japan FEFTA integration underdeveloped.",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 25.0,
          "quality": 6.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 8.4,
          "note": "Solid framework but lacks depth on FDP rule, UAE controls, and ASML-specific nuances.",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 78.9,
          "note": "Rigorous multi-jurisdictional analysis; FDP rules and deemed export well-handled.",
          "rank": 5
        },
        "qwen3.7-max": {
          "total": 29.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 111.2,
          "note": "Rigorous trilateral analysis; FDP rules, OTA gating, and EUC flow-down well-handled.",
          "rank": 3
        },
        "gemini-3.1-pro": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 53.6,
          "note": "Strong structure; Dutch national regulation citation plausible but unverified; de minimis analysis solid.",
          "rank": 8
        }
      }
    },
    {
      "id": 219,
      "category": "Environmental/ESG",
      "use_case": "brazil_lgpd_and_eu_csrd_supply_chain_deforestation_data",
      "models": {
        "claude-opus-4.8": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 72.7,
          "note": "Exceptional tri-regime analysis; ANPD Resolução 19/2024 citation needs verification.",
          "rank": 4
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 129.0,
          "note": "Exceptional cross-jurisdictional depth; minor uncertainty on ANPD adequacy status flagged appropriately",
          "rank": 7
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 149.7,
          "note": "Rigorous tri-jurisdictional analysis; ANPD Resolution 19/2024 cited correctly; answer truncated",
          "rank": 8
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 14.6,
          "note": "Rigorous tri-jurisdictional analysis; ANPD guidance citation slightly imprecise but defensible.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 32.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 7.7,
          "note": "Precise tri-regime analysis, correct citations, actionable framework, no hallucinations.",
          "rank": 1
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 59.7,
          "note": "Commission Decision 2023/2877 and 2000/518 cited incorrectly for Brazil/Switzerland adequacy",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 22.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 7.0,
          "note": "Superficial treatment; misses ANPD adequacy status, EUDR geolocation specifics, nRevFADP nuances",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 66.3,
          "note": "Thorough, jurisdiction-accurate, actionable framework with correct LGPD/nFADP/GDPR distinctions.",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 29.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 92.1,
          "note": "Exceptional tri-regime analysis; EUDR legal basis error point is particularly valuable.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 32.2,
          "note": "Excellent cross-jurisdictional analysis; ANPD SCCs not yet formally published caveat needed.",
          "rank": 3
        }
      }
    },
    {
      "id": 220,
      "category": "Arbitration",
      "use_case": "investment_treaty_vs_commercial_arbitration_construction_dispute",
      "models": {
        "claude-opus-4.8": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 86.9,
          "note": "Vivendi, SGS, Metal-Tech, Pantechniki citations appear accurate and well-applied.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 128.3,
          "note": "Exceptional depth; case citations verifiable; memo truncated at Phase 2 sequencing.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 163.4,
          "note": "Rigorous, well-cited, actionable; verifiable case law; memo cut off mid-sentence.",
          "rank": 2
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 19.5,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 29.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 6.8,
          "note": "Solid framework; BIT years unverified; World Duty Free citation slightly misapplied.",
          "rank": 3
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 51.6,
          "note": "Vivendi ARB/97/3 details mischaracterized; BIT articles unverified; solid structure",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 20.6,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.2,
          "note": "Superficial analysis; misses MFN, denial of benefits, SPV nationality planning depth.",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 24.8,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 69.2,
          "note": "Putrabali cite misapplied; Aguas/CMS/Phoenix citations plausible but context strained",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 110.7,
          "note": "DS Construction/PCA 2021-33 unverified; Siag corruption standard overstated",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 26.7,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 39.8,
          "note": "Woodruff v Venezuela and Belokon citations are dubious/misapplied treaty cases",
          "rank": 6
        }
      }
    },
    {
      "id": 221,
      "category": "Insurance",
      "use_case": "pandemic_and_cyber_bi_event_property_and_bi_coverage",
      "models": {
        "claude-opus-4.8": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 81.9,
          "note": "Consolidated Restaurant 2024 NY CoA and TKC London citations unverifiable/likely hallucinated",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 25.4,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 142.0,
          "note": "Several citations unverifiable or likely hallucinated; strong structure but cite risk high",
          "rank": 2
        },
        "gpt-5.5": {
          "total": 25.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 169.7,
          "note": "TKC London cite unverifiable; Consolidated Restaurant citation needs verification",
          "rank": 3
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 27.4,
          "note": "",
          "rank": 6
        },
        "grok-4.3": {
          "total": 4.6,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.6,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 8.0,
          "note": "",
          "rank": 5
        },
        "mistral-large": {
          "total": 3.7,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.7,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 38.0,
          "note": "",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 17.0,
          "quality": 4.0,
          "accuracy": 3.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 6.5,
          "note": "Multiple hallucinated citations; shallow analysis; cyber trigger barely addressed",
          "rank": 4
        },
        "deepseek-v3.2": {
          "total": 2.8,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 2.8,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 68.8,
          "note": "",
          "rank": 9
        },
        "qwen3.7-max": {
          "total": 1.9,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 1.9,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 103.1,
          "note": "",
          "rank": 10
        },
        "gemini-3.1-pro": {
          "total": 3.2,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.2,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 38.9,
          "note": "",
          "rank": 8
        }
      }
    },
    {
      "id": 222,
      "category": "Healthcare/Pharma",
      "use_case": "telemedicine_cross_border_prescribing_and_data_localization",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 64.5,
          "note": "Exceptional multi-jurisdictional memo; accurate cites, three errors correctly identified.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 122.4,
          "note": "Exceptional depth; three errors well-articulated; NDPA 2023 correctly cited.",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 157.0,
          "note": "Solid multi-jurisdiction analysis; memo truncated before completing Germany/GDPR/Nigeria sections.",
          "rank": 9
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 21.0,
          "note": "Thorough, well-structured memo; minor risk of unverified Nigerian/UAE regulatory details.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 30.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.2,
          "note": "Solid, well-structured memo; correct GDPR scope analysis; no hallucinated citations.",
          "rank": 3
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 48.5,
          "note": "DISHA not enacted; DHA Resolution No. 7/2020 specifics unverified; NDPR localization overstated",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 19.6,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 7.8,
          "note": "Shallow analysis; UAE Telemedicine Law citation unverifiable; misses key GDPR transfer risks",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.3,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 74.2,
          "note": "Solid multi-jurisdictional memo; three errors well-identified; no hallucinated citations.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 89.2,
          "note": "Schrems II cited correctly; UAE lex specialis point is sophisticated and accurate.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 35.2,
          "note": "Solid memo; minor GDPR Article 3(1) establishment analysis slightly oversimplified.",
          "rank": 4
        }
      }
    },
    {
      "id": 223,
      "category": "Bankruptcy & Insolvency",
      "use_case": "crypto_custody_platform_insolvency_and_trust_analysis",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 66.5,
          "note": "Exceptional cross-jurisdictional depth; DEBA Art.242b and UCC Art.12 correctly applied.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 139.9,
          "note": "D'Aloia 2024 cite unverifiable; core framework is sophisticated and accurate",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 163.9,
          "note": "Solid English law section; Swiss/NY comparative analysis incomplete but citations verified.",
          "rank": 9
        },
        "o3": {
          "total": 27.1,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 22.1,
          "note": "Ion Science, Re AG Hong Kong citations dubious; NY UCC Art.12 adoption date unverified",
          "rank": 4
        },
        "grok-4.3": {
          "total": 27.0,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 8.2,
          "note": "Tulip Trading mischaracterized; BGer 4A_306/2022 citation unverifiable; core analysis solid",
          "rank": 5
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 44.0,
          "note": "BGE citations unverifiable; Mt. Gox NY Bankr cite misattributed/hallucinated",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 18.6,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 9.6,
          "note": "Superficial analysis; misses DEBA Art.37d, FINMA rules, UCC12 specifics",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 94.2,
          "note": "Hunter v Moss application strained; UCC Art.12 control analysis slightly oversimplified",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 113.4,
          "note": "Sophisticated tri-jurisdictional analysis; Art.242a DEBA and UCC12 correctly applied.",
          "rank": 2
        },
        "gemini-3.1-pro": {
          "total": 26.7,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 38.6,
          "note": "Wang v Darby cite unverifiable; Celsius citation details need verification",
          "rank": 6
        }
      }
    },
    {
      "id": 224,
      "category": "Securities",
      "use_case": "mica_vs_sec_tokenized_equity_exchange_listing",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 66.9,
          "note": "Rigorous MiCA/MiFID II delineation; Landreth/Howey distinction correctly applied.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 137.2,
          "note": "Exceptional depth; Hinman speech caveat noted; Landreth/Howey dual analysis correct.",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 142.3,
          "note": "Exceptional cross-jurisdictional analysis; Landreth and Howey correctly applied; answer truncated.",
          "rank": 3
        },
        "o3": {
          "total": 26.1,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 20.4,
          "note": "Hallucinated SEC v. Hindenburg Research Tokens (2026) citation penalizes accuracy significantly.",
          "rank": 8
        },
        "grok-4.3": {
          "total": 32.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.3,
          "note": "Precise MiCA/MiFID II hierarchy, correct Howey cite, actionable cross-border strategy.",
          "rank": 1
        },
        "mistral-large": {
          "total": 23.2,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 47.4,
          "note": "ESMA v. Bitpanda C-605/21 is hallucinated; deducts significantly on accuracy.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 18.6,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.9,
          "note": "Superficial analysis; misclassifies tokens as ARTs, misses MiFID II exclusion logic",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 96.8,
          "note": "Rigorous, well-structured; Ripple cite contextually stretched but not fabricated.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 96.9,
          "note": "Rigorous MiCA/MiFID II/Howey analysis; eWpG and DLT Pilot correctly cited.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 28.7,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 39.8,
          "note": "Landreth and eWpG citations verified; MiCA carve-out analysis precise and actionable.",
          "rank": 5
        }
      }
    },
    {
      "id": 225,
      "category": "Construction",
      "use_case": "fidic_epc_contract_risk_allocation_ksa_uk_choice_of_law",
      "models": {
        "claude-opus-4.8": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 62.1,
          "note": "Cavendish and Dunlop citations accurate; KSA Civil Transactions Law flagged with appropriate caveats.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 120.0,
          "note": "Exceptional depth; KSA 2021 CTL integration and three oversimplifications are outstanding.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 155.4,
          "note": "Rigorous dual-system analysis; Saudi Civil Transactions Law citation needs verification",
          "rank": 5
        },
        "o3": {
          "total": 26.1,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 13.1,
          "note": "Saudi case numbers and Royal Decree citations appear fabricated; Majalla §590 unverifiable",
          "rank": 6
        },
        "grok-4.3": {
          "total": 26.0,
          "quality": 7.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 5.3,
          "note": "Solid analysis but dense prose; lacks structured headings and deeper FIDIC clause specificity.",
          "rank": 7
        },
        "mistral-large": {
          "total": 22.2,
          "quality": 7.0,
          "accuracy": 3.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 40.7,
          "note": "Multiple fabricated Saudi case citations and misattributed statutes severely undermine credibility.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 20.6,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 5.8,
          "note": "Rome I inapplicable here; shallow on riba/gharar, KSA enforcement mechanics",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 57.9,
          "note": "Cavendish citation accurate; Saudi Civil Transactions Law reference plausible and well-integrated.",
          "rank": 3
        },
        "qwen3.7-max": {
          "total": 24.9,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 98.3,
          "note": "CTL Article 179 and Ralli Bros application unverified; Makdessi citation accurate",
          "rank": 8
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 37.8,
          "note": "Makdessi cite accurate; Sharia LD analysis nuanced; enforcement risks well-flagged.",
          "rank": 1
        }
      }
    },
    {
      "id": 226,
      "category": "Government Contracts",
      "use_case": "canadian_defence_procurement_offset_and_it_security_mismatch",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 67.0,
          "note": "Exceptional multi-regime analysis; CGP, TAA, VP obligations correctly distinguished.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 136.5,
          "note": "Exceptional cross-jurisdictional analysis; USML category exclusion list needs verification.",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 131.5,
          "note": "Comprehensive, jurisdiction-correct, cites verified statutes, no hallucinated case law.",
          "rank": 4
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 16.2,
          "note": "Thorough, well-structured; minor ITB Policy section citations unverifiable but plausible",
          "rank": 3
        },
        "grok-4.3": {
          "total": 30.6,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.7,
          "note": "Solid cross-jurisdictional analysis; CGP and TAA/MLA treatment accurate and actionable.",
          "rank": 2
        },
        "mistral-large": {
          "total": 22.2,
          "quality": 7.0,
          "accuracy": 3.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 45.9,
          "note": "Bombardier FC 1116 and General Dynamics FC 1090 citations appear fabricated",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 17.0,
          "quality": 3.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 6.1,
          "note": "Generic overview; lacks ITB Value Proposition specifics, TAA mechanics, ITAR §126.18 nuance",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.3,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 78.0,
          "note": "Solid tri-jurisdictional analysis; §126.18 UK/Australia scope correctly caveated.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 98.7,
          "note": "Rigorous tri-jurisdictional analysis; SACC D0300E citation needs verification",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 33.0,
          "note": "Strong framework; SACC clause numbers and TB Policy citations need verification.",
          "rank": 7
        }
      }
    },
    {
      "id": 227,
      "category": "AI/Tech Regulation",
      "use_case": "ai_act_foundation_model_deployment_with_us_export_controls",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 47.6,
          "note": "Exceptional cross-jurisdictional analysis; UK carve-out and FLOP threshold flagged correctly.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 55.7,
          "note": "Rigorous cross-jurisdictional analysis; Article 50(1) citation slightly imprecise but minor",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 110.4,
          "note": "Precise citations, five misreading risks, strong cross-jurisdictional export/AI Act integration.",
          "rank": 3
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 21.2,
          "note": "Minor article numbering uncertainty; strong cross-jurisdictional analysis and actionable governance measures.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 27.0,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 6.4,
          "note": "Solid framework but underdeveloped; misses ITAR specifics and contractual clause detail",
          "rank": 8
        },
        "mistral-large": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 42.9,
          "note": "Article numbering occasionally imprecise; UK AI Bill characterization slightly premature but defensible",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 18.6,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.2,
          "note": "Superficial analysis; misses GPAI systemic risk tier, BIS licensing specifics, UK GDPR divergence",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 79.6,
          "note": "Rigorous dual-regime analysis; minor gap on UK GDPR/AI governance specifics.",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 114.6,
          "note": "Technically precise, strong cross-jurisdictional analysis, UK TDM point particularly valuable.",
          "rank": 9
        },
        "gemini-3.1-pro": {
          "total": 28.7,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 34.6,
          "note": "Solid dual-jurisdiction analysis; SaaS export characterization slightly oversimplified.",
          "rank": 5
        }
      }
    },
    {
      "id": 228,
      "category": "Data Privacy",
      "use_case": "health_app_data_localization_gdpr_pdpb_india_pipeda",
      "models": {
        "claude-opus-4.8": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 37.9,
          "note": "Accurate, well-structured; FTC/state law flags add practical value.",
          "rank": 4
        },
        "claude-sonnet-4.6": {
          "total": 28.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 47.6,
          "note": "Excellent memo; Planet49 cite accurate; FTC HBN Rule addition valuable.",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 84.3,
          "note": "Schrems II cite accurate; DPDP no-sensitive-tier point correctly flagged.",
          "rank": 7
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 21.7,
          "note": "Schrems II cite accurate; DPDP Act analysis current and nuanced throughout.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 31.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 6.6,
          "note": "Accurate, well-structured memo; DPDP sensitivity gap and TIA nuance noted.",
          "rank": 2
        },
        "mistral-large": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 50.6,
          "note": "Schrems II cite accurate; DPDP sensitive data nuance slightly overstated",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 21.6,
          "quality": 6.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 8.7,
          "note": "DPDP Act section citations largely fabricated; localization overstated; pitfalls underdeveloped",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 85.0,
          "note": "Thorough, jurisdiction-accurate memo; pitfalls well-flagged; no hallucinated citations.",
          "rank": 9
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 65.2,
          "note": "Accurate, well-structured; correctly flags DPDP localization myth and GDPR special category pitfall.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 36.2,
          "note": "Accurate, well-structured; correctly flags DPDP localization myth and SPDI supersession.",
          "rank": 3
        }
      }
    },
    {
      "id": 229,
      "category": "Contract & Commercial",
      "use_case": "governing_law_swap_clause_sg_pdpa_vs_ny_ucc",
      "models": {
        "claude-opus-4.8": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 59.3,
          "note": "BNA v BNB and Enka v Chubb cited accurately; CISG trap excellent.",
          "rank": 4
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 127.1,
          "note": "Poh Soon Kiat citation misapplied; Cooney v Osgood tangential but not hallucinated",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 133.2,
          "note": "Rigorous redline; CISG/Incoterms/PDPA traps well-identified; answer truncated",
          "rank": 5
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 16.6,
          "note": "Precise redline; CISG trap and ASEAN regime fragmentation expertly flagged.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 7.5,
          "note": "Loucks is real; ASEAN MCCs cited correctly; SIAC seat well-reasoned.",
          "rank": 1
        },
        "mistral-large": {
          "total": 26.8,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 56.7,
          "note": "PDPA §48J penalty formula and §32 compensation framing overstated/misapplied",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 24.0,
          "quality": 6.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 5.6,
          "note": "Competent but shallow; misses UCC Article 2 warranty/FOB traps and PDPA adequacy gaps.",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 51.9,
          "note": "PDPA extraterritorial scope slightly overstated; UCC/data split clause well-executed",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 1.9,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 1.9,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 91.5,
          "note": "",
          "rank": 10
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 40.0,
          "note": "Asante cite is real but jurisdiction-mismatched; CISG/Singapore status needs verification",
          "rank": 3
        }
      }
    },
    {
      "id": 230,
      "category": "Corporate Governance",
      "use_case": "dual_class_tech_ipo_uk_listing_with_delaware_parent",
      "models": {
        "claude-opus-4.8": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 47.7,
          "note": "Excellent UKLR 2024 update; accurate case law; three confusion points well-executed.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 55.9,
          "note": "Exceptional cross-jurisdictional analysis; UKLR reforms accurately cited; minor Takeover Code caveat needed.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 109.1,
          "note": "Excellent dual-regime analysis; UKLR 2024 reform correctly flagged; cases verifiable.",
          "rank": 6
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 18.4,
          "note": "Technically strong; LR cite numbering slightly imprecise but substantively correct.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 26.6,
          "quality": 7.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 13.8,
          "note": "LR premium/standard analysis oversimplified; misses 2024 FCA listing reform context",
          "rank": 7
        },
        "mistral-large": {
          "total": 25.8,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 53.4,
          "note": "Revlon cited incorrectly; LR numbering reflects pre-2024 rules, not current UKLR",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 5.0,
          "accuracy": 3.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 6.8,
          "note": "CP19/24 wrong; 2022 UKLR reforms on dual-class ignored; case cites dubious",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 24.9,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 84.1,
          "note": "FCA PMTN 501.1 and LR 2.2.4R specifics appear fabricated/misattributed",
          "rank": 9
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 101.9,
          "note": "Excellent cross-jurisdictional analysis; UKLR 6.4 specifics need verification.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 43.9,
          "note": "Revlon duties characterization oversimplified post-Corwin; otherwise excellent cross-jurisdictional analysis",
          "rank": 3
        }
      }
    },
    {
      "id": 231,
      "category": "Employment Law",
      "use_case": "remote_worker_redundancy_ca_nigeria_uk",
      "models": {
        "claude-opus-4.8": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 39.1,
          "note": "Ravat/Lawson correctly cited; Aloysius v Diamond Bank unverified but plausible.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 50.6,
          "note": "Botham cite slightly misattributed but substantively correct; Rome I analysis excellent.",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 117.5,
          "note": "Solid tri-jurisdictional analysis; Nigerian case citations need verification but plausible.",
          "rank": 6
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 17.2,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 10.3,
          "note": "Lawson v Serco correct; NICN cite vague but not fabricated; solid analysis",
          "rank": 4
        },
        "mistral-large": {
          "total": 26.8,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 48.0,
          "note": "African Reinsurance v. Fantaye citation unverifiable; Rome I post-Brexit framing imprecise",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 24.0,
          "quality": 6.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 6.6,
          "note": "Lawson v Serco and Nedlloyd correct; Nigerian analysis thin; misses Rome I inapplicability post-Brexit",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 24.9,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 51.8,
          "note": "Hottak cite misapplied; Duncombe/Lawson/Nedlloyd correctly cited but Hottak hallucinated",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 83.5,
          "note": "Lawson v Serco correctly cited; Rome I retained EU law accurately applied post-Brexit.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 33.6,
          "note": "Lawson v Serco and Oman v Delta correctly cited; Rome I analysis precise.",
          "rank": 1
        }
      }
    },
    {
      "id": 232,
      "category": "M&A",
      "use_case": "carve_in_asset_purchase_br_spinoff_us_public_company",
      "models": {
        "claude-opus-4.8": {
          "total": 31.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 50.5,
          "note": "Rigorous cross-jurisdictional analysis; ANPD Res. 19/2024 cite needs verification.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 71.1,
          "note": "Exceptional cross-jurisdictional analysis; all cited statutes verifiable and correctly applied.",
          "rank": 2
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 107.8,
          "note": "Rigorous, jurisdiction-correct memo; TSC/Basic/Malone cites verified and accurate.",
          "rank": 6
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 24.2,
          "note": "STJ Resp 1.341.383/ES unverified; EDPB Rec 01/2020 citation plausible but unconfirmed",
          "rank": 4
        },
        "grok-4.3": {
          "total": 30.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.0,
          "note": "Solid cross-jurisdictional memo; CLT/CTN citations accurate; no hallucinated case law.",
          "rank": 5
        },
        "mistral-large": {
          "total": 26.8,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 67.3,
          "note": "TST Súmula 430 misapplied; Convênio ICMS 115/2003 unverified citation.",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 19.6,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 9.2,
          "note": "Article 1.052/1.113 Civil Code citations misapplied; shallow analysis throughout",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.4,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 94.4,
          "note": "Solid memo; Basic v. Levinson correctly cited; four distinctions well-executed.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 89.0,
          "note": "ANPD Resolution 15/2024 unverified; Gimbel cite accurate but marginal risk",
          "rank": 9
        },
        "gemini-3.1-pro": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 63.0,
          "note": "ANPD Resolution 15/2024 unverified; otherwise rigorous cross-jurisdictional analysis.",
          "rank": 3
        }
      }
    },
    {
      "id": 233,
      "category": "Banking/Finance",
      "use_case": "islamic_project_finance_sukuk_ksa_uk_listing",
      "models": {
        "claude-opus-4.8": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 51.1,
          "note": "Beximco cite verified; DIFC/KSA enforcement nuances expertly handled throughout.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 53.8,
          "note": "Rigorous multi-jurisdictional analysis; minor FCA technical note citation unverified",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 75.1,
          "note": "Beximco cite verified; Saudi decree citations plausible but unverified precisely.",
          "rank": 5
        },
        "o3": {
          "total": 26.1,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 18.3,
          "note": "Riyadh Court case 342952/37 and FCA TN/408.2 appear fabricated",
          "rank": 7
        },
        "grok-4.3": {
          "total": 29.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.3,
          "note": "Solid cross-jurisdictional analysis; risk-weighting section could be deeper.",
          "rank": 4
        },
        "mistral-large": {
          "total": 23.3,
          "quality": 8.0,
          "accuracy": 4.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 60.3,
          "note": "FCA v Deutsche Bank 2012 and SABIC v Mobil 2006 citations appear fabricated",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 17.6,
          "quality": 5.0,
          "accuracy": 3.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 7.8,
          "note": "BCCI cite misapplied; FCA 2012 guidance fabricated; shallow KSA nuance",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 25.4,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 75.5,
          "note": "Strong structure; Saudi Civil Transactions Law cite slightly imprecise but no fabricated cases.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 87.0,
          "note": "Shamil Bank cite accurate; KSA Trust Law 2023 and AAOIFI standards correctly applied.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 45.5,
          "note": "Shamil Bank cite verified; KSA Civil Transactions Law 2023 accurately referenced.",
          "rank": 2
        }
      }
    },
    {
      "id": 234,
      "category": "Real Estate",
      "use_case": "cross_border_data_center_leases_qatar_germany_texas",
      "models": {
        "claude-opus-4.8": {
          "total": 31.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 43.5,
          "note": "Rigorous, modular, jurisdiction-specific; Austin Hill cite verifiable and accurate.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 48.9,
          "note": "Coinmach cite verified; EnEfG, BDSG, BGB citations accurate and actionable.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 93.6,
          "note": "Rigorous, jurisdiction-specific, correctly flags hallucination risks and real statutes.",
          "rank": 6
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 21.5,
          "note": "Rigorous, jurisdiction-specific; Fashion ID cite accurate; KAHRAMAA circular unverifiable but plausible",
          "rank": 3
        },
        "grok-4.3": {
          "total": 30.6,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.5,
          "note": "Solid analysis; HBDI guidance claim needs verification but plausible.",
          "rank": 5
        },
        "mistral-large": {
          "total": 2.3,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 2.3,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 55.7,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 20.0,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 6.9,
          "note": "Texas Privacy Protection Act fabricated; BDSG/GEG oversimplified; Qatar analysis thin",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 79.2,
          "note": "Accurate, well-structured; PDPPL and BGB citations verifiable and correct.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 80.2,
          "note": "Rigorous, well-structured; PDPPL citation plausible but verify Law No. 13/2016 accuracy.",
          "rank": 8
        },
        "gemini-3.1-pro": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 45.4,
          "note": "Precise, jurisdiction-correct analysis with strong hallucination warnings and BDSG nuance.",
          "rank": 2
        }
      }
    },
    {
      "id": 235,
      "category": "Tax",
      "use_case": "founder_exit_structuring_swiss_holdco_india_uae",
      "models": {
        "claude-opus-4.8": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 49.3,
          "note": "Rigorous, jurisdiction-correct analysis; Vodafone cite and MLI PPT treatment accurate.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 65.2,
          "note": "Sanofi cite is real; MLI/POEM/PPT analysis is rigorous and jurisdiction-correct.",
          "rank": 2
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 142.8,
          "note": "Nestlé SC cite plausible but unverified; otherwise rigorous, jurisdiction-correct analysis.",
          "rank": 6
        },
        "o3": {
          "total": 28.1,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 18.9,
          "note": "Solid memo; MLI Art.9(4) framing and AAR Form 3CA reference need verification.",
          "rank": 4
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 12.2,
          "note": "Solid memo; minor treaty-article citations unverified but no clear hallucinations.",
          "rank": 3
        },
        "mistral-large": {
          "total": 24.7,
          "quality": 7.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 45.4,
          "note": "Vodafone cite misapplied; Swiss Article 70 threshold misstated; UAE ESR oversimplified",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 18.0,
          "quality": 4.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 7.7,
          "note": "Superficial analysis; Article 69 citation dubious; misses UAE CT 2023 nuances",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 25.4,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 122.7,
          "note": "Vodafone/Azadi Bachao citations accurate; UAE CT nuances and MLI PPT threshold slightly overstated",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 120.5,
          "note": "UAE IP exclusion claim needs verification; MLI MAP analysis is strong.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 27.3,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 67.9,
          "note": "Solid memo; POEM cliff-effect and MLI tie-breaker traps well-articulated.",
          "rank": 5
        }
      }
    },
    {
      "id": 236,
      "category": "Immigration",
      "use_case": "distributed_c_suite_immigration_and_permanent_establishment_risk",
      "models": {
        "claude-opus-4.8": {
          "total": 31.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 40.0,
          "note": "Rigorous, treaty-specific, flags US Model deviation; actionable subsidiary recommendation.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 52.3,
          "note": "De Beers cite accurate; MLI US signatory status overstated but minor.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 90.7,
          "note": "Accurate citations, strong PE analysis, clear error identification, production-ready memo.",
          "rank": 6
        },
        "o3": {
          "total": 29.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 20.6,
          "note": "TAM 2009-037 and CRA Circular 97-12R6 appear fabricated or misidentified.",
          "rank": 3
        },
        "grok-4.3": {
          "total": 30.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.3,
          "note": "Solid memo; three error points clear; minor gaps on treaty specifics.",
          "rank": 2
        },
        "mistral-large": {
          "total": 21.8,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 51.6,
          "note": "Multiple hallucinated cases: Daimler Argentina, FCE Bank 2012, Swedish court cited incorrectly",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 22.6,
          "quality": 6.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 7.3,
          "note": "R-1-immigration Regulations citation fabricated; IMP C41 exemption details questionable",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 72.1,
          "note": "Solid memo; OECD Commentary citations unverifiable but plausible; L-1A analysis accurate.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 78.6,
          "note": "De Beers cite accurate; strong DAPE analysis; minor IRAS treaty nuance missing",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 28.2,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 41.6,
          "note": "Solid memo; POEM/DAPE analysis strong; US O-1 agent route slightly optimistic.",
          "rank": 5
        }
      }
    },
    {
      "id": 237,
      "category": "Criminal/White Collar",
      "use_case": "cross_border_insider_trading_us_eu_uae",
      "models": {
        "claude-opus-4.8": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 48.8,
          "note": "Exceptional cross-jurisdictional analysis; Morrison, Spector, UAE fragmentation handled correctly.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 60.3,
          "note": "Rigorous, jurisdiction-correct analysis; Dodd-Frank 929P extraterritoriality correctly nuanced.",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 143.8,
          "note": "Rigorous, well-cited, correctly flags Morrison limits and UAE distinctions.",
          "rank": 7
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 19.3,
          "note": "SEC v. Berndt citation unverifiable; Traffic Monsoon inapposite; otherwise excellent analysis",
          "rank": 4
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.9,
          "note": "Morrison and Basic citations verified; three error flags clearly identified; UAE nuance strong",
          "rank": 1
        },
        "mistral-large": {
          "total": 25.8,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 55.3,
          "note": "SEC v. Traffic Monsoon misapplied; SEC v. Manor citation unverifiable/hallucinated",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 5.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 8.2,
          "note": "SEC v. Sharef cite unverified; UAE law shallow; misattribution section underdeveloped",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 108.0,
          "note": "Vilar cite tangential but not hallucinated; Morrison analysis precise and correct.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 97.4,
          "note": "Rigorous, well-structured; correctly distinguishes Morrison, MAR, UAE bifurcation throughout.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 44.1,
          "note": "Rigorous, well-cited analysis; Morrison/O'Hagan/Chiarella correctly applied throughout.",
          "rank": 3
        }
      }
    },
    {
      "id": 238,
      "category": "International Trade",
      "use_case": "sanctions_and_export_controls_for_russian_oilfield_software",
      "models": {
        "claude-opus-4.8": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 47.1,
          "note": "Rigorous multi-jurisdictional analysis; correct FDP/de minimis treatment; no hallucinations.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 63.6,
          "note": "Rigorous multi-jurisdictional analysis; FDPR, sector directives, personal jurisdiction hooks all correct.",
          "rank": 2
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 155.5,
          "note": "Comprehensive, jurisdiction-accurate, cites real regulations, flags all key analytical errors.",
          "rank": 6
        },
        "o3": {
          "total": 29.1,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 28.1,
          "note": "Voronov S.D.N.Y. 2025 and PryceTech SA 2024 appear fabricated citations",
          "rank": 5
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 14.6,
          "note": "Solid multi-jurisdictional analysis; EO 13662 Directive 4 framing slightly dated.",
          "rank": 4
        },
        "mistral-large": {
          "total": 24.3,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 67.1,
          "note": "CJEU C-72/11, ZTE, CSE Global citations are fabricated or misapplied.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 22.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 7.0,
          "note": "Superficial analysis; misses OFAC SDN/SSI nuance, EAR 744.21, cloud-service specifics",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 107.9,
          "note": "Strong multi-jurisdictional analysis; minor overreach on Directive 4 services scope.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 26.9,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 104.2,
          "note": "Solid multi-jurisdictional analysis; EO 14071 scope slightly overstated for non-US persons.",
          "rank": 8
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 41.7,
          "note": "Rigorous multi-jurisdictional analysis; minor OFAC determination date unverified.",
          "rank": 3
        }
      }
    },
    {
      "id": 239,
      "category": "Environmental/ESG",
      "use_case": "supply_chain_deforestation_and_esg_linked_loan_covenants",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 54.4,
          "note": "Exceptional cross-jurisdictional depth; ANPD Resolution 19/2024 citation needs verification.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 58.0,
          "note": "SEC Rule 33-11275 citation needs verification; otherwise rigorous and production-ready",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 85.1,
          "note": "Precise citations, strong LGPD/EUDR/CSRD distinction, actionable KPIs, no hallucinations.",
          "rank": 5
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 16.9,
          "note": "Rigorous multi-jurisdictional covenant; minor ISAE-3410 scope stretch noted",
          "rank": 2
        },
        "grok-4.3": {
          "total": 27.6,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 6.3,
          "note": "Solid structure but underdeveloped; pitfalls section too brief given 1200-word limit",
          "rank": 8
        },
        "mistral-large": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 44.7,
          "note": "EUDR compliance date wrong; CSRD Art.26a misattributed; otherwise solid.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 22.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 6.2,
          "note": "Superficial treatment; missing margin ratchets, KPI triggers, Nigerian law specifics",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 90.3,
          "note": "Rigorous, jurisdiction-specific, enforceable covenants with precise regulatory distinctions drawn.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 29.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 75.7,
          "note": "Precise, jurisdiction-correct, production-ready; LGPD/NDPA/EUDR conflicts expertly distinguished.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 31.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 42.0,
          "note": "Precise, jurisdiction-correct, actionable; EUDR/CSRD/LGPD distinctions expertly drawn.",
          "rank": 1
        }
      }
    },
    {
      "id": 240,
      "category": "Arbitration",
      "use_case": "multi_tier_arbitration_clause_ohada_swiss_law_and_singapore_seat",
      "models": {
        "claude-opus-4.8": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 51.1,
          "note": "Exceptional cross-jurisdictional analysis; BNA/BCY/Enka citations appear verifiable.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 2.3,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 2.3,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 65.2,
          "note": "",
          "rank": 9
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 120.7,
          "note": "BCY v BCZ and Anupam Mittal citations appear verifiable and correctly applied.",
          "rank": 3
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 25.2,
          "note": "",
          "rank": 7
        },
        "grok-4.3": {
          "total": 26.0,
          "quality": 7.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 6.3,
          "note": "BCY v BCZ cited but misapplied; AUDA Article 34 reference unverified",
          "rank": 5
        },
        "mistral-large": {
          "total": 2.8,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 2.8,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 56.7,
          "note": "",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 22.6,
          "quality": 6.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 8.1,
          "note": "Sulamérica cited incorrectly; UAA Article 1(1) scope claim oversimplified; thin enforcement analysis",
          "rank": 6
        },
        "deepseek-v3.2": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 97.6,
          "note": "BCY v BCZ and International Research Corp citations require verification; likely real but risky",
          "rank": 4
        },
        "qwen3.7-max": {
          "total": 1.4,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 1.4,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 107.6,
          "note": "",
          "rank": 10
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 48.7,
          "note": "Anupam Mittal citation misapplied; BCY v BCZ ratio overstated but cases exist",
          "rank": 2
        }
      }
    },
    {
      "id": 241,
      "category": "Insurance",
      "use_case": "warranty_and_indemnity_insurance_for_mena_tech_target",
      "models": {
        "claude-opus-4.8": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 45.3,
          "note": "Exceptional cross-jurisdictional analysis; Brexit passporting correction precise and actionable.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 56.6,
          "note": "Saudi Civil Transactions Law cite plausible but Article 205 unverified; Yorkshire Insurance real",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 101.7,
          "note": "Excellent cross-jurisdictional depth; Brexit passporting and subrogation limits well-handled.",
          "rank": 6
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 19.4,
          "note": "Tolhurst cite accurate; Brexit passporting analysis precise; KSA/Egypt gaps well-flagged.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 29.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.6,
          "note": "Saudi Civil Transactions Law article citations need verification; otherwise solid cross-jurisdictional analysis",
          "rank": 4
        },
        "mistral-large": {
          "total": 23.8,
          "quality": 8.0,
          "accuracy": 4.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 54.7,
          "note": "Multiple hallucinated cases; SABIC, Saudi Oger, SPP, Mark Rowlands misapplied",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 15.6,
          "quality": 4.0,
          "accuracy": 2.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 7.7,
          "note": "AMINOIL case misapplied; Egyptian SCC cite fabricated; Brexit passporting oversimplified",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 94.5,
          "note": "Lord Napier cite accurate; TPR nuance correct; strong MENA-specific analysis",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 89.0,
          "note": "Lord Napier cite valid; Egyptian Art.360 plausible but needs verification",
          "rank": 8
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 38.2,
          "note": "Exceptional cross-jurisdictional analysis; FSMA, Shari'ah, Egyptian admin law correctly applied.",
          "rank": 3
        }
      }
    },
    {
      "id": 242,
      "category": "Healthcare/Pharma",
      "use_case": "cross_border_telemedicine_prescribing_jp_ca_ny",
      "models": {
        "claude-opus-4.8": {
          "total": 31.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 43.6,
          "note": "Precise HIPAA analysis, PHIPA/SHIELD distinction, and pitfall framing are exemplary.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 49.6,
          "note": "Rigorous, cite-verified, three pitfalls clearly identified, HIPAA nuance excellent.",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 162.0,
          "note": "Thorough, well-structured; minor PIPEDA case citations need verification",
          "rank": 6
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 25.4,
          "note": "Thorough, well-structured; minor risk on unverified pending NY bills cited.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 26.6,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 9.3,
          "note": "Hallucinated 'Matter of Dr. X 2018' NY determination; otherwise solid analysis",
          "rank": 7
        },
        "mistral-large": {
          "total": 25.3,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 56.2,
          "note": "Berlin v. Sarah Bush Lincoln is Illinois case, misattributed; CPSO section citations unverifiable",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 24.0,
          "quality": 6.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 9.2,
          "note": "CPSO 2020 telemedicine cite and NYSED guidance unverifiable; HIPAA analysis partially correct",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.4,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 80.9,
          "note": "Solid, well-structured analysis; HIPAA trigger nuance and pitfalls handled well.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 79.7,
          "note": "Precise statutory citations, correct HIPAA analysis, strong provincial-federal distinction flagged.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 28.2,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 48.0,
          "note": "Solid analysis; HIPAA carve-out well-argued; APPI/Japan law could deepen.",
          "rank": 5
        }
      }
    },
    {
      "id": 243,
      "category": "Bankruptcy & Insolvency",
      "use_case": "cross_border_insolvency_swiss_bankruptcy_and_us_chapter11_for_crypto_exchange",
      "models": {
        "claude-opus-4.8": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 46.1,
          "note": "Exceptional cross-jurisdictional analysis; Art.242a SchKG application precise and actionable.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 52.3,
          "note": "Owens Corning cite verified; FISA/SchKG analysis precise and jurisdiction-correct.",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 113.9,
          "note": "Celsius and SPhinX citations verifiable; DEBA 242a correctly applied throughout.",
          "rank": 4
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 19.6,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 24.6,
          "quality": 6.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 10.9,
          "note": "Solid framework but only ~600 words; misses depth on SchKG specifics",
          "rank": 8
        },
        "mistral-large": {
          "total": 25.8,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 46.5,
          "note": "BGE 144 III 457 and Bear Stearns citations unverified; Eurofood misapplied to Switzerland",
          "rank": 6
        },
        "llama-4-maverick": {
          "total": 18.0,
          "quality": 5.0,
          "accuracy": 3.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 7.3,
          "note": "4A_456/2018 citation unverifiable; shallow DEBA/PILA/SIPA analysis throughout",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 78.8,
          "note": "Mt. Gox cited as FINMA precedent; it was Japanese, not Swiss.",
          "rank": 5
        },
        "qwen3.7-max": {
          "total": 25.4,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 90.6,
          "note": "Art.242a DEBA cited plausibly; Lugano exclusion analysis accurate and sharp.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 42.2,
          "note": "Accurate Art.242a DEBA analysis; SIPA/EIR pitfalls well-articulated; no hallucinations.",
          "rank": 2
        }
      }
    },
    {
      "id": 244,
      "category": "Securities",
      "use_case": "tokenized_equity_offering_mica_vs_us_securities_law_vs_swiss_dlt",
      "models": {
        "claude-opus-4.8": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 44.6,
          "note": "Exceptional multi-jurisdictional analysis; Howey cite verified; no hallucinations detected.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 60.3,
          "note": "Rigorous, jurisdiction-correct, four traps well-identified, table adds clarity.",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 120.4,
          "note": "Exceptional multi-jurisdictional analysis; all cited cases verifiable and correctly applied.",
          "rank": 4
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 22.6,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 26.6,
          "quality": 6.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 8.2,
          "note": "Accurate but half the required length; four traps underdeveloped",
          "rank": 7
        },
        "mistral-large": {
          "total": 25.8,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 47.1,
          "note": "FINMA v. Envion cited incorrectly; Envion was liquidation, not enforcement case",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.5,
          "note": "Superficial analysis; misses MiCA exclusion for financial instruments, thin Howey application",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 95.0,
          "note": "Rigorous, well-structured; Howey and MiCA exclusion analysis precise and actionable.",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 125.5,
          "note": "Exceptional cross-jurisdictional analysis; MiCA carve-out, Howey, FinSA all correct.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 40.4,
          "note": "Telegram cite verified; MiCA Art 2(4)(a) correctly applied; Howey accurate.",
          "rank": 2
        }
      }
    },
    {
      "id": 245,
      "category": "Construction",
      "use_case": "ppp_hospital_project_dispute_qatar_uk_law_with_swiss_lender",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 39.9,
          "note": "Precise, jurisdiction-aware, no fabricated cites; actionable three-track risk framework.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 50.8,
          "note": "Seadrill cite plausible but Qatar law numbers need verification by local counsel",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 110.9,
          "note": "Rigorous multi-jurisdictional analysis; cited Qatari laws plausible but unverified.",
          "rank": 7
        },
        "o3": {
          "total": 25.1,
          "quality": 8.0,
          "accuracy": 4.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 17.8,
          "note": "Multiple unverifiable Qatari case citations and statute references likely fabricated.",
          "rank": 8
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.1,
          "note": "Solid dual-track strategy; Qatari law citations plausible but unverified.",
          "rank": 4
        },
        "mistral-large": {
          "total": 22.2,
          "quality": 7.0,
          "accuracy": 3.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 38.4,
          "note": "Multiple fabricated cases and unverifiable Qatari statute articles cited throughout",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 18.0,
          "quality": 4.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 6.7,
          "note": "Fabricated Qatari statutes; shallow analysis; lacks actionable lender-specific strategy",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 72.0,
          "note": "Rigorous cross-jurisdictional analysis; minor risk citing unverified Qatari statute numbers.",
          "rank": 3
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 85.3,
          "note": "Rigorous multi-jurisdictional analysis; PPP Law No.12/2020 citation needs verification.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 35.5,
          "note": "Rigorous cross-jurisdictional analysis; PPP Law citation needs independent verification.",
          "rank": 2
        }
      }
    },
    {
      "id": 246,
      "category": "Antitrust",
      "use_case": "global_digital_merger_review_us_eu_india_brazil",
      "models": {
        "claude-opus-4.8": {
          "total": 31.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 49.7,
          "note": "Rigorous, jurisdiction-correct, DVT/SBOI flagged, UK call-in noted, no hallucinations.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.2,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 54.1,
          "note": "Meta/Giphy CAT cite and Google/Fitbit CCI details unverifiable; core analysis strong",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 145.1,
          "note": "Illumina CJEU citation plausible but date/case numbers need verification.",
          "rank": 5
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 24.1,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 27.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.7,
          "note": "Microsoft/LinkedIn cited loosely; India deal-value threshold figure needs verification",
          "rank": 4
        },
        "mistral-large": {
          "total": 24.3,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 68.3,
          "note": "Multiple hallucinated cases: FTC v. Staples 2016, FTC v. Meta/Within 2023 order, CMA v. Sabre/Farelogix",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 5.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 8.5,
          "note": "FTC Cloud Study 2020 and M.7211 Microsoft/LinkedIn misapplied; shallow thresholds analysis",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 23.4,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 104.9,
          "note": "Google/Telegram, Microsoft/SAP M.3314, Google/Fitbit M.9660 citations hallucinated",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 24.9,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 99.2,
          "note": "Broadcom/VMware M.10812 cite unverifiable; Microsoft/LinkedIn case number questionable",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 28.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 57.8,
          "note": "Illumina/Grail ECJ ruling correctly cited; Broadcom/VMware accurately referenced.",
          "rank": 2
        }
      }
    },
    {
      "id": 247,
      "category": "Trusts & Estates",
      "use_case": "cross_border_family_trust_with_lebanese_forced_heirship_and_swiss_trustee",
      "models": {
        "claude-opus-4.8": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 45.1,
          "note": "Exceptional cross-jurisdictional analysis; Art.15 Hague caveat correctly flagged throughout.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 57.2,
          "note": "Re Ross cite accurate; Lebanese statute citations plausible but unverifiable without deeper check.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 121.5,
          "note": "Pugachev cite slightly imprecise but substantively defensible; Akers correct.",
          "rank": 5
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 22.9,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 23.6,
          "quality": 7.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 7.7,
          "note": "Lebanese Code arts 870-873 and Court of Cassation citations unverifiable/likely hallucinated",
          "rank": 7
        },
        "mistral-large": {
          "total": 22.8,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 51.0,
          "note": "Multiple hallucinated citations: Lebanese Decision 12/2010, Geneva 2015, Minwalla forced-heirship holding",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 16.0,
          "quality": 4.0,
          "accuracy": 2.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 6.6,
          "note": "Multiple hallucinated cases; shallow analysis; misses Swiss PIL Art.149a nuances",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 25.9,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 99.8,
          "note": "Re Barton [2002] EWHC 264 appears fabricated; core analysis otherwise strong",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 102.1,
          "note": "Pugachev cite verified; Swiss PILA articles accurate; HTC Art.15 correctly applied.",
          "rank": 3
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 42.8,
          "note": "Rigorous HTC Article 15 analysis; firewall jurisdiction advice precise and actionable.",
          "rank": 2
        }
      }
    },
    {
      "id": 248,
      "category": "Government Contracts",
      "use_case": "defense_software_localization_and_ip_ownership_india_australia",
      "models": {
        "claude-opus-4.8": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 46.7,
          "note": "AUKUS IFR date needs verification; otherwise rigorous, actionable, well-structured advisory.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 53.8,
          "note": "Technically rigorous, DTCT/AUKUS nuances well-handled, STA-1 India inclusion accurate.",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 97.7,
          "note": "Precise statutory cites, clean-room offset plan, five confusion points exceed brief.",
          "rank": 7
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 22.0,
          "note": "Rigorous, well-structured; minor risk on 48 CFR cite applicability to commercial vendor",
          "rank": 2
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.1,
          "note": "Accurate, well-structured, cites real treaty/CFR provisions, actionable parallel matrices.",
          "rank": 1
        },
        "mistral-large": {
          "total": 23.3,
          "quality": 8.0,
          "accuracy": 4.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 59.2,
          "note": "BrahMos v MoD and Commonwealth v Sanofi cases appear fabricated",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 5.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 7.5,
          "note": "Fabricated AUSMIN Guidelines cite; shallow analysis; misses DECA, DSA, SOSA frameworks",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 96.2,
          "note": "Rigorous, well-structured; minor ITAR category citations need verification.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 29.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 65.1,
          "note": "Precise ITAR/EAR analysis, correct jurisdiction distinctions, actionable bifurcated IP strategy.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 32.7,
          "note": "AUKUS ITAR exemption timing/scope slightly overstated; otherwise rigorous and actionable.",
          "rank": 6
        }
      }
    },
    {
      "id": 249,
      "category": "Energy/Climate",
      "use_case": "cross_border_hydrogen_offtake_us_japan_eu_cbam",
      "models": {
        "claude-opus-4.8": {
          "total": 31.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 42.6,
          "note": "Technically precise, jurisdiction-correct, actionable; three model-error flags well-executed.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 61.9,
          "note": "Minor self-contradiction on hydrogen CBAM scope; otherwise rigorous and actionable.",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 113.3,
          "note": "Precise statutory citations, clear CBAM/ETS distinction, actionable attribute matrix concept.",
          "rank": 7
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 18.6,
          "note": "Technically strong; minor IRC §45V(d)(5) citation needs verification",
          "rank": 2
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 10.4,
          "note": "Technically sound, well-structured, correctly identifies three model-error pitfalls.",
          "rank": 4
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 48.0,
          "note": "Canada-Aircraft WTO cite misapplied; CBAM default value unverified; strong structure overall",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 22.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 7.9,
          "note": "Superficial treatment; misses additionality rules, ammonia-cracking CBAM gaps, IRA domestic content.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 25.4,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 108.2,
          "note": "Hydrogen CBAM 2030 phase-in claim unverified; IRA cite numbers slightly imprecise",
          "rank": 9
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 80.1,
          "note": "Technically precise, correctly distinguishes CBAM certificates from EUAs throughout.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 54.7,
          "note": "Technically rigorous, three pitfalls well-distinguished, minor Japan CfD threshold unverified",
          "rank": 3
        }
      }
    },
    {
      "id": 250,
      "category": "Consumer Protection",
      "use_case": "subscription_dark_patterns_us_ftc_uk_aus",
      "models": {
        "claude-opus-4.8": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 37.4,
          "note": "Click-to-Cancel Rule vacatur and FTC v. Custom Communications appear fabricated",
          "rank": 8
        },
        "claude-sonnet-4.6": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 55.3,
          "note": "Excellent multi-jurisdictional analysis; Nedlloyd cite accurate; minor FTC rule timing nuance",
          "rank": 1
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 130.0,
          "note": "Solid cross-jurisdictional analysis; DMCC commencement caveat appropriately flagged.",
          "rank": 6
        },
        "o3": {
          "total": 29.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 21.1,
          "note": "Stathakos cite unverifiable; ABCmouse case number plausible but unconfirmed",
          "rank": 2
        },
        "grok-4.3": {
          "total": 27.0,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 7.5,
          "note": "FTC v. Health Formulas and ACCC v. Apple citations appear fabricated or misattributed",
          "rank": 7
        },
        "mistral-large": {
          "total": 2.3,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 2.3,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 60.6,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 17.6,
          "quality": 5.0,
          "accuracy": 3.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 8.1,
          "note": "FTC v. AdoreMe wrong citation; Knodell case unverifiable; shallow analysis throughout",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 72.8,
          "note": "Solid cross-jurisdictional analysis; ACL s39(3) citation needs verification.",
          "rank": 4
        },
        "qwen3.7-max": {
          "total": 27.4,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 77.2,
          "note": "Accurate, well-structured; DMCCA 2024 reference adds currency and precision.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 28.7,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 31.3,
          "note": "Solid, accurate analysis; DMCC and ACCC penalty figures correctly cited.",
          "rank": 3
        }
      }
    },
    {
      "id": 251,
      "category": "Contract & Commercial",
      "use_case": "crossborder_franchise_gcc_termination_and_noncompete",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 38.4,
          "note": "Exceptional multi-jurisdictional analysis; pitfalls well-identified; caveats appropriate.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 64.8,
          "note": "Exceptional cross-jurisdictional depth; Makdessi cite accurate; KSA CTL transitional analysis strong.",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 98.0,
          "note": "Rigorous multi-jurisdictional analysis; Saudi Franchise Law citation needs verification.",
          "rank": 4
        },
        "o3": {
          "total": 27.1,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 21.1,
          "note": "Dubai Cassation 547/2012 and MK Group 2017 Paris unverified; deductions applied",
          "rank": 6
        },
        "grok-4.3": {
          "total": 27.6,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 6.5,
          "note": "Banco Atlantico cite unverifiable; Art.1027 UAE Civil Code misidentified for restraint",
          "rank": 5
        },
        "mistral-large": {
          "total": 23.2,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 37.9,
          "note": "Multiple hallucinated citations: Saudi SAMA v Dresdner, Dubai Cassation 142/2012, French Cass cite",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 5.0,
          "accuracy": 3.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 4.5,
          "note": "KSA Labour Law cited as M/51 1980 and 2005; Rome I inapplicable post-Brexit",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 66.4,
          "note": "Cavendish cite correct; Saudi Labor Art.54 non-compete application to commercial contracts overstretched.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 27.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 112.4,
          "note": "Nordenfelt and Makdessi correctly cited; KSA Franchise Law reference plausible but unverified.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 33.0,
          "note": "Cavendish and Tillman correctly cited; KSA franchise law dating precise and useful.",
          "rank": 2
        }
      }
    },
    {
      "id": 252,
      "category": "Regulatory Compliance",
      "use_case": "telemedicine_platform_multi_regulator_licensing_map",
      "models": {
        "claude-opus-4.8": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 37.6,
          "note": "Pacific Health Corp cite unverified but plausible; otherwise strong, actionable memo",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 91.0,
          "note": "Magan Medical cite plausible; IMLC/CA status needs verification pre-launch",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 133.0,
          "note": "Comprehensive, well-cited, actionable memo with strong cross-jurisdictional trap identification.",
          "rank": 5
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 18.5,
          "note": "Locke v. Shore misapplied; irrelevant 11th Cir. case cited for telehealth jurisdiction",
          "rank": 2
        },
        "grok-4.3": {
          "total": 30.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 6.3,
          "note": "Solid multi-jurisdiction map; LLM error flags are clear and useful.",
          "rank": 3
        },
        "mistral-large": {
          "total": 24.2,
          "quality": 7.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 40.1,
          "note": "Berlin v. Sarah Bush Lincoln is Illinois case; AB 796, OPMC AI guidance unverifiable",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 22.6,
          "quality": 6.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 6.8,
          "note": "N.Y. Pub. Health Law §2999-aa unverifiable; SMC Telemedicine Guidelines misattributed",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 57.7,
          "note": "Baron v. Los Angeles unverified; IMLC NY exclusion needs verification",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 106.4,
          "note": "Thorough, well-structured, jurisdiction-accurate; LLM risk flags are genuinely insightful.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 40.8,
          "note": "Rigorous, well-cited, actionable; NY flat-fee MSA claim needs verification.",
          "rank": 4
        }
      }
    },
    {
      "id": 253,
      "category": "Dispute Resolution",
      "use_case": "multi_tier_clause_enforcement_uae_ohada_ny",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 59.3,
          "note": "Rigorous multi-forum analysis; Poiré v. Tripier citation plausible but verify.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 125.5,
          "note": "Zinc Nacional and Katz v. Kar citations unverified; Dallah framing slightly off",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 23.0,
          "quality": 7.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 137.0,
          "note": "Truncated before French courts and UAE analysis; OHADA Articles cited plausibly",
          "rank": 8
        },
        "o3": {
          "total": 27.1,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 19.4,
          "note": "SNF/Cytec, SNPC/Total, CCJA 025/2014 citations unverified or misattributed",
          "rank": 6
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.9,
          "note": "Solid multi-jurisdictional analysis; correctly flags English case hallucination risk.",
          "rank": 3
        },
        "mistral-large": {
          "total": 22.2,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 39.5,
          "note": "Multiple unverifiable CCJA citations and questionable French case references undermine accuracy.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 16.0,
          "quality": 3.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 6.3,
          "note": "Superficial analysis; no specific cases, statutes, or OHADA articles cited",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 24.3,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 85.9,
          "note": "Ofer Brothers, Dayyani, RG Steel citations unverified or misattributed; deduct accordingly.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 116.3,
          "note": "Poiré cite plausible; Paris CA 2020 cite unverified; core analysis strong",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 32.2,
          "note": "Rigorous cross-jurisdictional analysis; BG Group citation accurate; OHADA nuance solid.",
          "rank": 2
        }
      }
    },
    {
      "id": 254,
      "category": "Corporate Governance",
      "use_case": "dual_headquartered_startup_board_duties_climate_risk",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 39.3,
          "note": "Rigorous multi-jurisdictional analysis; Boeing cite imprecise but not fabricated.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 69.3,
          "note": "Exceptional depth; ASIC v Mercer 2024 cite needs verification but plausible.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 102.5,
          "note": "Rigorous, jurisdiction-precise, correctly flags LLM traps; cites verified.",
          "rank": 3
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 15.6,
          "note": "Technically strong; Base Metal Trading citation slightly misapplied but defensible.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 4.6,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.6,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 5.8,
          "note": "",
          "rank": 10
        },
        "mistral-large": {
          "total": 25.2,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 37.6,
          "note": "Enron Australia cite unverifiable; McVeigh outcome mischaracterized; Goldstein citation dubious",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 20.0,
          "quality": 5.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 4.3,
          "note": "ASIC v MacDonald cite wrong; Base Metal Trading misapplied; exceeds bullet limit",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 22.7,
          "quality": 7.0,
          "accuracy": 5.0,
          "speed": 3.7,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 35.5,
          "note": "Abrahams v Commonwealth Bank appears fabricated; Cassimatis citation context questionable",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 73.0,
          "note": "Technically strong; Stone v. Ritter citation and Caremark loyalty framing correct.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 47.9,
          "note": "Abercrombie v Davies cite dubious; Cassimatis framing overstretched but core analysis strong",
          "rank": 6
        }
      }
    },
    {
      "id": 255,
      "category": "IP/Tech Law",
      "use_case": "video_game_mods_and_user_generated_content",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 40.3,
          "note": "Exceptional cross-jurisdictional depth; STF 2025 ruling caveat appropriately flagged.",
          "rank": 5
        },
        "claude-sonnet-4.6": {
          "total": 29.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 59.1,
          "note": "Exceptional comparative analysis; all citations verifiable; DMCA error areas precise.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 65.5,
          "note": "Comprehensive, jurisdiction-accurate, cites verified; DMCA-import pitfalls well-identified.",
          "rank": 6
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 16.4,
          "note": "Maneki TV cite misattributed; Viacom/Tiffany/L'Oréal broadly accurate but verify",
          "rank": 3
        },
        "grok-4.3": {
          "total": 32.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 7.2,
          "note": "Precise statutory cites, strong DMCA-import warnings, actionable multi-jurisdictional analysis.",
          "rank": 1
        },
        "mistral-large": {
          "total": 3.7,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.7,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 29.4,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 23.0,
          "quality": 6.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 4.7,
          "note": "Superficial treatment; misattributes Japanese editorial control to wrong article",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 27.3,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 42.6,
          "note": "Solid multi-jurisdictional analysis; L'Oréal v. eBay cite is accurate and relevant.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 107.3,
          "note": "Exceptional comparative analysis; Grokster/Inwood citations verifiable and apt.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 35.5,
          "note": "Tiffany v. eBay and Grokster are real; all citations verifiable.",
          "rank": 2
        }
      }
    },
    {
      "id": 256,
      "category": "Employment Law",
      "use_case": "remote_only_workforce_multi_localization_mismatch",
      "models": {
        "claude-opus-4.8": {
          "total": 29.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 42.9,
          "note": "Waksdale cite verified; NIC constitutional analysis accurate; Rome I retention confirmed.",
          "rank": 4
        },
        "claude-sonnet-4.6": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 106.3,
          "note": "Bardal correct; Esso NICN cite unverifiable; Modi applied loosely",
          "rank": 7
        },
        "gpt-5.5": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 119.1,
          "note": "Thorough, jurisdiction-accurate, cites verified; naïve-model traps well-executed.",
          "rank": 3
        },
        "o3": {
          "total": 29.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 13.1,
          "note": "Petroleum Equalisation Fund v Sadiq 2020 and SK Maini citations unverifiable/suspect",
          "rank": 5
        },
        "grok-4.3": {
          "total": 30.6,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.2,
          "note": "Machtinger principle extension to choice-of-law is plausible but stretched.",
          "rank": 1
        },
        "mistral-large": {
          "total": 24.8,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 47.3,
          "note": "Olaniyan, Ruston, NUPENG citations unverifiable; Dawe facts mischaracterized",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 22.0,
          "quality": 6.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 5.7,
          "note": "Lawson v Serco correct but Nigeria Labour Act Sec.7 overtime claim unverified",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 60.6,
          "note": "Solid analysis; concrete examples strong; creativity and cross-jurisdictional depth moderate.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 133.2,
          "note": "Waksdale and Bardal citations verified; IDA workman analysis accurate and nuanced.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 30.7,
          "note": "Machtinger cite verified; IDA workman analysis nuanced; PE risk bonus insight.",
          "rank": 2
        }
      }
    },
    {
      "id": 257,
      "category": "Data Privacy",
      "use_case": "health_data_analytics_third_country_access",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 46.6,
          "note": "Exceptional multi-jurisdictional analysis; DPF/Swiss addendum guidance precise and actionable.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 29.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 85.2,
          "note": "Exceptional multi-jurisdictional analysis; four overgeneralizations clearly identified and explained.",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 142.4,
          "note": "Comprehensive, jurisdiction-accurate, four overgeneralization traps explicitly identified.",
          "rank": 7
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 16.1,
          "note": "EU-US DPF finalized July 2023; draft framing and 2000/518/EC citation outdated",
          "rank": 2
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 5.7,
          "note": "Solid cross-jurisdictional analysis; four overgeneralizations clearly flagged; minor gaps on PIPEDA",
          "rank": 4
        },
        "mistral-large": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 30.8,
          "note": "Strong structure; adequacy decision cite outdated; CLOUD Act analysis adds value.",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 21.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 4.8,
          "note": "Superficial analysis; misses TIA obligations, supplementary measures, PDPA 2021 amendments",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.3,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 48.4,
          "note": "Solid multi-jurisdiction analysis; six overgeneralizations exceed required four cleanly.",
          "rank": 9
        },
        "qwen3.7-max": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 112.7,
          "note": "Precise, jurisdiction-specific, four overgeneralizations clearly flagged, no hallucinated cites.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 29.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 38.6,
          "note": "Adequacy decision cite slightly outdated but analysis sound; CBPR/PRP accurate",
          "rank": 6
        }
      }
    },
    {
      "id": 258,
      "category": "M&A",
      "use_case": "hostile_bid_eu_uk_us_public_company_mandatory_offers",
      "models": {
        "claude-opus-4.8": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 50.4,
          "note": "Rigorous multi-jurisdictional analysis; correctly flags Tier I/II and internal affairs doctrine.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 29.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 115.8,
          "note": "Exceptional multi-jurisdictional analysis; Revlon/Unocal citations correctly applied.",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 154.8,
          "note": "Exceptional multi-jurisdictional analysis; Unocal/Revlon citations verifiable and correctly applied.",
          "rank": 5
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 16.6,
          "note": "Thorough tri-jurisdictional analysis; minor AMF article numbering unverified but plausible",
          "rank": 2
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 11.8,
          "note": "Solid multi-regime analysis; timetable figures slightly imprecise but no hallucinations.",
          "rank": 4
        },
        "mistral-large": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 44.8,
          "note": "Solid multi-regime analysis; minor errors on UK Code applicability triggers",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 24.0,
          "quality": 6.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 4.0,
          "note": "Superficial treatment; misses Ecodesign/passporting nuances and squeeze-out conflicts",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 26.8,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 55.8,
          "note": "Bullet 1 misstates Code scope; Rule 9 inapplicability reasoning slightly off",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 102.5,
          "note": "Sophisticated multi-regime analysis; Tier II threshold slightly overstated but minor.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 23.0,
          "quality": 7.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 160.2,
          "note": "Memo cut off mid-bullet; misses LLM-mistake section; solid core analysis",
          "rank": 10
        }
      }
    },
    {
      "id": 259,
      "category": "Banking/Finance",
      "use_case": "islamic_finance_sukuk_cross_default_and_sharia_arbitrage",
      "models": {
        "claude-opus-4.8": {
          "total": 30.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 36.5,
          "note": "Rigorous, well-structured; KSA reciprocity nuance slightly overstated but defensible.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 47.5,
          "note": "Shamil Bank, Investment Dar, Rainy Sky all verifiable; SAC binding authority correctly stated",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 108.7,
          "note": "Rigorous multi-jurisdictional analysis; Beximco and JRI citations verified correct.",
          "rank": 5
        },
        "o3": {
          "total": 32.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 12.1,
          "note": "Shamil Bank and REJA citations verified; Dana Gas caveat apt and accurate.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 31.5,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.5,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 5.2,
          "note": "Shamil Bank correctly cited; three LLM errors well-identified; actionable analysis.",
          "rank": 2
        },
        "mistral-large": {
          "total": 26.5,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.5,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 34.5,
          "note": "Dana Gas 2018 Saudi cite unverifiable; Royal Decree M/53 misattributed; JRI Resources accurate",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 5.0,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 5.0,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 3.7,
          "note": "",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 25.5,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.5,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 37.2,
          "note": "Beximco and Halpern correctly cited; Rome I applicability to UAE SPV debatable",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.5,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.5,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 94.7,
          "note": "Sophisticated cross-jurisdictional analysis; Dana Gas and Shamil Bank correctly applied.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "status": "error",
          "err": "empty completion (reasoning consumed max_tokens)"
        }
      }
    },
    {
      "id": 260,
      "category": "Real Estate",
      "use_case": "foreign_investor_data_center_land_acquisition",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 46.9,
          "note": "Exceptional cross-jurisdictional depth; FEMA/FEFTA/FIRB distinctions precise and actionable.",
          "rank": 4
        },
        "claude-sonnet-4.6": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 129.5,
          "note": "Exceptional depth; SOCI Act, Important Land Act, DPDPA risks expertly surfaced.",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 146.6,
          "note": "Rigorous, jurisdiction-correct, flags FIRB equivalence traps and subnational risks",
          "rank": 7
        },
        "o3": {
          "total": 32.1,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 15.3,
          "note": "Comprehensive, jurisdiction-accurate matrix with actionable subnational and structural nuances.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 8.3,
          "note": "Precise statutory citations, strong subnational analysis, CFIUS cross-border angle impressive",
          "rank": 3
        },
        "mistral-large": {
          "total": 25.3,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 51.2,
          "note": "Treasurer v AGM Markets case citation unverifiable; FATA penalty figures need verification",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 25.0,
          "quality": 7.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 5.8,
          "note": "Solid framework but misses FIRB thresholds, FEMA land ban, and J-REIT nuances.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.2,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 45.1,
          "note": "Solid matrix; FEMA/FDI nuances and SOCI coverage accurate and actionable.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 29.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 107.9,
          "note": "Exceptional depth; FEMA bar, SOCI, FEFTA distinctions all correctly stated.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 31.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 41.9,
          "note": "Exceptional cross-jurisdictional depth; NALA, PN3 look-through, ESPA correctly identified.",
          "rank": 2
        }
      }
    },
    {
      "id": 261,
      "category": "Tax",
      "use_case": "platform_worker_tax_residency_and_pe_risk",
      "models": {
        "claude-opus-4.8": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 40.7,
          "note": "Exceptional memo: correct treaties, SEP rules, pejotização, totalization gaps flagged.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 71.4,
          "note": "Exceptional depth; SEP treaty-override tension and pejotização risks notably strong.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 152.6,
          "note": "Precise citations, MLI/US caveat noted, three AI-error traps clearly flagged.",
          "rank": 3
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 17.1,
          "note": "",
          "rank": 9
        },
        "grok-4.3": {
          "total": 27.6,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 6.3,
          "note": "Normative Instruction RFB 1.530/2021 and CBDT Circular 2021 unverifiable; MLI Art.12 misattributed",
          "rank": 5
        },
        "mistral-large": {
          "total": 24.3,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 46.0,
          "note": "Brazil-US DTAA 1988 and several cited cases appear fabricated or misattributed",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 17.0,
          "quality": 4.0,
          "accuracy": 3.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 4.6,
          "note": "Brazil-US treaty doesn't exist; Brazilian Tax Code cites fabricated; shallow analysis",
          "rank": 8
        },
        "deepseek-v3.2": {
          "total": 2.8,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 2.8,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 45.1,
          "note": "",
          "rank": 10
        },
        "qwen3.7-max": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 108.2,
          "note": "Thorough, jurisdiction-specific, actionable memo with accurate treaty gap analysis.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 31.6,
          "note": "Accurate, well-structured; SEP/EL interaction nuance slightly oversimplified.",
          "rank": 2
        }
      }
    },
    {
      "id": 262,
      "category": "Immigration",
      "use_case": "global_nomad_executive_visa_and_work_authorization",
      "models": {
        "claude-opus-4.8": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 35.5,
          "note": "Precise, well-structured, correctly flags PE risk and ETIAS/ETA distinctions.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 29.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 55.9,
          "note": "Exceptional cross-jurisdictional analysis; PE linkage and LLM error flags outstanding.",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 99.8,
          "note": "Precise statutory citations, strong pitfall framing, actionable cross-jurisdictional distinctions throughout.",
          "rank": 6
        },
        "o3": {
          "total": 25.1,
          "quality": 8.0,
          "accuracy": 4.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 17.6,
          "note": "Hallucinated R (Hernandez) EWCA case and Frontex Guidance 2023/37 citation",
          "rank": 8
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 5.9,
          "note": "Solid comparative note; correctly flags ESTA misconception and EntrePass overgeneralization.",
          "rank": 4
        },
        "mistral-large": {
          "total": 23.7,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 30.6,
          "note": "Multiple case citations appear fabricated; German/Singapore cases unverifiable and likely hallucinated.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 21.0,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 4.8,
          "note": "AufenthG Section 15(2) misapplied; UAE Federal Law No.6/2013 citation unverified",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.8,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 31.6,
          "note": "Solid comparative note; UAE Remote Work Visa details slightly oversimplified but no hallucinations.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 88.4,
          "note": "Precise, well-structured; PE risk and Totalization Agreement angles add real value.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 31.0,
          "note": "Precise, actionable, correctly flags PE risk and ETIAS/ETA distinctions.",
          "rank": 2
        }
      }
    },
    {
      "id": 263,
      "category": "Criminal/White Collar",
      "use_case": "multi_jurisdiction_bribe_investigation_and_self_report",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 50.2,
          "note": "Exceptional multi-jurisdictional analysis; Airbus/Odebrecht cites verifiable and accurate.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 86.8,
          "note": "Exceptional depth; Liu v. SEC cite slightly misapplied but not hallucinated",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 128.0,
          "note": "Esquenazi cite verified; Sapin II CJIP article correct; Brazil decree accurate.",
          "rank": 5
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 13.3,
          "note": "Esquenazi cite accurate; Sapin II Art.17 threshold nuance slightly oversimplified.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.5,
          "note": "Strong multi-jurisdictional analysis; only 14 bullets, missing 6 required.",
          "rank": 4
        },
        "mistral-large": {
          "total": 25.2,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 40.2,
          "note": "SEC v. Och-Ziff 2d Cir. 2018 and ADR issuer assumption hallucinated",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 20.0,
          "quality": 4.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 5.1,
          "note": "Superficial treatment; lacks sequencing detail, penalty quantification, and substantive LLM-mistake analysis",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.3,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 59.8,
          "note": "Solid multi-jurisdictional memo; minor gaps on EXIM hook and Nigeria specifics.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 100.1,
          "note": "SFO v ENRC citation accurate; EXIM/AFD debarment cross-risk non-obvious and correct.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 33.0,
          "note": "Airbus and Rolls-Royce citations verifiable; issuer analysis precise and actionable.",
          "rank": 3
        }
      }
    },
    {
      "id": 264,
      "category": "International Trade",
      "use_case": "dual_origin_conflict_for_cbam_and_us_tariffs",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 37.3,
          "note": "Energizer Battery cite unverified but plausible; melt-and-pour analysis strong",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 78.5,
          "note": "HQ H301619 and ITC 701-TA-545 AD order details require verification",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 114.3,
          "note": "Thorough dual-regime analysis; Bell Supply and Texas Instruments citations appear verifiable.",
          "rank": 5
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 13.0,
          "note": "Bell Supply cite real; CSMS number plausible but unverifiable; analysis excellent",
          "rank": 1
        },
        "grok-4.3": {
          "total": 26.6,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 5.9,
          "note": "Ferrostaal cite unverifiable; melt-and-pour for Section 232 overstated as absolute rule",
          "rank": 7
        },
        "mistral-large": {
          "total": 25.2,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 31.1,
          "note": "Midwood Industries cite unverifiable; 19 CFR §14.1 melt-and-pour misattributed",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 18.0,
          "quality": 5.0,
          "accuracy": 3.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 4.9,
          "note": "Bestfoods cite misapplied; Gibson-Thomsen unverified; CBAM interaction underdeveloped",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 25.3,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 45.3,
          "note": "Midwood Indus. and Delegated Reg. 2024/1736 citations unverified/likely hallucinated",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 95.6,
          "note": "Anheuser-Busch cite is real but tangential; M&P analysis is precise.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 28.7,
          "note": "Exceptional cross-jurisdictional analysis; DOC steel precedent citation needs verification.",
          "rank": 2
        }
      }
    },
    {
      "id": 265,
      "category": "Environmental/ESG",
      "use_case": "mining_concession_and_human_rights_due_diligence",
      "models": {
        "claude-opus-4.8": {
          "total": 31.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 40.7,
          "note": "Exceptional cross-jurisdictional precision; Omnibus caveat and pitfalls outstanding.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 29.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 85.0,
          "note": "Exceptional cross-jurisdictional depth; Nevsun and Hudbay citations verified accurate.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 142.8,
          "note": "Precise citations, correct 3TG/cobalt distinction, OHADA/DRC split well-handled.",
          "rank": 8
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 14.2,
          "note": "Exceptional cross-jurisdictional depth; CSDDD status framing slightly outdated but defensible",
          "rank": 3
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 8.1,
          "note": "Rigorous multi-jurisdictional analysis; minor CSDDD transposition timeline imprecision noted.",
          "rank": 1
        },
        "mistral-large": {
          "total": 26.8,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 48.2,
          "note": "Exceeds 18-bullet limit; Nevsun cite accurate; OHADA interaction well-handled",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 18.0,
          "quality": 4.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 5.3,
          "note": "CSDDD cite wrong (2022/2464 is CSRD); shallow analysis throughout",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 51.1,
          "note": "Nevsun cite verified; OHADA/DRC distinction handled correctly and precisely.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 130.5,
          "note": "Exceptional cross-jurisdictional analysis; Nevsun cite verified; LLM traps well-executed",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 35.3,
          "note": "Nevsun and Hudbay citations verified; CSDDD thresholds slightly imprecise",
          "rank": 5
        }
      }
    },
    {
      "id": 266,
      "category": "Arbitration",
      "use_case": "investment_contract_arbitration_clause_vs_treaty_protection",
      "models": {
        "claude-opus-4.8": {
          "total": 29.5,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.5,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 38.8,
          "note": "Rigorous, well-caveated; ICSID/NYC distinction and pitfalls expertly handled.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 44.7,
          "note": "Rigorous, well-structured; Qatar ICSID status caveat appropriately flagged throughout.",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 27.5,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.5,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 50.1,
          "note": "Vivendi and SGS citations verifiable; Pantechniki accurate; ICSID/NYC distinction correct.",
          "rank": 4
        },
        "o3": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 4.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 10.9,
          "note": "Vivendi annulment paragraphs, CA Paris 2013 Belize cite likely hallucinated",
          "rank": 6
        },
        "grok-4.3": {
          "total": 29.5,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.5,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 6.1,
          "note": "SGS v Paraguay para 138 and Telenor para 95 citations unverifiable/likely hallucinated",
          "rank": 2
        },
        "mistral-large": {
          "total": 26.5,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.5,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 33.1,
          "note": "Strong structure but several case citations unverifiable or misattributed; BIT details assumed",
          "rank": 5
        },
        "llama-4-maverick": {
          "total": 21.0,
          "quality": 6.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 4.6,
          "note": "Vivendi annulment paras misquoted; SGS Philippines cite unverifiable; BIT article invented",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 25.0,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 38.2,
          "note": "Eureko v Slovakia and Enron annulment cited inaccurately; BIT year unverified",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 26.0,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 71.9,
          "note": "Vivendi correct; SGS v. Pakistan/Philippines reversed; Aguas del Tunari citation questionable",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "status": "error",
          "err": "empty completion (reasoning consumed max_tokens)"
        }
      }
    },
    {
      "id": 267,
      "category": "Insurance",
      "use_case": "cyber_policy_silent_cover_and_sanctions",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 44.1,
          "note": "Ingram Micro cite flagged as persuasive only; Patel v Mirza correctly applied",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 80.7,
          "note": "Merck/ACE cite misattributed; Teal applied plausibly but stretched slightly",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 114.5,
          "note": "Rigorous multi-jurisdiction analysis; Wayne Tank and FCA v Arch citations verified.",
          "rank": 4
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 16.2,
          "note": "",
          "rank": 10
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.6,
          "note": "Solid multi-jurisdictional analysis; only 13 bullets delivered versus 15 maximum.",
          "rank": 3
        },
        "mistral-large": {
          "total": 23.7,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 30.8,
          "note": "Multiple hallucinated citations: TKC, Beazley, DHL, BGH references unverifiable",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 22.0,
          "quality": 6.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 5.1,
          "note": "FCA v Arch misapplied; EO 14024 and EU 833/2014 citations imprecise",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 25.3,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 54.5,
          "note": "FCA v Arch and Tinsley v Milligan misapplied; sanctions analysis solid",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 26.4,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 84.4,
          "note": "Pilkington cite misapplied; Leyland Shipping proximate cause usage strained for cyber context",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 35.2,
          "note": "FCA v Arch cited correctly; VVG §28 and IBA Art.186 accurate.",
          "rank": 1
        }
      }
    },
    {
      "id": 268,
      "category": "Healthcare/Pharma",
      "use_case": "generic_drug_parallel_import_and_patent_spillover",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 42.2,
          "note": "Centrafarm/Merck citations verifiable; UAE Law 11/2021 plausible but unverified",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 53.9,
          "note": "Centrafarm cite verified; UAE Federal Law No.11/2021 article numbers need confirmation",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 149.6,
          "note": "Comprehensive, jurisdiction-correct, cites verifiable law, five conflation traps identified",
          "rank": 3
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 16.2,
          "note": "Dubai Court of Cassation Case 162/2019 and BGE cite unverifiable; core analysis strong",
          "rank": 1
        },
        "grok-4.3": {
          "total": 27.6,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 7.3,
          "note": "Merck v Stephar misapplied; UK national exhaustion characterisation oversimplified post-Brexit",
          "rank": 7
        },
        "mistral-large": {
          "total": 23.7,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 33.7,
          "note": "Multiple hallucinated citations: wrong directives, case names, UAE law numbers cited incorrectly",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 23.0,
          "quality": 6.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 5.6,
          "note": "Wrong UK exhaustion law cited; Article 15 Reg 469/2009 misapplied; Swiss law inaccurate",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 23.2,
          "quality": 7.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 36.6,
          "note": "Retained Regulation (EU) 2019/6 misapplied; Centrafarm cite plausible but context weak",
          "rank": 9
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 81.1,
          "note": "Rigorous multi-jurisdiction analysis; UAE free zone treatment nuanced and accurate.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 43.7,
          "note": "Centrafarm cite valid; UAE law citations plausible but unverified; strong overall.",
          "rank": 6
        }
      }
    },
    {
      "id": 269,
      "category": "AI/Tech Regulation",
      "use_case": "social_scoring_feature_and_dark_pattern_bans",
      "models": {
        "claude-opus-4.8": {
          "total": 31.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 33.6,
          "note": "Rigorous, nuanced AI Act analysis; correctly flags six LLM failure modes.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 98.1,
          "note": "Exceptional depth; CJEU C-184/20 citation plausible but verify accuracy.",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 118.3,
          "note": "Comprehensive, jurisdiction-accurate, six LLM traps exceed required four.",
          "rank": 5
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 15.7,
          "note": "Solid, well-structured analysis; ANPD Technical Note 2/2022 unverified but plausible.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.4,
          "note": "Solid multi-jurisdictional analysis; four LLM-error flags clearly identified and accurate.",
          "rank": 7
        },
        "mistral-large": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 38.6,
          "note": "Solid framework; Annex III point numbering and CPRA ADM rules slightly imprecise",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 25.0,
          "quality": 6.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 8.0,
          "note": "Covers basics but lacks depth on prohibited AI Act Article 5(1)(c) social scoring specifics",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 51.4,
          "note": "Annex III(1) recruitment analogy is strained; Art.5(1)(c) analysis is strong.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 101.8,
          "note": "Exceptional cross-jurisdictional analysis; LLM traps well-identified and legally precise.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 44.3,
          "note": "Schufa C-634/21 and Fashion ID citations are accurate and well-applied.",
          "rank": 3
        }
      }
    },
    {
      "id": 270,
      "category": "Bankruptcy & Insolvency",
      "use_case": "cross_border_insolvency_for_canadian_crypto_custodian",
      "models": {
        "claude-opus-4.8": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 56.2,
          "note": "Exceptional depth; Celsius/FTX cites plausible but unverified; Art.242a correctly cited.",
          "rank": 4
        },
        "claude-sonnet-4.6": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 60.7,
          "note": "Cicada/Ruscetta citations unverifiable; Tulip Trading characterisation overstated; otherwise excellent",
          "rank": 8
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 168.3,
          "note": "Exceptional cross-border depth; Celsius, AA v Persons Unknown, Cryptopia all verifiable.",
          "rank": 6
        },
        "o3": {
          "total": 29.1,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 16.7,
          "note": "Re Quintillion cite unverifiable; BGH IX ZR 33/20 and LG Berlin cite suspect",
          "rank": 5
        },
        "grok-4.3": {
          "total": 31.6,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 7.2,
          "note": "Precise, jurisdiction-specific, correctly flags three LLM error zones.",
          "rank": 1
        },
        "mistral-large": {
          "total": 25.2,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 35.6,
          "note": "Multiple hallucinated citations: BGH case numbers, Re Sturgeon, ABC Learning unverified",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 5.3,
          "note": "Superficial analysis; misses EU Recast Regulation, Swiss DEBA, crypto trust nuances",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 52.3,
          "note": "Rigorous, well-structured; Re Kayford and AA v Persons Unknown citations verifiable.",
          "rank": 3
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 98.9,
          "note": "Exceptional roadmap; Lehman cite slightly misapplied but substantively sound.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 33.1,
          "note": "Celsius cite plausible; DEBA 242a and InsO 343 correctly applied throughout.",
          "rank": 2
        }
      }
    },
    {
      "id": 271,
      "category": "Securities",
      "use_case": "tokenized_equity_listing_and_prospectus_regimes",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 43.3,
          "note": "Precise, jurisdiction-correct, MiCA exclusion correctly flagged, Howey cite verified.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 74.5,
          "note": "Exceptional multi-jurisdictional depth; Landreth and Howey citations verified correct.",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 105.7,
          "note": "Precise, well-structured; correctly identifies MiCA exclusion and four LLM pitfalls.",
          "rank": 6
        },
        "o3": {
          "total": 27.1,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 12.2,
          "note": "SEC v. tZERO cite appears fabricated; core analysis otherwise solid",
          "rank": 8
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 6.1,
          "note": "Solid multi-jurisdictional analysis; Howey cite verified; LLM errors well-identified",
          "rank": 3
        },
        "mistral-large": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 50.4,
          "note": "SEC No-Action Letter to FINRA 2019 on smart contracts appears fabricated.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 5.6,
          "note": "Superficial treatment; misses DLT ledger-based securities, FINSA, SFA exemptions detail",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 28.2,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 42.5,
          "note": "Solid multi-jurisdictional analysis; Telegram cite accurate; Swiss DLT law correct.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 139.4,
          "note": "Exceptional cross-jurisdictional depth; Morrison cite accurate; no hallucinated law",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 34.9,
          "note": "Precise MiCA exclusion, correct FinSA/CO cites, strong pitfall identification throughout.",
          "rank": 2
        }
      }
    },
    {
      "id": 272,
      "category": "Construction",
      "use_case": "mega_project_interface_risk_and_decennial_liability",
      "models": {
        "claude-opus-4.8": {
          "total": 31.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 32.5,
          "note": "Rigorous tri-jurisdictional analysis; traps well-identified; no hallucinated citations.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 48.3,
          "note": "Rigorous tri-jurisdictional analysis; QCC articles plausible but unverified independently",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 29.5,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.5,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 75.7,
          "note": "Precise multi-jurisdictional analysis with correct statutory citations and no hallucinations.",
          "rank": 2
        },
        "o3": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 4.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 11.0,
          "note": "Cass. 3e civ. 1990 Bahamian hotel case appears fabricated/unverifiable",
          "rank": 4
        },
        "grok-4.3": {
          "total": 28.5,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.5,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 6.6,
          "note": "Solid tri-jurisdictional analysis; article citations plausible but unverified.",
          "rank": 5
        },
        "mistral-large": {
          "total": 25.5,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.5,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 29.9,
          "note": "Cairo Court of Appeal Case No. 10/123 JY 2018 appears fabricated; deduct heavily.",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 23.0,
          "quality": 6.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 5.1,
          "note": "Superficial analysis; misses Qatari Civil Code Arts 711-716 decennial analogue",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 27.5,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.5,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 34.4,
          "note": "Solid cross-jurisdictional analysis; Qatari articles plausible but unverified.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 91.3,
          "note": "Precise statutory citations, three LLM traps well-identified, actionable and jurisdiction-correct.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "status": "error",
          "err": "empty completion (reasoning consumed max_tokens)"
        }
      }
    },
    {
      "id": 273,
      "category": "Trusts & Estates",
      "use_case": "offshore_discretionary_trust_and_forced_heirship_overlap",
      "models": {
        "claude-opus-4.8": {
          "total": 30.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 47.5,
          "note": "Esteem and Pugachev citations verifiable; firewall/Hague Art.15 analysis rigorous.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 79.1,
          "note": "Exceptional cross-jurisdictional depth; Art.15 Hague caveat and religious fragmentation correctly flagged.",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 127.3,
          "note": "Rigorous multi-forum analysis; Hague art.15 and firewall limits correctly flagged.",
          "rank": 5
        },
        "o3": {
          "total": 27.0,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 16.1,
          "note": "Abacus v Barr [2003] JRC 225 citation appears fabricated; Art references need verification",
          "rank": 7
        },
        "grok-4.3": {
          "total": 28.5,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.5,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.3,
          "note": "Solid multi-jurisdictional analysis; PILA article citations plausible but unverified.",
          "rank": 4
        },
        "mistral-large": {
          "total": 23.5,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 3.5,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 34.4,
          "note": "BGE cite plausible but unverified; Lebanese/Egyptian case citations likely fabricated",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 6.2,
          "note": "Superficial analysis; misapplies Swiss PILA Articles; misses EU Succession Regulation nuances",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 29.5,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.5,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 56.7,
          "note": "Rigorous multi-jurisdictional analysis; Swiss PIL trust articles need verification.",
          "rank": 2
        },
        "qwen3.7-max": {
          "total": 27.5,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.5,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 105.8,
          "note": "X Trust Beddoe citation unverifiable; substantive analysis otherwise excellent",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "status": "error",
          "err": "empty completion (reasoning consumed max_tokens)"
        }
      }
    },
    {
      "id": 274,
      "category": "Government Contracts",
      "use_case": "defense_cloud_rfp_offset_and_localization",
      "models": {
        "claude-opus-4.8": {
          "total": 31.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 40.9,
          "note": "Precise defense-specific regimes cited; three LLM risk flags clearly labeled",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 63.5,
          "note": "Exceptional cross-jurisdictional analysis; DPDPA/NCC citations need verification",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 137.2,
          "note": "Precise citations, strong cross-jurisdictional conflict analysis, LLM pitfalls well-flagged.",
          "rank": 3
        },
        "o3": {
          "total": 29.1,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 14.2,
          "note": "India 40-bit encryption limit and some ITAR citations require verification",
          "rank": 2
        },
        "grok-4.3": {
          "total": 27.6,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 7.8,
          "note": "DAP Art.194, Abu Dhabi Law 21/2020, Fed Law 3/2020 unverified citations",
          "rank": 6
        },
        "mistral-large": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 42.7,
          "note": "DPP 2020 para citations unverifiable; UAE Law 14/2017 details need verification",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 20.0,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.4,
          "note": "Superficial analysis; misses DPDP Act, DIFC, ITAR re-export bars, MeitY cloud policy",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.3,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 62.5,
          "note": "Solid defense-specific analysis; Tawazun 60% figure needs verification.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 100.4,
          "note": "Rigorous, jurisdiction-specific, actionable; minor PDPA cross-border citation imprecision.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 26.8,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 45.6,
          "note": "Strong structure; 22 CFR cite numbers need verification; Singapore OSA point speculative.",
          "rank": 9
        }
      }
    },
    {
      "id": 275,
      "category": "AI/Tech Regulation",
      "use_case": "ai_act_foundation_model_uk_uae_alignment_gap",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 54.1,
          "note": "Exceptional cross-jurisdictional analysis; correctly flags Title VIII error upfront.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 87.2,
          "note": "Exceptional cross-jurisdictional analysis; minor uncertainty on Article 25 specifics.",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 144.4,
          "note": "Rigorous, well-structured memo; accurate citations; strong jurisdictional differentiation throughout.",
          "rank": 7
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 16.3,
          "note": "Technically strong; minor inaccuracies on Art numbering but no hallucinated case law",
          "rank": 2
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.4,
          "note": "Solid modular framework; Art.52-56 citation slightly off, retention figures unverified",
          "rank": 5
        },
        "mistral-large": {
          "total": 23.2,
          "quality": 7.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 44.8,
          "note": "DFSA Technology Governance Guide 2023, COB 7.6.1, GEN 2.2.10 citations unverifiable/likely hallucinated",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 20.0,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 7.0,
          "note": "Articles 28b/28c/51 cited incorrectly; shallow analysis, generic recommendations throughout",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 89.4,
          "note": "DFSA COB 14.5 and 6.7 citations unverified; likely hallucinated rulebook references.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 102.8,
          "note": "Rigorous hub-and-spoke framework; minor gaps on GPAI systemic risk thresholds.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 28.7,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 38.6,
          "note": "Solid hub-and-spoke framework; PRA SS2/21 and DFSA COB citations accurate.",
          "rank": 4
        }
      }
    },
    {
      "id": 276,
      "category": "Contract & Commercial",
      "use_case": "saas_termination_indemnity_ca_ny_uae_conflict",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 86.6,
          "note": "Rigorous multi-jurisdictional analysis; correctly flags no Rome I SME exception.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 1.9,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 1.9,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 134.5,
          "note": "",
          "rank": 10
        },
        "gpt-5.5": {
          "total": 25.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 141.5,
          "note": "Memo truncated mid-sentence; NY/UAE cases verifiable but Rome I section incomplete.",
          "rank": 6
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 24.2,
          "note": "ESPN cite plausible; UAE Supreme Court petition numbers and BGH cite unverifiable.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 26.6,
          "quality": 7.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 9.4,
          "note": "Solid memo but misapplies Rome I Art.9 consumer directive to B2B affiliate",
          "rank": 4
        },
        "mistral-large": {
          "total": 3.2,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.2,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 52.3,
          "note": "",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 17.0,
          "quality": 4.0,
          "accuracy": 3.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 8.2,
          "note": "MHR Capital misapplied; Cal Civ Code 2772 wrong; redlines superficial",
          "rank": 8
        },
        "deepseek-v3.2": {
          "total": 25.3,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 105.6,
          "note": "Kalisch-Jarcho cited plausibly but UAE Civil Code articles need verification",
          "rank": 5
        },
        "qwen3.7-max": {
          "total": 24.4,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 135.6,
          "note": "BGH VIII ZR 32/12 and Federal Decree-Law 42/2024 citations unverified/suspect",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 26.7,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 34.8,
          "note": "Kalisch-Jarcho and Frontier citations require verification; UAE statute numbers plausible but unverified",
          "rank": 3
        }
      }
    },
    {
      "id": 277,
      "category": "Data Privacy",
      "use_case": "employee_monitoring_hybrid_office_gdpr_pipeda_pdpa",
      "models": {
        "claude-opus-4.8": {
          "total": 30.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 67.9,
          "note": "C-34/21 cite plausible but slightly mischaracterized; otherwise excellent analysis",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 27.5,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.5,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 135.5,
          "note": "Exceptionally thorough; minor risk Orange Romania citation slightly overstated.",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 25.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 167.9,
          "note": "Solid comparative analysis; answer truncated before PDPA and divergence points.",
          "rank": 7
        },
        "o3": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 17.7,
          "note": "BAG case citation unverified; LfDI/OPC references plausible but unconfirmed.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 28.5,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.5,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 8.2,
          "note": "BAG 2 AZR 852/16 citation unverified; core GDPR analysis solid",
          "rank": 4
        },
        "mistral-large": {
          "total": 3.5,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.5,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 48.7,
          "note": "",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 16.0,
          "quality": 3.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 1.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.6,
          "note": "Superficial step-by-step format; lacks actionable jurisdiction-specific configs and depth.",
          "rank": 8
        },
        "deepseek-v3.2": {
          "total": 27.5,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 2.5,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 104.4,
          "note": "PIPEDA case summaries and OPC report citations appear fabricated or unverifiable.",
          "rank": 6
        },
        "qwen3.7-max": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 119.8,
          "note": "Jones v Tsige cited correctly; EU AI Act Annex III reference accurate and forward-looking.",
          "rank": 3
        },
        "gemini-3.1-pro": {
          "status": "error",
          "err": "empty completion (reasoning consumed max_tokens)"
        }
      }
    },
    {
      "id": 278,
      "category": "M&A",
      "use_case": "carve_out_jp_br_uk_export_controls_and_lgpd",
      "models": {
        "claude-opus-4.8": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 74.4,
          "note": "Exceptional cross-jurisdictional depth; LGPD/GDPR mismatch analysis is production-ready.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 127.7,
          "note": "Exceptional cross-regime analysis; Fashion ID cite accurate; ANPD SCC resolution needs verification",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 141.1,
          "note": "Rigorous multi-regime mapping; LGPD analysis cut off mid-sentence at end.",
          "rank": 6
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 22.2,
          "note": "Thorough, well-structured; minor ANPD resolution citations need verification",
          "rank": 2
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.2,
          "note": "Solid multi-regime analysis; LGPD joint-controller gap well-identified; actionable terms.",
          "rank": 3
        },
        "mistral-large": {
          "total": 22.2,
          "quality": 7.0,
          "accuracy": 3.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 47.1,
          "note": "Hallucinated Tokyo District Court case and CJEU C-414/18 Epsilon; METI Order numbers unverified.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 20.0,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.7,
          "note": "Superficial analysis; wrong EU Dual-Use Regulation citation; weak LGPD-GDPR mismatch treatment",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.8,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 73.6,
          "note": "Solid framework; de minimis threshold error (EAR uses 25% for some, 10% others)",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 124.0,
          "note": "Rigorous cross-regime mapping; LGPD/GDPR mismatch well-articulated; no hallucinations.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 45.3,
          "note": "ANPD Resolution CD/ANPD No. 19 August 2024 SCCs unverified; likely hallucinated.",
          "rank": 8
        }
      }
    },
    {
      "id": 279,
      "category": "Corporate Governance",
      "use_case": "dual_listed_nigeria_csrd_climate_governance_gap",
      "models": {
        "claude-opus-4.8": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 85.8,
          "note": "ClientEarth v Shell cited accurately with correct outcome caveat; Sequana correct.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 134.9,
          "note": "Exceptional cross-jurisdictional depth; OHADA citations verifiable; CS3D thresholds accurate",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 183.0,
          "note": "Thorough, well-cited, but memo cuts off mid-sentence before completing analysis",
          "rank": 7
        },
        "o3": {
          "total": 29.1,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 20.4,
          "note": "Tawa Petroleum v. Odu [2018] appears fabricated; UACCEIG Art.891-1 unverified",
          "rank": 2
        },
        "grok-4.3": {
          "total": 25.6,
          "quality": 8.0,
          "accuracy": 4.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 10.1,
          "note": "Akinyemi v Ojo and Arrêté No. 2022-XXX appear fabricated; penalized heavily",
          "rank": 8
        },
        "mistral-large": {
          "total": 24.2,
          "quality": 8.0,
          "accuracy": 4.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 48.9,
          "note": "Glencore fine, HSBC fine, McGaughey outcome, Petrofac facts mischaracterized",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 22.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 8.3,
          "note": "Superficial analysis; misses CSRD third-country thresholds, ESRS specifics, concrete risks",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.8,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 78.9,
          "note": "Solid cross-jurisdictional analysis; CSDDD status and ESRS thresholds need precision.",
          "rank": 5
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 133.8,
          "note": "Vedanta/Okpabi correctly cited; CSDDD/CSRD provisions accurately applied throughout.",
          "rank": 3
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 39.4,
          "note": "Vedanta/Okpabi cited correctly; Côte d'Ivoire 2023 code needs verification.",
          "rank": 6
        }
      }
    },
    {
      "id": 280,
      "category": "Banking/Finance",
      "use_case": "project_finance_security_uae_qatar_ohada_ny",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 69.1,
          "note": "Rigorous multi-jurisdictional matrix; AUS/EMCR/parallel debt analysis accurate and actionable.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 137.1,
          "note": "Exceptional depth across all three jurisdictions; answer cut off before parallel debt conclusion.",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 193.8,
          "note": "Strong matrix but truncated; misses parallel debt, sovereign immunity, false-security analysis",
          "rank": 7
        },
        "o3": {
          "total": 29.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 20.8,
          "note": "Dubai Court of Cassation 2009/200 parallel-debt cite appears fabricated/unverifiable",
          "rank": 2
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.7,
          "note": "Solid matrix; parallel debt rationale and OHADA analysis are strong.",
          "rank": 4
        },
        "mistral-large": {
          "total": 24.2,
          "quality": 8.0,
          "accuracy": 4.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 56.5,
          "note": "Multiple case citations appear fabricated; core legal framework analysis is solid",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.1,
          "note": "Superficial analysis; lacks OHADA AUSC specifics, Qatar pledge law, UAE SCA details",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 91.1,
          "note": "Solid framework; Qatar Law 26/2021 citation needs verification; OHADA analysis strong",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 155.1,
          "note": "Rigorous cross-jurisdictional matrix; minor gaps on CCJA enforcement precedent.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 49.6,
          "note": "Solid framework; Qatar Law No.11/2019 details need local counsel verification.",
          "rank": 6
        }
      }
    },
    {
      "id": 281,
      "category": "Employment Law",
      "use_case": "remote_worker_redundancy_ca_uk_india",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 67.4,
          "note": "Rigorous, jurisdiction-correct, actionable; correctly flags CA non-compete illegality and UK settlement formalities.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 136.7,
          "note": "Exceptional depth; minor risk on Grimmer cite but core analysis sound",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 143.0,
          "note": "Thorough, well-structured; answer cut off mid-sentence on India section.",
          "rank": 9
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 24.8,
          "note": "Tillman and Herbert Morris are real; BCP v Orton needs verification but plausible.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 30.6,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.4,
          "note": "Dyson/Pellereau cite unverified but plausible; Golikari cite accurate",
          "rank": 3
        },
        "mistral-large": {
          "total": 28.2,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 47.9,
          "note": "Reuse Collections and SW para citations unverifiable; core analysis solid",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 21.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 9.0,
          "note": "Superficial analysis; misses ACAS, settlement agreement formalities, India retrenchment nuance",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 118.2,
          "note": "Thorough, jurisdiction-accurate, actionable; minor gap on gratuity thresholds India.",
          "rank": 5
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 122.9,
          "note": "Iskanian and Percept D'Mark citations are real; Adyanthaya verified accurate.",
          "rank": 8
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 34.7,
          "note": "Percept D'Mark citation is real but tangentially applied; otherwise solid.",
          "rank": 4
        }
      }
    },
    {
      "id": 282,
      "category": "Immigration",
      "use_case": "distributed_founder_dual_cfo_residence_visa_structuring",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 65.0,
          "note": "De Beers cite accurate; s.6(1A) deemed-resident trap well-spotted; thorough cross-jurisdictional analysis",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 131.8,
          "note": "Thomson v MNR and De Beers correctly cited; s.6(1A) risk astutely flagged",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 24.0,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 144.1,
          "note": "Solid Canadian/UK analysis; memo cuts off mid-sentence, incomplete on UAE/India",
          "rank": 9
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 20.1,
          "note": "Thorough, well-structured memo; IRPR and ITA citations appear accurate and verifiable.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.2,
          "note": "Solid multi-jurisdiction analysis; UAE minimum presence rules could be elaborated further.",
          "rank": 3
        },
        "mistral-large": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 47.6,
          "note": "Thomson v MNR cite is real; UAE decree citations plausible but unverified",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 6.5,
          "note": "Superficial analysis; misses PE risk, shadow payroll, FEMA, OCI nuances",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 76.8,
          "note": "Solid multi-jurisdictional memo; ECR passport assumption and HMRC title outdated.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 112.5,
          "note": "Thorough multi-jurisdictional analysis; statutory citations verifiable; actionable recommendations strong.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 28.7,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 37.2,
          "note": "De Beers cite legitimate; CBCA 25% rule correctly flagged; solid multi-jurisdictional analysis",
          "rank": 5
        }
      }
    },
    {
      "id": 283,
      "category": "Arbitration",
      "use_case": "multitier_escalation_clause_difc_singapore_ksa",
      "models": {
        "claude-opus-4.8": {
          "total": 25.3,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 75.4,
          "note": "BBA v BAZ, BTN v BTP, Republic of India v Deutsche Telekom misapplied/hallucinated",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 1.4,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 1.4,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 131.2,
          "note": "",
          "rank": 10
        },
        "gpt-5.5": {
          "total": 24.0,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 162.5,
          "note": "C v D [2023] SGCA 14 appears fabricated; BBA v BAZ citation unverified",
          "rank": 4
        },
        "o3": {
          "total": 4.1,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.1,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 20.4,
          "note": "",
          "rank": 7
        },
        "grok-4.3": {
          "total": 27.6,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.3,
          "note": "Ohpen and IRC cases real but DIFC application slightly stretched; solid overall",
          "rank": 1
        },
        "mistral-large": {
          "total": 23.2,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 44.0,
          "note": "Multiple fabricated Saudi Grievance Board cases and dubious DIFC citations penalized.",
          "rank": 5
        },
        "llama-4-maverick": {
          "total": 15.0,
          "quality": 3.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 1.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 6.6,
          "note": "Superficial analysis, no case law, generic conclusions, poor formatting for legal opinion",
          "rank": 6
        },
        "deepseek-v3.2": {
          "total": 2.8,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 2.8,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 71.8,
          "note": "",
          "rank": 9
        },
        "qwen3.7-max": {
          "total": 24.9,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 96.3,
          "note": "CZQ v CZS and BTN v BTP citations unverifiable; likely hallucinated.",
          "rank": 3
        },
        "gemini-3.1-pro": {
          "total": 3.7,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.7,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 28.3,
          "note": "",
          "rank": 8
        }
      }
    },
    {
      "id": 284,
      "category": "International Trade",
      "use_case": "rules_of_origin_cumulation_brexit_eu_uk_jp",
      "models": {
        "claude-opus-4.8": {
          "total": 29.5,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.5,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 73.8,
          "note": "Excellent tripartite analysis; PSR specifics appropriately caveated; no hallucinated cites",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 25.0,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 132.9,
          "note": "Brother/Sinochem cases misapplied; TCA battery PSR details overstated with false precision",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 18.0,
          "quality": 4.0,
          "accuracy": 6.0,
          "speed": 1.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 152.2,
          "note": "Answer truncated mid-sentence; incomplete analysis despite solid preliminary framing",
          "rank": 8
        },
        "o3": {
          "total": 28.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 23.3,
          "note": "Solid dual-hub analysis; some PSR figures unverified but no fabricated case law",
          "rank": 3
        },
        "grok-4.3": {
          "total": 28.5,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.5,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 7.7,
          "note": "EU Reg. 2019/452 is FDI screening, not anti-circumvention; misapplied.",
          "rank": 2
        },
        "mistral-large": {
          "total": 24.5,
          "quality": 7.0,
          "accuracy": 5.0,
          "speed": 3.5,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 51.8,
          "note": "Specific article numbers for both protocols are fabricated/unverified citations.",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 12.0,
          "quality": 2.0,
          "accuracy": 3.0,
          "speed": 5.0,
          "style": 1.0,
          "creativity": 1.0,
          "halluc": false,
          "latency": 7.6,
          "note": "Generic step-by-step with no specific rules, thresholds, or actionable analysis.",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 27.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 72.9,
          "note": "Specific RoO citations plausible but unverified; strong structural analysis overall",
          "rank": 4
        },
        "qwen3.7-max": {
          "total": 25.5,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.5,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 133.2,
          "note": "Strong structure; some PSR specifics and TCA battery thresholds need verification.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "status": "error",
          "err": "empty completion (reasoning consumed max_tokens)"
        }
      }
    },
    {
      "id": 285,
      "category": "Regulatory Compliance",
      "use_case": "global_payment_superapp_mas_mica_psd2",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 89.1,
          "note": "Rigorous multi-regime analysis; GENIUS Act citation plausible but unverifiable.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 126.8,
          "note": "Exceptionally thorough; truncated US section; no hallucinated citations detected",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 140.8,
          "note": "Solid multi-jurisdiction memo; truncated Singapore section; misclassification examples missing",
          "rank": 9
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 23.3,
          "note": "Thorough, well-structured memo; minor capital figure imprecisions but no hallucinated caselaw.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 29.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.9,
          "note": "Solid framework; minor inaccuracies in PSD2 capital figures and MiCA article citations.",
          "rank": 4
        },
        "mistral-large": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 88.5,
          "note": "Solid framework; some MiCA/FSMA 2023 details imprecise but no hallucinated cases",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 20.6,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 10.5,
          "note": "Superficial analysis; capital figures imprecise; misclassification examples generic and underdeveloped.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 108.9,
          "note": "Solid framework; some MiCA capital figures and MAS Notice 1002 details imprecise.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 125.6,
          "note": "Thorough, well-structured; minor gaps in UK 2023 stablecoin timeline accuracy.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 51.3,
          "note": "Solid memo; SEC v. Kraken/Coinbase cited accurately as enforcement actions.",
          "rank": 3
        }
      }
    },
    {
      "id": 286,
      "category": "IP/Tech Law",
      "use_case": "opensource_compliance_dual_licensing_india_ch_switzerland",
      "models": {
        "claude-opus-4.8": {
          "total": 28.5,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 2.5,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 94.9,
          "note": "Chinese GPL cases cited imprecisely; Jacobsen/Artifex accurate; strong cross-jurisdictional analysis",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.5,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.5,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 140.9,
          "note": "Yuanxin cite plausible but unverified; otherwise rigorous multi-jurisdictional analysis.",
          "rank": 2
        },
        "gpt-5.5": {
          "total": 25.0,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 163.6,
          "note": "Solid AGPLv3 analysis but memo cuts off before China, export controls, misleading US example",
          "rank": 5
        },
        "o3": {
          "total": 4.0,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.0,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 28.3,
          "note": "",
          "rank": 9
        },
        "grok-4.3": {
          "total": 28.0,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 7.6,
          "note": "MIIT 2021 Open Source Regulations and AGPLv3 US choice-of-law claim unverified",
          "rank": 3
        },
        "mistral-large": {
          "total": 23.5,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 3.5,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 49.3,
          "note": "Syntellect case and Beijing Qihoo citation appear fabricated; deduct heavily.",
          "rank": 6
        },
        "llama-4-maverick": {
          "total": 17.5,
          "quality": 4.0,
          "accuracy": 4.0,
          "speed": 4.5,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 8.5,
          "note": "Beijing Supreme Court 2020 guiding opinion cited without verifiable basis.",
          "rank": 8
        },
        "deepseek-v3.2": {
          "total": 20.0,
          "quality": 6.0,
          "accuracy": 4.0,
          "speed": 3.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 52.9,
          "note": "Chinese GPL case citation appears fabricated; EAR public domain exemption omitted",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 112.2,
          "note": "Jacobsen v. Katzer citation accurate; Indian adaptation analysis nuanced and correct.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "status": "error",
          "err": "empty completion (reasoning consumed max_tokens)"
        }
      }
    },
    {
      "id": 287,
      "category": "Real Estate",
      "use_case": "foreign_ownership_reit_structuring_uae_australia_canada",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 66.2,
          "note": "Thorough, well-caveated memo; FGI/SWF risk and UAE CT flagged expertly.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 136.3,
          "note": "Exceptional depth, correct citations, strong cross-jurisdictional warnings, memo cut off mid-sentence",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 197.5,
          "note": "Rigorous, well-cited memo; truncated before Canada section fully addressed.",
          "rank": 8
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 26.4,
          "note": "Technically strong, jurisdiction-specific, minor treaty rate and ICA ruling uncertainties.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.7,
          "note": "Solid framework; FIRB 30% ownership condition and AUD thresholds need verification.",
          "rank": 4
        },
        "mistral-large": {
          "total": 25.2,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 53.3,
          "note": "Dubai Court of Cassation Case No. 1/2018 appears hallucinated; several cite errors",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 18.0,
          "quality": 4.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.4,
          "note": "Superficial analysis; wrong FIRB thresholds; misses Prohibition on Foreign Buyers Act Canada",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.3,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 97.4,
          "note": "Solid, well-structured memo; minor gaps on UAE UBO rules and MIT eligibility.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 122.1,
          "note": "Technically strong, verifiable citations, MIT widely-held nuance slightly oversimplified.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 28.7,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 35.6,
          "note": "Solid, actionable memo; MIT withholding rate and FGI thresholds need verification.",
          "rank": 3
        }
      }
    },
    {
      "id": 288,
      "category": "Tax",
      "use_case": "pillar_two_and_digital_services_tax_double_exposure",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 67.8,
          "note": "Exceptional depth; Canada DST pause and India repeal flagged with appropriate caveats.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 129.9,
          "note": "Exceptional depth; Finance Act 2024 India EL abolition correctly flagged; minor truncation.",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 24.0,
          "quality": 7.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 4.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 169.6,
          "note": "Solid framework but answer truncated mid-sentence; Nigeria analysis notably strong",
          "rank": 8
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 24.0,
          "note": "Rigorous, well-structured analysis with accurate citations and actionable mitigation strategies.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.2,
          "note": "Solid analysis; minor inaccuracies in GloBE article citations but no hallucinated case law.",
          "rank": 5
        },
        "mistral-large": {
          "total": 23.2,
          "quality": 7.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 49.7,
          "note": "UK DST not repealed; Canderel misapplied; Nigeria NDT overstated; scenarios weak",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 16.0,
          "quality": 3.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 7.5,
          "note": "Superficial analysis, step-by-step format unprofessional, scenarios underdeveloped, no real strategies",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 87.8,
          "note": "Staatsecretaris cite unverifiable; Nigeria SEP rate and Bill C-59 framing imprecise",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 111.8,
          "note": "Rigorous GloBE/DST interaction analysis; India s.10(50)/40(a)(ib) correctly cited.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 36.5,
          "note": "Rigorous GloBE analysis; Canada DST Act citation slightly imprecise but defensible.",
          "rank": 3
        }
      }
    },
    {
      "id": 289,
      "category": "Criminal/White Collar",
      "use_case": "crossborder_bribery_internal_investigation_mena_canada_uk",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 75.5,
          "note": "Hoskins cite accurate; SFO v ENRC citation correct; thorough multi-regime analysis",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 140.7,
          "note": "Exceptional depth; Hoskins, Three Rivers, Upjohn, Rolls-Royce cites verified accurate.",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 144.0,
          "note": "Hoskins cite accurate; memo truncated before conflicts/tensions section.",
          "rank": 6
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 17.8,
          "note": "Solid memo; SNC-Lavalin DPA 2022 claim unverified but plausible.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 32.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.1,
          "note": "Accurate statutes, strong sequencing logic, clean privilege tension analysis.",
          "rank": 1
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 57.9,
          "note": "R v Sweett citation inaccurate; ENRC privilege holding misstated; otherwise solid",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 24.6,
          "quality": 6.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 9.7,
          "note": "Competent overview but lacks depth on sequencing, privilege conflicts, and MENA specifics.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.3,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 86.4,
          "note": "Solid multi-jurisdictional memo; UAE/Egypt local law citations plausible but unverified.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 129.4,
          "note": "SEC v. Straub cite misapplied; Three Rivers/ENRC analysis solid overall",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 33.4,
          "note": "ENRC and Three Rivers citations accurate; strong multi-regime tension analysis throughout.",
          "rank": 4
        }
      }
    },
    {
      "id": 290,
      "category": "Securities",
      "use_case": "tokenized_equity_offering_mica_mas_sec_conflicts",
      "models": {
        "claude-opus-4.8": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 70.3,
          "note": "Rigorous, well-structured; correctly excludes equity tokens from MiCA scope throughout.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 135.6,
          "note": "Exceptional multi-jurisdictional analysis; Landreth, Howey, Reg ATS citations verified.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 159.0,
          "note": "Landreth Timber correctly cited; memo truncated before Swiss/US sections completed.",
          "rank": 8
        },
        "o3": {
          "total": 29.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 23.2,
          "note": "SEC v. Heart cite and Release 34-96923 appear fabricated or misattributed",
          "rank": 5
        },
        "grok-4.3": {
          "total": 30.6,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.0,
          "note": "Solid, well-structured memo; Howey cite verified; MiCA exclusion correctly applied.",
          "rank": 1
        },
        "mistral-large": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 45.7,
          "note": "SEC v. Coinbase/Binance/Ripple citations oversimplified; ESMA MiCA Q&A 2023 unverified",
          "rank": 7
        },
        "llama-4-maverick": {
          "total": 18.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 6.7,
          "note": "Superficial analysis; mischaracterises MiCA ART category; weak Swiss DLT treatment",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 23.8,
          "quality": 7.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 67.4,
          "note": "SEC v. Scoville citation misapplied; ART classification for equity tokens incorrect",
          "rank": 9
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 99.9,
          "note": "Rigorous, well-structured; correctly flags MiCA exclusion and DLT Pilot Regime.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 32.9,
          "note": "Solid cross-jurisdictional analysis; SEC v. Beaxy/Bittrex citations are real cases.",
          "rank": 3
        }
      }
    },
    {
      "id": 291,
      "category": "Banking/Finance",
      "use_case": "islamic_finance_sukuk_crossdefault_ksa_uk_malaysia",
      "models": {
        "claude-opus-4.8": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 84.8,
          "note": "Exceptional depth; Dana Gas, Adler, DeepOcean cites verifiable; Saudi law flagged appropriately.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 138.8,
          "note": "Exceptional depth; Dana Gas, Rodenstock, Apcoa cites verifiable; Saudi trust gap well-identified",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 160.7,
          "note": "Solid multi-jurisdictional analysis; memo truncated before Malaysian/UAE sections completed.",
          "rank": 7
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 21.0,
          "note": "Re Agrokor Eurobond and Al-Gosaibi 2022 citations unverifiable; Dana Gas cite plausible",
          "rank": 2
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.7,
          "note": "Solid multi-jurisdictional analysis; correctly identifies debt/ownership mismatch as key misleading point.",
          "rank": 3
        },
        "mistral-large": {
          "total": 23.7,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 43.0,
          "note": "Dallah/Lehman/Investment Dar citations misapplied or mischaracterised; core analysis solid",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 18.0,
          "quality": 5.0,
          "accuracy": 3.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 6.7,
          "note": "Belmont Park misapplied; AAOIFI cite unverifiable; shallow cross-default analysis",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 25.8,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 93.3,
          "note": "Three Rivers and Re Hawk cited plausibly but unverified; Rome I inapplicable post-Brexit",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 117.6,
          "note": "Re Ocean Rig unverified but plausible; Dana Gas cite accurate; solid analysis",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 136.6,
          "note": "Re Garuda Indonesia cite unverified; Dana Gas cite accurate; Gibbs correct",
          "rank": 5
        }
      }
    },
    {
      "id": 292,
      "category": "Bankruptcy & Insolvency",
      "use_case": "crossborder_insolvency_lebanon_switzerland_chapter15",
      "models": {
        "claude-opus-4.8": {
          "total": 28.8,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 86.5,
          "note": "Vitro cite misattributed; 5th Cir. not controlling on Ch.15 COMI issues",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 136.9,
          "note": "Exceptionally thorough; minor risk on Lebanese Code articles specificity",
          "rank": 2
        },
        "gpt-5.5": {
          "total": 26.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 142.5,
          "note": "Solid multi-jurisdictional analysis; truncated before completing Swiss/French/conflict sections.",
          "rank": 6
        },
        "o3": {
          "total": 26.1,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 21.0,
          "note": "In re BNP Paribas 755 F.3d 262 and Banco Santos citations appear fabricated",
          "rank": 5
        },
        "grok-4.3": {
          "total": 27.6,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 10.2,
          "note": "Vitro cite misapplied; §109(b) bank exclusion overstated; PILA articles plausible",
          "rank": 4
        },
        "mistral-large": {
          "total": 25.2,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 49.0,
          "note": "BES/FINMA 2015 and flyLAL C-302/13 citations misapplied or hallucinated",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 18.0,
          "quality": 5.0,
          "accuracy": 3.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 7.7,
          "note": "Hallucinated Société Générale case; shallow analysis; truncated conclusion; generic strategies",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 25.3,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 86.9,
          "note": "SPhinX, Vitro, Gold & Honey citations misapplied or unverifiable in context",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 24.9,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 119.9,
          "note": "Banco Espirito Santo cite unverifiable; Vitro misapplied; Aerospatiale accurate",
          "rank": 9
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 35.5,
          "note": "Drawbridge/Barnet cite misapplied; Law 306/2022 unverified; otherwise solid analysis",
          "rank": 3
        }
      }
    },
    {
      "id": 293,
      "category": "Healthcare/Pharma",
      "use_case": "telemedicine_prescribing_conflict_us_uk_nigeria_india",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 68.5,
          "note": "Comprehensive, well-cited, actionable memo with strong cross-jurisdictional analysis.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 124.3,
          "note": "Comprehensive, well-structured; truncated Nigeria NDPA section slightly reduces completeness score.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 141.1,
          "note": "Thorough, well-cited memo; truncated before completing pharmacy/privacy sections.",
          "rank": 8
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 26.0,
          "note": "Thorough, well-structured memo; minor gaps in Nigeria e-Rx nuance.",
          "rank": 2
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 11.9,
          "note": "Solid statutory citations, actionable structure, minor gaps on Ryan Haight DEA exceptions.",
          "rank": 3
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 52.3,
          "note": "DISHA not enacted; MDCN 2020 guidelines and IMLC NY status need verification",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 5.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 8.3,
          "note": "MCI 2018 telemedicine guidelines fabricated; MDCN 2020 citation unverified; shallow analysis",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.3,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 79.9,
          "note": "Solid, well-structured memo; statutes verifiable; India localisation nuanced correctly.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 121.8,
          "note": "Thorough, well-structured; minor gaps in IMLC NY status nuance.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 38.0,
          "note": "Solid framework; NY §6526(3) exception characterization slightly oversimplified.",
          "rank": 6
        }
      }
    },
    {
      "id": 294,
      "category": "Energy/Climate",
      "use_case": "carbon_capture_ofake_agreements_esg_greenwashing_risk",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 82.1,
          "note": "Exceptional cross-jurisdictional analysis; Omnibus/Green Claims uncertainty appropriately flagged.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 138.6,
          "note": "Directive 2022/2041 citation incorrect; Autonomy/HP cite unverifiable; otherwise strong",
          "rank": 7
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 141.4,
          "note": "Thorough multi-jurisdictional analysis; real citations; answer cut off mid-sentence.",
          "rank": 6
        },
        "o3": {
          "total": 29.1,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 25.4,
          "note": "BNY v Euromoney, Mensalapão, ASIC v Tlou citations appear fabricated or misattributed",
          "rank": 4
        },
        "grok-4.3": {
          "total": 31.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.0,
          "note": "Solid multi-jurisdictional analysis; accurate citations; actionable remediation framework provided.",
          "rank": 1
        },
        "mistral-large": {
          "total": 24.2,
          "quality": 8.0,
          "accuracy": 4.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 65.7,
          "note": "Multiple hallucinated cases: Abanca C-776/19, Barclays v Svizera, Holmcroft v KPMG misapplied",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 21.6,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 4.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 9.8,
          "note": "Superficial analysis; misses EUDR, MAR, double-counting REDD+ specifics, lender liability",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 95.7,
          "note": "Solid multi-jurisdictional analysis; minor inaccuracies on AFIRP, Empowering Consumers Directive timing.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 105.7,
          "note": "Caparo cite accurate; PL 412/2022 reference slightly imprecise but defensible",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 46.0,
          "note": "Comprehensive, well-structured, jurisdiction-accurate; minor gaps on CVM Resolution 59 specifics.",
          "rank": 2
        }
      }
    },
    {
      "id": 295,
      "category": "Consumer Protection",
      "use_case": "subscription_dark_patterns_us_eu_uae_singapore",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 76.2,
          "note": "Planet49 and Océano Grupo correctly cited; Eighth Circuit vacatur caveat appropriately flagged.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 131.8,
          "note": "Comprehensive, well-structured; answer truncated mid-sentence; case citations appear verifiable",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 25.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 159.4,
          "note": "Solid comparative analysis; answer truncated mid-sentence on UAE section.",
          "rank": 8
        },
        "o3": {
          "total": 28.1,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 21.3,
          "note": "McKinney v. Google and Allen v. Samsung citations appear fabricated or misattributed",
          "rank": 5
        },
        "grok-4.3": {
          "total": 30.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.5,
          "note": "Solid comparative analysis; accurate citations; actionable UX fixes; no hallucinations.",
          "rank": 1
        },
        "mistral-large": {
          "total": 3.2,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.2,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 51.9,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 20.6,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 8.3,
          "note": "Superficial analysis; missing DSA, CCPA, FTC Rule 2023, UAE 2023 law updates",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 29.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 94.5,
          "note": "Thorough, accurate, well-structured; minor gap on UAE e-commerce specifics.",
          "rank": 4
        },
        "qwen3.7-max": {
          "total": 25.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 119.6,
          "note": "FTC v. Vonage cited inaccurately; core analysis otherwise solid and actionable.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 29.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 32.8,
          "note": "Solid analysis; FTC v. Amazon cite unverified but plausible; UAE regs accurate",
          "rank": 3
        }
      }
    },
    {
      "id": 296,
      "category": "Government Contracts",
      "use_case": "defense_software_offset_localization_ksa_india_australia",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 73.0,
          "note": "Rigorous, well-structured; AUKUS §126.7 details warrant verification pre-reliance.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 144.7,
          "note": "Exceptional depth, accurate citations, strong cross-jurisdictional conflict analysis throughout.",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 164.8,
          "note": "Rigorous multi-jurisdiction memo; table truncated but cites verifiable and accurate.",
          "rank": 7
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 22.6,
          "note": "Rigorous cross-jurisdictional analysis; ITAR cite precision slightly overstated but no fabrications.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.4,
          "note": "Solid, well-structured memo; AUKUS 2024 exemption framing slightly imprecise.",
          "rank": 4
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 64.1,
          "note": "Airbus cite misapplied; EAR/ITAR section citations need verification",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 17.0,
          "quality": 3.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 6.0,
          "note": "Superficial analysis, no memo format, ends absurdly with boxed ITAR",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 99.4,
          "note": "Solid framework; AUKUS IP Principles citation speculative but not fabricated case law.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 116.0,
          "note": "Rigorous bifurcation strategy; AUKUS retransfer trap is non-obvious and well-argued.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 36.7,
          "note": "Rigorous, actionable memo; AUKUS-KSA supply chain taint insight is excellent.",
          "rank": 2
        }
      }
    },
    {
      "id": 297,
      "category": "Contract & Commercial",
      "use_case": "uae_difc_english_law_supply_chain_step_in_rights",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 56.0,
          "note": "Rigorous tri-jurisdictional drafting; prudent flags on licensing and DIFC gateway risks.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 1.4,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 1.4,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 120.6,
          "note": "",
          "rank": 10
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 156.8,
          "note": "Comprehensive, jurisdiction-aware, correct insolvency references, strong cross-border risk mitigation.",
          "rank": 3
        },
        "o3": {
          "total": 28.1,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 13.4,
          "note": "Solid cross-jurisdictional drafting; UAE Civil Code articles need verification.",
          "rank": 5
        },
        "grok-4.3": {
          "total": 29.0,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 6.3,
          "note": "Solid dual-trigger drafting; UAE Bankruptcy Law number slightly imprecise but workable.",
          "rank": 4
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 42.5,
          "note": "Article 104 UAE Bankruptcy Law misapplied; DMCA Resolution 1/2019 unverified citation",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 19.6,
          "quality": 5.0,
          "accuracy": 5.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 6.3,
          "note": "Superficial treatment; missing DIFC enforcement mechanics, licensing specifics, novation consent gaps.",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 75.9,
          "note": "Solid cross-jurisdictional drafting; UAE Civil vs Commercial Law conflation minor issue.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 26.9,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 107.3,
          "note": "Sophisticated cross-jurisdictional drafting; UAE CTL article citations need independent verification.",
          "rank": 6
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 35.9,
          "note": "Sophisticated cross-jurisdictional drafting; UAE Bankruptcy Law citation slightly imprecise",
          "rank": 1
        }
      }
    },
    {
      "id": 298,
      "category": "Regulatory Compliance",
      "use_case": "eu_dora_uk_ops_resilience_sg_mas_outsourcing_gap",
      "models": {
        "claude-opus-4.8": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 45.6,
          "note": "Rigorous, well-structured; minor RTS/ITS citation precision issues but no hallucinations.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 78.7,
          "note": "Exceptional cross-jurisdictional analysis; minor uncertainty on some draft RTS citations.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 110.3,
          "note": "Thorough, well-cited, actionable memo with strong cross-jurisdictional divergence analysis.",
          "rank": 6
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 13.7,
          "note": "Rigorous cross-regime analysis; minor RTS timeline uncertainty but no hallucinations.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 30.6,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 6.9,
          "note": "Precise citations, actionable GORP structure, conflict flags well-identified and practical.",
          "rank": 2
        },
        "mistral-large": {
          "total": 25.8,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 53.4,
          "note": "Several specific article/paragraph citations unverifiable or inaccurate; MAS 644 details questionable",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 20.0,
          "quality": 4.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 6.1,
          "note": "Superficial divergence analysis; lacks concrete cross-jurisdictional conflicts and actionable governance detail.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.3,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 74.0,
          "note": "Solid cross-jurisdictional analysis; DORA article citations largely accurate; actionable structure.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 102.5,
          "note": "Precise divergences, actionable MORA structure, correct citations, minor RTS timeline imprecision",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 28.7,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 38.1,
          "note": "Solid cross-regime analysis; DORA Art. citations accurate; MAS 1-hour rule slightly overstated.",
          "rank": 5
        }
      }
    },
    {
      "id": 299,
      "category": "Corporate Governance",
      "use_case": "dual_fiduciary_duties_de_spac_vs_ca_subsidiary",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 35.5,
          "note": "Accurate, thorough, actionable; §2115 flag and MFW protocol are standout additions.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 55.6,
          "note": "Drulias cite accurate; MFW, Corwin, Van Gorkom all correctly applied.",
          "rank": 4
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 142.0,
          "note": "Precise dual-jurisdiction analysis; all citations verifiable; actionable mitigants well-structured.",
          "rank": 3
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 15.7,
          "note": "Katz v. Chevron citation unverifiable but core holdings accurate throughout",
          "rank": 2
        },
        "grok-4.3": {
          "total": 27.6,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 6.2,
          "note": "Trenwick cite misapplied; Revlon extension to sub sale overstated",
          "rank": 5
        },
        "mistral-large": {
          "total": 3.7,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.7,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 23.4,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 18.0,
          "quality": 5.0,
          "accuracy": 3.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 5.7,
          "note": "Anadarko cite misapplied; Tele-Commc'ns cite unverifiable; shallow cross-jurisdictional analysis",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 22.3,
          "quality": 7.0,
          "accuracy": 5.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": true,
          "latency": 47.4,
          "note": "Anadarko cite misapplied; Everest Investors case dubious; §316 exculpation misstated",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 24.4,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 99.0,
          "note": "Anadarko cite misapplied; Gheewalla correct but context strained slightly",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 25.2,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 28.0,
          "note": "Anadarko/Trenwick cited for wrong proposition; hallucinated subsidiary-duty holdings",
          "rank": 6
        }
      }
    },
    {
      "id": 300,
      "category": "Data Privacy",
      "use_case": "gdpr_pdpa_pipeda_hr_analytics_conflict_map",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 84.9,
          "note": "Rigorous high-water-mark analysis; DPA clauses actionable; caveats appropriate.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 133.5,
          "note": "Exceptional tri-jurisdictional depth; TIA, BDSG, and chain-transfer gaps precisely identified.",
          "rank": 5
        },
        "gpt-5.5": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 127.1,
          "note": "Eastmond and Turner citations are real; Commission Decision 2002/2/EC accurate.",
          "rank": 6
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 28.9,
          "note": "Rigorous tri-jurisdictional mapping; minor risk on PDPC TR43 verifiability.",
          "rank": 3
        },
        "grok-4.3": {
          "total": 32.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.4,
          "note": "Rigorous GDPR-max architecture; BetrVG overlay and SCC module specificity add value.",
          "rank": 1
        },
        "mistral-large": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 57.9,
          "note": "WP29 Opinion 2/2017 and OPC 2019-001 citations unverifiable; EDPB refs plausible",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 21.6,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 9.8,
          "note": "Superficial analysis; misses PDPA adequacy list, PIPEDA accountability model nuances",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.3,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 110.6,
          "note": "Solid tri-jurisdictional analysis; PDPA Section citations accurate; actionable DPA wording.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 29.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 125.0,
          "note": "Exceptional cross-jurisdictional depth; Betriebsrat angle and BYOK measures outstanding.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 43.9,
          "note": "PIPEDA Case Summary #2003-114 cited for biometrics is unverifiable/likely hallucinated.",
          "rank": 7
        }
      }
    },
    {
      "id": 301,
      "category": "Employment Law",
      "use_case": "remote_employee_misaligned_contract_uk_tx_ontario",
      "models": {
        "claude-opus-4.8": {
          "total": 31.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 38.5,
          "note": "Waksdale cite slightly imprecise but substantively correct; strong cross-jurisdictional coverage.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 108.8,
          "note": "Exceptional cross-jurisdictional depth; Dufault cite tangential but not hallucinated.",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 134.7,
          "note": "Exceptional cross-jurisdictional depth; all cited cases appear verifiable and accurate.",
          "rank": 5
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 15.4,
          "note": "Comprehensive, well-cited; Herbert Morris and Lawson v Serco correctly applied.",
          "rank": 3
        },
        "grok-4.3": {
          "total": 33.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 6.0,
          "note": "Tillman v Egon Zehnder correctly cited; Bill 27 ESA amendment accurate.",
          "rank": 1
        },
        "mistral-large": {
          "total": 26.8,
          "quality": 9.0,
          "accuracy": 6.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 45.0,
          "note": "Marsh v. Cook and Drennen citations appear fabricated or misattributed.",
          "rank": 8
        },
        "llama-4-maverick": {
          "total": 4.6,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 4.6,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 6.6,
          "note": "",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 25.3,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 46.8,
          "note": "UNISON cite irrelevant; McKee misapplied; Tillman accurate but tangential",
          "rank": 9
        },
        "qwen3.7-max": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 96.7,
          "note": "Lawson v Serco and Tillman citations verified; Machtinger accurate; strong output.",
          "rank": 7
        },
        "gemini-3.1-pro": {
          "total": 29.2,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 40.9,
          "note": "Thorough, well-cited; Lawson v Serco and Waksdale correctly applied.",
          "rank": 4
        }
      }
    },
    {
      "id": 302,
      "category": "AI/Tech Regulation",
      "use_case": "ai_model_clinical_decision_support_eu_uk_japan_overlap",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 77.7,
          "note": "Rigorous, jurisdiction-correct, PCCP/IDATEN convergence insight is genuinely valuable.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 137.8,
          "note": "Exceptionally thorough, accurate multi-jurisdiction analysis; truncated at Section III Phase 1.",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 127.3,
          "note": "Thorough, jurisdiction-correct, well-structured; minor truncation at end noted.",
          "rank": 3
        },
        "o3": {
          "total": 28.1,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 24.7,
          "note": "MDCG 2020-3, PMDA June 2021 guidelines, and some citations unverifiable",
          "rank": 5
        },
        "grok-4.3": {
          "total": 29.0,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 5.0,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 7.5,
          "note": "Solid structured analysis; minor gaps on UKCA timeline and PMDA specifics.",
          "rank": 2
        },
        "mistral-large": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 55.7,
          "note": "Solid structure; IVDR application overstated; MDSAP Japan-EU claim imprecise.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 20.6,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 9.2,
          "note": "Generic analysis; misses IVDR inapplicability, MDR Class IIb/III nuance, UKCA specifics",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.3,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 83.9,
          "note": "Solid framework; MHRA 2024 Change Management guidance citation needs verification.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 127.4,
          "note": "Comprehensive, well-structured; minor EU AI Act article numbering imprecision noted.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 26.7,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 43.4,
          "note": "IDATEN framework and IMDRF nIVD MA ToC references appear fabricated/unverifiable",
          "rank": 7
        }
      }
    },
    {
      "id": 303,
      "category": "M&A",
      "use_case": "three_way_regulatory_clearance_firb_cfius_competition",
      "models": {
        "claude-opus-4.8": {
          "total": 29.8,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 64.3,
          "note": "Illumina/Grail C-611/22 P cited inaccurately re Article 22 referral ruling",
          "rank": 5
        },
        "claude-sonnet-4.6": {
          "total": 27.4,
          "quality": 9.0,
          "accuracy": 7.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": true,
          "latency": 139.6,
          "note": "Ralls Corp cited correctly; Aixtron/Broadcom/UTC cases used imprecisely or mischaracterized",
          "rank": 7
        },
        "gpt-5.5": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 146.2,
          "note": "Thorough, accurate, well-structured; minor truncation at end; no hallucinated cites.",
          "rank": 6
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 21.4,
          "note": "Technically strong; AusChip EU turnover threshold justification slightly strained.",
          "rank": 3
        },
        "grok-4.3": {
          "total": 30.6,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.0,
          "note": "Technically sound, well-structured; EUMR threshold analysis slightly oversimplified.",
          "rank": 1
        },
        "mistral-large": {
          "total": 26.7,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 47.9,
          "note": "Siemens/Alstom and Lattice/Canyon Bridge citations misapplied; FIRB fee figure unverified.",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 22.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 6.5,
          "note": "Superficial analysis; remedies underdeveloped; timing strategy lacks granular sequencing detail.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.3,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 65.2,
          "note": "Solid cross-jurisdictional analysis; CFIUS-first sequencing rationale well-argued.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 29.9,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 106.1,
          "note": "Rigorous, jurisdiction-correct analysis with actionable sequencing and conflict mapping.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 30.2,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 49.8,
          "note": "Thorough, accurate, well-structured memo with strong cross-jurisdictional conflict analysis.",
          "rank": 2
        }
      }
    },
    {
      "id": 304,
      "category": "Banking/Finance",
      "use_case": "sharia_compatible_syndicated_loan_ksa_uk_ohada",
      "models": {
        "claude-opus-4.8": {
          "total": 30.3,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 66.3,
          "note": "Beximco cite accurate; OHADA agent des sûretés, CCJA, AUS correctly applied.",
          "rank": 3
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 138.5,
          "note": "Beximco cite accurate; BCEAO/AUOS citations verifiable; comprehensive cross-jurisdictional analysis",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 27.0,
          "quality": 8.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 142.1,
          "note": "Beximco cite accurate; OHADA security agent articles correct; answer truncated",
          "rank": 8
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 20.6,
          "note": "Sophisticated cross-jurisdictional synthesis; OHADA article citations plausible but unverified.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 8.0,
          "note": "Solid cross-jurisdictional structure; AUS articles plausible but unverified precisely.",
          "rank": 4
        },
        "mistral-large": {
          "total": 3.2,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.2,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 51.3,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 4.0,
          "accuracy": 5.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 5.0,
          "note": "Superficial treatment; misses CCJA jurisdiction, commodity murabaha mechanics, OHADA sûretés specifics",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 27.8,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 54.3,
          "note": "Solid cross-jurisdictional structure; OHADA security mechanics and Sharia carve-outs well-handled.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 109.5,
          "note": "Dana Gas cite accurate; KSA Civil Transactions Law reference plausible but verify article number",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 38.1,
          "note": "Beximco cite accurate; BCEAO usury/TEG analysis is sophisticated and correct.",
          "rank": 2
        }
      }
    },
    {
      "id": 305,
      "category": "Real Estate",
      "use_case": "multi_layer_reit_structure_uae_qatar_de_tax",
      "models": {
        "claude-opus-4.8": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 38.0,
          "note": "Rigorous multi-layer structure; Qatari WHT/ownership caveats appropriately flagged.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 137.2,
          "note": "Exceptional multi-jurisdictional depth; minor GILTI HTE threshold and QFC title nuances.",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 124.1,
          "note": "Exceptional cross-jurisdictional depth; IRC citations precise; Gulf law correctly cited.",
          "rank": 3
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 23.4,
          "note": "Comprehensive, well-structured; minor risk on Rev. Rul. 2004-24 applicability",
          "rank": 1
        },
        "grok-4.3": {
          "total": 28.6,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 6.1,
          "note": "Solid structure; Qatar ESR Law No. 20/2019 citation needs verification.",
          "rank": 5
        },
        "mistral-large": {
          "total": 22.8,
          "quality": 7.0,
          "accuracy": 4.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 48.9,
          "note": "Multiple hallucinated cites: US-UAE treaty, Rev.Rul.2002-64 misapplied, Qatar v.Creighton",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 19.0,
          "quality": 5.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": true,
          "latency": 5.2,
          "note": "US-UAE tax treaty cited incorrectly; no such treaty exists currently.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.3,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 76.5,
          "note": "Solid multi-layer analysis; UAE CT and Qatar WHT correctly flagged; no hallucinations.",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 111.7,
          "note": "Technically rigorous, cites verifiable laws, strong cross-jurisdictional PE and exit analysis.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 43.7,
          "note": "Solid structure; QFC freehold zones overstated, TRS/DRE tension underexplored.",
          "rank": 8
        }
      }
    },
    {
      "id": 306,
      "category": "Tax",
      "use_case": "digital_services_vat_canada_brazil_india_pe_mismatch",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 89.5,
          "note": "Exceptional depth; EL 2.0 abolition, CA 132 reform, SEP treaty override all correct.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 145.0,
          "note": "Exceptional depth; Finance Act 2024 EL abolition correctly noted; truncated at end",
          "rank": 3
        },
        "gpt-5.5": {
          "total": 22.0,
          "quality": 7.0,
          "accuracy": 7.0,
          "speed": 1.0,
          "style": 4.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 150.8,
          "note": "Answer truncated mid-sentence; Section 2 and 3 incomplete, reducing actionability significantly.",
          "rank": 8
        },
        "o3": {
          "total": 28.1,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 18.9,
          "note": "ADI 5958 date/holding and CBDT Circular 20/2021 unverified; EL s.165A(3) dubious",
          "rank": 5
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 9.1,
          "note": "Solid, well-structured analysis; minor gaps on Brazil ICMS/ISS judicial uncertainty.",
          "rank": 2
        },
        "mistral-large": {
          "total": 3.7,
          "quality": 0.0,
          "accuracy": 0.0,
          "speed": 3.7,
          "style": 0.0,
          "creativity": 0.0,
          "halluc": false,
          "latency": 73.2,
          "note": "",
          "rank": 10
        },
        "llama-4-maverick": {
          "total": 15.0,
          "quality": 3.0,
          "accuracy": 4.0,
          "speed": 5.0,
          "style": 2.0,
          "creativity": 1.0,
          "halluc": false,
          "latency": 6.2,
          "note": "Superficial, generic; misses thresholds, OECD Pillar One, specific PE risks.",
          "rank": 9
        },
        "deepseek-v3.2": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 88.8,
          "note": "Federal Law 13,988/2020 misattributed; Circular 17/2021-GST EL interaction unverified",
          "rank": 7
        },
        "qwen3.7-max": {
          "total": 28.3,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 122.5,
          "note": "Technically strong; ADI 5659 and EL exemption correctly cited.",
          "rank": 4
        },
        "gemini-3.1-pro": {
          "total": 27.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 140.2,
          "note": "Accurate, well-structured; EL abolition noted; Brazil treaty gap correctly flagged.",
          "rank": 6
        }
      }
    },
    {
      "id": 307,
      "category": "Criminal/White Collar",
      "use_case": "multi_jurisdiction_bribery_self_report_strategy",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 55.0,
          "note": "Exceptional depth; Hoskins, ENRC, Airbus, Rolls-Royce all verifiable and correctly applied.",
          "rank": 2
        },
        "claude-sonnet-4.6": {
          "total": 28.0,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 155.2,
          "note": "Exceptional depth; Hoskins/Three Rivers citations accurate; minor ANPD resolution unverified",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 29.4,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 137.2,
          "note": "Exceptional multi-jurisdictional depth; citations verified; Hoskins and ENRC correctly applied.",
          "rank": 4
        },
        "o3": {
          "total": 31.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 12.6,
          "note": "Comprehensive, well-structured; minor uncertainty on Decree 11.129/2022 specifics.",
          "rank": 1
        },
        "grok-4.3": {
          "total": 27.6,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 7.1,
          "note": "US v. Jeong 2010 and R v. Maxwell citations unverified/likely hallucinated",
          "rank": 7
        },
        "mistral-large": {
          "total": 24.3,
          "quality": 8.0,
          "accuracy": 5.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 60.3,
          "note": "Multiple hallucinated/misattributed cases; Hoskins, Allen, Esquenazi citations inaccurate",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 21.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 2.0,
          "halluc": false,
          "latency": 5.8,
          "note": "Generic bullets lack depth on ne bis in idem, privilege waiver risks, sequencing rationale.",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 26.2,
          "quality": 8.0,
          "accuracy": 6.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": true,
          "latency": 50.4,
          "note": "Brazil Law 13,260/2016 is anti-terrorism, not a blocking statute; ICCPC misspelled",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 120.9,
          "note": "Connolly suppression holding overstated; otherwise rigorous, production-ready multi-jurisdiction strategy.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 30.7,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 50.2,
          "note": "Technically sound, well-structured; SFO v ENRC cite is legitimate.",
          "rank": 3
        }
      }
    },
    {
      "id": 308,
      "category": "International Trade",
      "use_case": "supply_chain_cbam_gcc_customs_japan_rules_of_origin",
      "models": {
        "claude-opus-4.8": {
          "total": 30.8,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 2.8,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 73.9,
          "note": "Exceptional cross-regime analysis; correctly flags EPA/CBAM independence and Art.27 risk.",
          "rank": 1
        },
        "claude-sonnet-4.6": {
          "total": 28.4,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.4,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 116.2,
          "note": "Exceptional cross-regime analysis; minor gap on GCC carbon pricing accuracy",
          "rank": 6
        },
        "gpt-5.5": {
          "total": 29.0,
          "quality": 9.0,
          "accuracy": 9.0,
          "speed": 1.0,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 143.6,
          "note": "Thorough, accurate, well-structured; truncated Incoterms section minor flaw.",
          "rank": 4
        },
        "o3": {
          "total": 30.1,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 4.1,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 18.5,
          "note": "Technically strong, minor date error on CBAM certificates start (2026 vs 2026 correct)",
          "rank": 2
        },
        "grok-4.3": {
          "total": 29.6,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 4.6,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 7.5,
          "note": "Solid, well-structured memo; CBAM penalty range slightly imprecise but defensible.",
          "rank": 3
        },
        "mistral-large": {
          "total": 27.2,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.2,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 35.7,
          "note": "Solid framework; EPA cumulation claim overstated, GCC data-blocking law citation questionable",
          "rank": 9
        },
        "llama-4-maverick": {
          "total": 22.0,
          "quality": 5.0,
          "accuracy": 6.0,
          "speed": 5.0,
          "style": 3.0,
          "creativity": 3.0,
          "halluc": false,
          "latency": 5.8,
          "note": "Superficial analysis; misses GCC customs union specifics and CBAM aluminium scope details",
          "rank": 10
        },
        "deepseek-v3.2": {
          "total": 27.3,
          "quality": 8.0,
          "accuracy": 8.0,
          "speed": 2.3,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 74.0,
          "note": "Solid CBAM/origin analysis; GCC customs union role underexplored.",
          "rank": 8
        },
        "qwen3.7-max": {
          "total": 28.9,
          "quality": 9.0,
          "accuracy": 8.0,
          "speed": 1.9,
          "style": 5.0,
          "creativity": 5.0,
          "halluc": false,
          "latency": 95.5,
          "note": "Technically rigorous, well-structured; CBAM-Ausführungsgesetz reference unverified but plausible.",
          "rank": 5
        },
        "gemini-3.1-pro": {
          "total": 27.7,
          "quality": 8.0,
          "accuracy": 7.0,
          "speed": 3.7,
          "style": 5.0,
          "creativity": 4.0,
          "halluc": false,
          "latency": 33.9,
          "note": "Solid analysis; minor inaccuracies on EPA tolerance rules and penalty figures.",
          "rank": 7
        }
      }
    }
  ]
}