📈 CapArena-Auto Leaderboard 📊

📑 Paper | MODELS: 22 | UPDATED: 2025-03-16 14:32:59
💬 Metric Explanations

CapArena-Auto is an arena-style automated evaluation benchmark for detailed image captioning. It includes 600 evaluation images and assesses each model's performance through pairwise battles against three baseline models. The final score is computed with GPT-4o acting as the judge (GPT-4o-as-a-Judge).

{
  • "headers": [
    • "",
    • "Model",
    • "Score_Avg ⬆️",
    • "Score_GPT",
    • "Score_COG",
    • "Score_CPM",
    • "Length_Avg",
    • "Available on the hub"
    ],
  • "data": [
    • [
      • 1,
      • "<a target="_blank" href="https://ai.google.dev/gemini-api/docs/models/gemini" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">🔒 Gemini-1.5-pro-002</a>",
      • 56.17,
      • 29,
      • 61,
      • 78.5,
      • 168.56,
      • false
      ],
    • [
      • 2,
      • "<a target="_blank" href="https://platform.openai.com/docs/models/gpt-4o" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">🔒 GPT-4o-0806</a>",
      • 44,
      • 0,
      • 55.5,
      • 76.5,
      • 115.8,
      • false
      ],
    • [
      • 3,
      • "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2.5-VL-72B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">🔑 Qwen2.5VL-72B</a>",
      • 35.33,
      • -1,
      • 49,
      • 58,
      • 163.67,
      • true
      ],
    • [
      • 4,
      • "<a target="_blank" href="https://ai.google.dev/gemini-api/docs/models/gemini" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">🔒 Gemini-2.0-flash-exp</a>",
      • 30.83,
      • -2,
      • 39.5,
      • 55,
      • 416.99,
      • false
      ],
    • [
      • 5,
      • "<a target="_blank" href="https://huggingface.co/AIDC-AI/Ovis2-34B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">🔑 Ovis-2-34b</a>",
      • 27,
      • -15,
      • 33.5,
      • 62.5,
      • 120.2,
      • true
      ],
    • [
      • 6,
      • "<a target="_blank" href="https://www.anthropic.com/news/claude-3-5-sonnet" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">🔒 Claude-3.5-Sonnet-0620</a>",
      • 21.5,
      • -14,
      • 30,
      • 48.5,
      • 147.93,
      • false
      ],
    • [
      • 7,
      • "<a target="_blank" href="https://huggingface.co/OpenGVLab/InternVL2-26B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">🔑 InternVL2-26B</a>",
      • 13,
      • -38.5,
      • 20,
      • 57.5,
      • 236.32,
      • true
      ],
    • [
      • 8,
      • "<a target="_blank" href="https://platform.openai.com/docs/models/gpt-4o" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">🔒 GPT-4o-mini-0718</a>",
      • 9.33,
      • -36,
      • 17,
      • 47,
      • 139.83,
      • false
      ],
    • [
      • 9,
      • "<a target="_blank" href="https://huggingface.co/AIDC-AI/Ovis1.6-Gemma2-27B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">🔑 Ovis-1_6-27b</a>",
      • 3,
      • -49.5,
      • 14.5,
      • 44,
      • 94.16,
      • true
      ],
    • [
      • 10,
      • "<a target="_blank" href="https://github.com/THUDM/GLM-4" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">🔒 GLM-4V-Plus</a>",
      • -0.17,
      • -51.5,
      • 13,
      • 38,
      • 109.27,
      • false
      ],
    • [
      • 11,
      • "<a target="_blank" href="https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">🔑 CogVLM2-llama3-chat-19B</a>",
      • -8.5,
      • -56.5,
      • 0,
      • 31,
      • 115.87,
      • true
      ],
    • [
      • 12,
      • "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">🔑 Qwen2-VL-72B-Instruct</a>",
      • -9,
      • -50.5,
      • -4.5,
      • 28,
      • 114.45,
      • true
      ],
    • [
      • 13,
      • "<a target="_blank" href="https://huggingface.co/llava-hf/llava-onevision-qwen2-72b-ov-hf" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">🔑 LLaVA-OV-72b</a>",
      • -12.33,
      • -57.5,
      • -6,
      • 26.5,
      • 200.88,
      • true
      ],
    • [
      • 14,
      • "<a target="_blank" href="https://huggingface.co/meta-llama/Llama-3.2-90B-Vision-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">🔑 LLama-3.2-90B</a>",
      • -25.67,
      • -72,
      • -13,
      • 8,
      • 160.25,
      • true
      ],
    • [
      • 15,
      • "<a target="_blank" href="https://cloud.tencent.com/document/product/1729/101832" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">🔒 Hunyuan-standard-vision</a>",
      • -26,
      • -63,
      • -19,
      • 4,
      • 354.1,
      • false
      ],
    • [
      • 16,
      • "<a target="_blank" href="https://huggingface.co/OpenGVLab/InternVL2_5-8B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">🔑 Internvl2-5-8b</a>",
      • -29.83,
      • -71,
      • -29,
      • 10.5,
      • 117.77,
      • true
      ],
    • [
      • 17,
      • "<a target="_blank" href="https://huggingface.co/openbmb/MiniCPM-V-2_6" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">🔑 MiniCPM-V2.6-8B</a>",
      • -38,
      • -80,
      • -34,
      • 0,
      • 106.74,
      • true
      ],
    • [
      • 18,
      • "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">🔑 Qwen2-VL-2B-Instruct</a>",
      • -48.67,
      • -86,
      • -49.5,
      • -10.5,
      • 116.84,
      • true
      ],
    • [
      • 19,
      • "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">🔑 Qwen2-VL-7B-Instruct</a>",
      • -49,
      • -78,
      • -59,
      • -10,
      • 97.81,
      • true
      ],
    • [
      • 20,
      • "<a target="_blank" href="https://huggingface.co/llava-hf/llava-v1.6-34b-hf" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">🔑 LLaVA-1.6-34B</a>",
      • -67.5,
      • -92,
      • -53.5,
      • -57,
      • 124.81,
      • true
      ],
    • [
      • 21,
      • "<a target="_blank" href="https://huggingface.co/nyu-visionx/cambrian-34b" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">🔑 cambrian-34b</a>",
      • -75,
      • -93,
      • -76,
      • -56,
      • 120.23,
      • true
      ],
    • [
      • 22,
      • "<a target="_blank" href="https://huggingface.co/llava-hf/llava-1.5-7b-hf" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">🔑 LLaVA-1.5-7B</a>",
      • -94,
      • -99.5,
      • -92,
      • -90.5,
      • 74.38,
      • true
      ]
    ],
  • "metadata": null
}