Polish_Cultural_Vision_Benchmark / benchmark_report.json
Sekon's picture
Update benchmark_report.json
5894291 verified
[
{
"Model Name":"Google Gemini 2.5 Pro",
"Model Size":"-",
"Avg (object)":43.0,
"Avg (country)":76.77,
"Art & Entertainment (object)":34.48,
"Art & Entertainment (country)":62.07,
"Culture & Tradition (object)":18.33,
"Culture & Tradition (country)":70.0,
"Geography (object)":71.67,
"Geography (country)":90.0,
"History (object)":47.5,
"History (country)":85.0
},
{
"Model Name":"Google Gemini 2.5 Flash",
"Model Size":"-",
"Avg (object)":37.52,
"Avg (country)":67.66,
"Art & Entertainment (object)":27.59,
"Art & Entertainment (country)":44.83,
"Culture & Tradition (object)":21.67,
"Culture & Tradition (country)":60.0,
"Geography (object)":68.33,
"Geography (country)":88.33,
"History (object)":32.5,
"History (country)":77.5
},
{
"Model Name":"Anthropic Claude 3.7 Sonnet",
"Model Size":"-",
"Avg (object)":37.06,
"Avg (country)":62.46,
"Art & Entertainment (object)":22.41,
"Art & Entertainment (country)":44.83,
"Culture & Tradition (object)":15.0,
"Culture & Tradition (country)":41.67,
"Geography (object)":58.33,
"Geography (country)":83.33,
"History (object)":52.5,
"History (country)":80.0
},
{
"Model Name":"OpenAI GPT-4o",
"Model Size":"-",
"Avg (object)":28.94,
"Avg (country)":42.49,
"Art & Entertainment (object)":22.41,
"Art & Entertainment (country)":24.14,
"Culture & Tradition (object)":18.33,
"Culture & Tradition (country)":53.33,
"Geography (object)":45.0,
"Geography (country)":55.0,
"History (object)":30.0,
"History (country)":37.5
},
{
"Model Name":"Qwen 2.5 VL 72B",
"Model Size":"72",
"Avg (object)":23.91,
"Avg (country)":51.51,
"Art & Entertainment (object)":18.97,
"Art & Entertainment (country)":31.03,
"Culture & Tradition (object)":10.0,
"Culture & Tradition (country)":33.33,
"Geography (object)":31.67,
"Geography (country)":71.67,
"History (object)":35.0,
"History (country)":70.0
},
{
"Model Name":"Qwen 2.5 VL 32B",
"Model Size":"32",
"Avg (object)":22.27,
"Avg (country)":48.8,
"Art & Entertainment (object)":22.41,
"Art & Entertainment (country)":31.03,
"Culture & Tradition (object)":8.33,
"Culture & Tradition (country)":30.0,
"Geography (object)":28.33,
"Geography (country)":66.67,
"History (object)":30.0,
"History (country)":67.5
},
{
"Model Name":"Qwen 2.5 VL 7B",
"Model Size":"7",
"Avg (object)":21.62,
"Avg (country)":44.72,
"Art & Entertainment (object)":18.97,
"Art & Entertainment (country)":15.52,
"Culture & Tradition (object)":6.67,
"Culture & Tradition (country)":31.67,
"Geography (object)":28.33,
"Geography (country)":66.67,
"History (object)":32.5,
"History (country)":65.0
},
{
"Model Name":"Google Gemma 3 27B",
"Model Size":"27",
"Avg (object)":19.14,
"Avg (country)":43.76,
"Art & Entertainment (object)":22.41,
"Art & Entertainment (country)":25.86,
"Culture & Tradition (object)":13.33,
"Culture & Tradition (country)":48.33,
"Geography (object)":28.33,
"Geography (country)":48.33,
"History (object)":12.5,
"History (country)":52.5
},
{
"Model Name":"Meta Llama 4 Maverick",
"Model Size":"400",
"Avg (object)":17.49,
"Avg (country)":42.98,
"Art & Entertainment (object)":24.14,
"Art & Entertainment (country)":32.76,
"Culture & Tradition (object)":8.33,
"Culture & Tradition (country)":36.67,
"Geography (object)":20.0,
"Geography (country)":50.0,
"History (object)":17.5,
"History (country)":52.5
},
{
"Model Name":"Mistral Medium 3",
"Model Size":"-",
"Avg (object)":17.45,
"Avg (country)":45.99,
"Art & Entertainment (object)":18.97,
"Art & Entertainment (country)":18.97,
"Culture & Tradition (object)":6.67,
"Culture & Tradition (country)":43.33,
"Geography (object)":31.67,
"Geography (country)":56.67,
"History (object)":12.5,
"History (country)":65.0
},
{
"Model Name":"Google Gemma 3 12B",
"Model Size":"12",
"Avg (object)":13.06,
"Avg (country)":40.04,
"Art & Entertainment (object)":17.24,
"Art & Entertainment (country)":29.31,
"Culture & Tradition (object)":10.0,
"Culture & Tradition (country)":41.67,
"Geography (object)":15.0,
"Geography (country)":46.67,
"History (object)":10.0,
"History (country)":42.5
},
{
"Model Name":"Mistral Small 3.1 24B",
"Model Size":"24",
"Avg (object)":12.41,
"Avg (country)":26.17,
"Art & Entertainment (object)":13.79,
"Art & Entertainment (country)":15.52,
"Culture & Tradition (object)":6.67,
"Culture & Tradition (country)":15.0,
"Geography (object)":21.67,
"Geography (country)":36.67,
"History (object)":7.5,
"History (country)":37.5
},
{
"Model Name":"Google Gemma 3 4B",
"Model Size":"4",
"Avg (object)":9.72,
"Avg (country)":35.84,
"Art & Entertainment (object)":17.24,
"Art & Entertainment (country)":25.86,
"Culture & Tradition (object)":8.33,
"Culture & Tradition (country)":31.67,
"Geography (object)":8.33,
"Geography (country)":38.33,
"History (object)":5.0,
"History (country)":47.5
}
]