| 1 |
gpt-5 |
83.25 |
96.64 |
77.89 |
75.21 |
|
2218 |
| 2 |
gpt-5-mini |
80.78 |
96.71 |
74.31 |
71.32 |
|
2219 |
| 3 |
claude-sonnet-4-20250514-thinking |
79.86 |
90.35 |
75.95 |
73.29 |
|
2219 |
| 4 |
claude-sonnet-4-20250514 |
78.55 |
87.83 |
75.03 |
72.80 |
|
2219 |
| 5 |
claude-sonnet-4-5-20250929 |
78.10 |
87.64 |
74.48 |
72.20 |
|
2217 |
| 6 |
Qwen3-Coder-480B-A35B-Instruct |
76.44 |
85.29 |
73.05 |
70.98 |
|
2219 |
| 7 |
gpt-4.1-2025-04-14 |
76.01 |
91.76 |
69.68 |
66.60 |
|
2219 |
| 8 |
qwen3-coder-plus |
75.86 |
84.70 |
72.36 |
70.52 |
|
2204 |
| 9 |
DeepSeek-R1-0528 |
75.62 |
87.52 |
72.33 |
67.01 |
|
2219 |
| 10 |
o3 |
75.44 |
92.26 |
68.10 |
65.95 |
|
2219 |
| 11 |
gemini-2.5-pro |
74.62 |
89.14 |
67.98 |
66.75 |
|
2219 |
| 12 |
qwen3-max |
74.53 |
85.05 |
70.38 |
68.15 |
|
2219 |
| 13 |
deepseek-v3.2-exp |
73.75 |
84.28 |
69.57 |
67.39 |
|
2218 |
| 14 |
Qwen3-235B-A22B-Thinking-2507 |
73.05 |
84.57 |
68.01 |
66.57 |
|
2218 |
| 15 |
Qwen3-Coder-30B-A3B-Instruct |
72.78 |
83.80 |
69.03 |
65.52 |
|
2219 |
| 16 |
GLM-4.5 |
72.18 |
84.20 |
67.68 |
64.66 |
|
2219 |
| 17 |
o4-mini |
71.97 |
87.83 |
64.99 |
63.10 |
|
2219 |
| 18 |
gpt-oss-120b |
71.19 |
90.07 |
63.05 |
60.45 |
|
2219 |
| 19 |
gemini-2.5-flash |
70.90 |
92.91 |
61.44 |
58.35 |
|
2218 |
| 20 |
Qwen3-235B-A22B-Instruct-2507 |
70.53 |
85.39 |
64.23 |
61.97 |
|
2219 |
| 21 |
qwen3-next-80b-a3b-instruct |
69.67 |
83.95 |
63.66 |
61.40 |
|
2219 |
| 22 |
DeepSeek-V3-0324 |
69.28 |
83.03 |
64.18 |
60.62 |
|
2219 |
| 23 |
Moonshot-Kimi-K2-Instruct |
69.11 |
84.14 |
62.59 |
60.62 |
|
2219 |
| 24 |
Qwen3-235B-A22B |
68.44 |
81.36 |
62.16 |
61.79 |
|
2219 |
| 25 |
chatgpt-4o-latest |
68.41 |
82.62 |
62.58 |
60.04 |
|
2218 |
| 26 |
gpt-oss-20b |
68.26 |
88.83 |
58.77 |
57.17 |
|
2219 |
| 27 |
Qwen3-30B-A3B-Thinking-2507 |
67.65 |
80.83 |
61.68 |
60.44 |
|
2219 |
| 28 |
GLM-4.5-Air |
67.47 |
84.80 |
60.79 |
56.80 |
|
2219 |
| 29 |
qwen3-next-80b-a3b-thinking |
66.96 |
77.68 |
62.10 |
61.10 |
|
2219 |
| 30 |
Qwen3-32B |
66.36 |
81.73 |
59.34 |
58.02 |
|
2219 |
| 31 |
Qwen3-30B-A3B-Instruct-2507 |
65.87 |
81.43 |
59.07 |
57.13 |
|
2219 |
| 32 |
DeepSeek-R1 |
64.64 |
80.46 |
57.56 |
55.88 |
|
2219 |
| 33 |
Qwen3-30B-A3B |
62.72 |
78.57 |
55.66 |
53.93 |
|
2219 |
| 34 |
QwQ-32B |
62.56 |
79.18 |
54.84 |
53.65 |
|
2219 |
| 35 |
Qwen3-14B |
61.90 |
79.22 |
54.60 |
51.86 |
|
2219 |
| 36 |
gpt-4o-2024-11-20 |
60.65 |
76.85 |
53.23 |
51.87 |
|
2219 |
| 37 |
Zhihu-ai-Zhi-Create-Qwen3-32B |
58.63 |
75.95 |
52.14 |
47.80 |
|
2088 |
| 38 |
DeepSeek-V3 |
58.07 |
73.06 |
51.44 |
49.72 |
|
2219 |
| 39 |
Qwen3-8B |
57.63 |
76.32 |
49.25 |
47.32 |
|
2219 |
| 40 |
o3-mini-2025-01-31 |
56.00 |
89.32 |
40.03 |
38.65 |
|
2218 |
| 41 |
DeepSeek-R1-Distill-Llama-70B |
55.13 |
73.73 |
47.03 |
44.64 |
|
2219 |
| 42 |
Qwen3-4B-Thinking-2507 |
52.96 |
70.20 |
45.19 |
43.50 |
|
2218 |
| 43 |
Qwen2.5-72B-Instruct |
52.86 |
72.86 |
43.87 |
41.86 |
|
2218 |
| 44 |
Qwen3-4B |
52.44 |
72.95 |
42.88 |
41.48 |
|
2218 |
| 45 |
gpt-4o-mini-2024-07-18 |
51.94 |
70.56 |
43.58 |
41.70 |
|
2219 |
| 46 |
Seed-Coder-8B-Instruct |
51.32 |
73.37 |
41.35 |
39.23 |
|
2219 |
| 47 |
DeepSeek-R1-Distill-Qwen-32B |
51.30 |
71.63 |
42.47 |
39.82 |
|
2219 |
| 48 |
Qwen2.5-Coder-32B-Instruct |
50.86 |
74.03 |
40.26 |
38.28 |
|
2219 |
| 49 |
Qwen2.5-32B-Instruct |
47.99 |
65.89 |
39.76 |
38.32 |
|
2219 |
| 50 |
Llama-3.1-8B-Instruct |
43.91 |
62.49 |
35.90 |
33.33 |
|
2218 |
| 51 |
Qwen2.5-14B-Instruct |
43.87 |
66.12 |
33.77 |
31.70 |
|
2123 |
| 52 |
Qwen2.5-Coder-14B-Instruct |
43.73 |
68.28 |
32.94 |
29.99 |
|
2219 |
| 53 |
Qwen2.5-Coder-7B-Instruct |
37.35 |
64.06 |
25.42 |
22.59 |
|
2216 |
| 54 |
DeepSeek-R1-Distill-Qwen-14B |
36.95 |
64.76 |
24.25 |
21.83 |
|
2219 |
| 55 |
Qwen2.5-7B-Instruct |
36.21 |
60.18 |
25.30 |
23.17 |
|
2219 |
| 56 |
Qwen3-1.7B |
33.07 |
57.01 |
22.56 |
19.65 |
|
2138 |
| 57 |
Llama-3.2-3B-Instruct |
32.90 |
55.07 |
23.82 |
19.83 |
|
2219 |
| 58 |
deepseek-coder-7b-instruct-v1.5 |
32.21 |
53.85 |
22.60 |
20.17 |
|
2219 |
| 59 |
OpenCoder-8B-Instruct |
25.40 |
48.61 |
16.00 |
11.58 |
|
2219 |
| 60 |
Qwen2.5-Coder-3B-Instruct |
24.28 |
53.38 |
12.50 |
6.95 |
|
2219 |
| 61 |
Hunyuan-7B-Instruct |
23.93 |
58.04 |
7.87 |
5.89 |
|
2219 |
| 62 |
Qwen2.5-3B-Instruct |
23.25 |
46.67 |
12.97 |
10.09 |
|
2201 |
| 63 |
Qwen2.5-Coder-1.5B-Instruct |
19.19 |
46.38 |
8.88 |
2.31 |
|
2219 |
| 64 |
Llama-3.2-1B-Instruct |
17.18 |
39.50 |
8.28 |
3.75 |
|
2219 |
| 65 |
Qwen2.5-1.5B-Instruct |
16.79 |
39.89 |
7.10 |
3.39 |
|
2219 |
| 66 |
DeepSeek-R1-Distill-Llama-8B |
15.94 |
41.70 |
3.74 |
2.37 |
|
2219 |
| 67 |
DeepSeek-R1-0528-Qwen3-8B |
14.17 |
40.54 |
1.08 |
0.88 |
|
2219 |
| 68 |
Qwen3-0.6B |
14.14 |
34.84 |
6.04 |
1.52 |
|
2219 |
| 69 |
Qwen2.5-Coder-0.5B-Instruct |
13.07 |
34.75 |
4.32 |
0.16 |
|
2219 |
| 70 |
DeepSeek-R1-Distill-Qwen-7B |
12.32 |
35.85 |
0.77 |
0.33 |
|
2218 |
| 71 |
Qwen2.5-0.5B-Instruct |
11.13 |
31.14 |
2.03 |
0.22 |
|
2218 |
| 72 |
DeepSeek-R1-Distill-Qwen-1.5B |
8.63 |
25.85 |
0.04 |
0.00 |
|
2219 |