Kopachelli
			's Collections
			 
		
			
				
				
	
	
	
		
			Paper
			
•
			2505.09388
			
•
			Published
				
			•
				
				308
			
 
	
	 
	
	
	
				Qwen/Qwen3-14B-GGUF
				
				
			
			Text Generation
			
• 
		
				15B
			• 
	
				Updated
					
				
				• 
					
					7.06k
				
	
				
• 
					
					51
				
 
		
	
	
	 
	
	
	
				Qwen/Qwen3-8B-GGUF
				
				
			
			Text Generation
			
• 
		
				8B
			• 
	
				Updated
					
				
				• 
					
					44.6k
				
	
				
• 
					
					64
				
 
		
	
	
	 
	
	
	
				Qwen/Qwen3-4B-GGUF
				
				
			
			Text Generation
			
• 
		
				4B
			• 
	
				Updated
					
				
				• 
					
					8.06k
				
	
				
• 
					
					35
				
 
		
	
	
	 
	
	
	
			
			Qwen2.5-Coder Technical Report
		
			Paper
			
•
			2409.12186
			
•
			Published
				
			•
				
				150
			
 
	
	 
	
	
	
				Qwen/Qwen2.5-Coder-7B-Instruct
				
				
			
			Text Generation
			
• 
		
				8B
			• 
	
				Updated
					
				
				• 
					
					264k
				
• 
			
	
				• 
					
					552
				
 
		
	
	
	 
	
	
	
				Qwen/Qwen2.5-Coder-14B
				
				
			
			Text Generation
			
• 
		
				15B
			• 
	
				Updated
					
				
				• 
					
					11.4k
				
• 
			
	
				• 
					
					48
				
 
		
	
	
	 
	
	
	
				Qwen/Qwen2.5-Coder-14B-Instruct
				
				
			
			Text Generation
			
• 
		
				15B
			• 
	
				Updated
					
				
				• 
					
					154k
				
• 
			
	
				• 
					
					131
				
 
		
	
	
	 
	
	
	
				Qwen/Qwen2.5-Coder-7B
				
				
			
			Text Generation
			
• 
		
				8B
			• 
	
				Updated
					
				
				• 
					
					20.5k
				
• 
			
	
				• 
					
					126
				
 
		
	
	
	 
	
	
	
			
			DeepSeek-V3 Technical Report
		
			Paper
			
•
			2412.19437
			
•
			Published
				
			•
				
				71
			
 
	
	 
	
	
	
			
			DeepSeek-Coder-V2: Breaking the Barrier of Closed-Source Models in Code
  Intelligence
		
			Paper
			
•
			2406.11931
			
•
			Published
				
			•
				
				65
			
 
	
	 
	
	
	
				nvidia/Llama-3.1-Nemotron-Nano-8B-v1
				
				
			
			Text Generation
			
• 
		
				8B
			• 
	
				Updated
					
				
				• 
					
					16.3k
				
• 
			
	
				• 
					
					210
				
 
		
	
	
	 
	
	
	
			
			Llama-Nemotron: Efficient Reasoning Models
		
			Paper
			
•
			2505.00949
			
•
			Published
				
			•
				
				42
			
 
	
	 
	
	
	
			
			AIMO-2 Winning Solution: Building State-of-the-Art Mathematical
  Reasoning Models with OpenMathReasoning dataset
		
			Paper
			
•
			2504.16891
			
•
			Published
				
			•
				
				25
			
 
	
	 
	
	
	
			
			OpenCodeReasoning-II: A Simple Test Time Scaling Approach via
  Self-Critique
		
			Paper
			
•
			2507.09075
			
•
			Published
				
			•
				
				15
			
 
	
	 
	
	
	
				tencent/Hunyuan-7B-Instruct
				
				
			
			Text Generation
			
• 
		
				8B
			• 
	
				Updated
					
				
				• 
					
					4.47k
				
	
				
• 
					
					82
				
 
		
	
	
	 
	
	
	
			
			GLM-4.1V-Thinking: Towards Versatile Multimodal Reasoning with Scalable
  Reinforcement Learning
		
			Paper
			
•
			2507.01006
			
•
			Published
				
			•
				
				237
			
 
	
	 
	
	
	
				zai-org/GLM-4.1V-9B-Thinking
				
				
			
			Image-Text-to-Text
			
• 
		
				10B
			• 
	
				Updated
					
				
				• 
					
					254k
				
• 
			
	
				• 
					
					752
				
 
		
	
	
	 
	
	
	
				zai-org/GLM-4.1V-9B-Base
				
				
			
			Image-Text-to-Text
			
• 
		
				10B
			• 
	
				Updated
					
				
				• 
					
					9.11k
				
	
				
• 
					
					59
				
 
		
	
	
	 
	
	
	
			
			Let the Expert Stick to His Last: Expert-Specialized Fine-Tuning for
  Sparse Architectural Large Language Models
		
			Paper
			
•
			2407.01906
			
•
			Published
				
			•
				
				43
			
 
	
	 
	
	
	
				deepseek-ai/deepseek-moe-16b-base
				
				
			
			Text Generation
			
• 
		
				16B
			• 
	
				Updated
					
				
				• 
					
					13.2k
				
	
				
• 
					
					129
				
 
		
	
	
	 
	
	
	
			
			DeepSeekMoE: Towards Ultimate Expert Specialization in
  Mixture-of-Experts Language Models
		
			Paper
			
•
			2401.06066
			
•
			Published
				
			•
				
				56
			
 
	
	 
	
	
	
				deepseek-ai/deepseek-moe-16b-chat
				
				
			
			Text Generation
			
• 
		
				16B
			• 
	
				Updated
					
				
				• 
					
					4.56k
				
	
				
• 
					
					149
				
 
		
	
	
	 
	
	
	
				Skywork/Skywork-VL-Reward-7B
				
				
			
			Image-Text-to-Text
			
• 
		
				8B
			• 
	
				Updated
					
				
				• 
					
					583
				
	
				
• 
					
					46
				
 
		
	
	
	 
	
	
	
				Mungert/xLAM-2-32b-fc-r-GGUF
				
				
			
			Text Generation
			
• 
		
				33B
			• 
	
				Updated
					
				
				• 
					
					324
				
	
				
• 
					
					5
				
 
		
	
	
	 
	
	
	
				zai-org/SWE-Dev-7B
				
				
			
		
				8B
			• 
	
				Updated
					
				
				• 
					
					251
				
	
				
• 
					
					6
				
  
		
	
	
	 
	
	
	
				Mungert/Skywork-VL-Reward-7B-GGUF
				
				
			
			Image-Text-to-Text
			
• 
		
				8B
			• 
	
				Updated
					
				
				• 
					
					487
				
	
				
				
 
		
	
	
	 
	
	
	
				Skywork/Skywork-o1-Open-PRM-Qwen-2.5-1.5B
				
				
			
			Text Classification
			
• 
		
	
				Updated
					
				
				• 
					
					2.47k
				
	
				
• 
					
					33
				
 
		
	
	
	 
	
	
	
				jnorthrup/Skywork-o1-Open-PRM-Qwen-2.5-7B
				
				
			
			Text Classification
			
• 
		
				8B
			• 
	
				Updated
					
				
				• 
					
					1
				
	
				
				
 
		
	
	
	 
	
	
	
				mistralai/Mixtral-8x7B-Instruct-v0.1
				
				
			
		
				47B
			• 
	
				Updated
					
				
				• 
					
					549k
				
	
				
• 
					
					4.59k
				
  
		
	
	
	 
	
	
	
				CodeDPO/mimo-7b-base-deepcoder-120steps
				
				
			 
		
	
	
	 
	
	
	
				Mungert/granite-guardian-3.1-8b-GGUF
				
				
			
			Text Generation
			
• 
		
				8B
			• 
	
				Updated
					
				
				• 
					
					99
				
	
				
				
 
		
	
	
	 
	
	
	
				ariels/pest_twitter_geoparsing
				
				
			
			Viewer
			
• 
	
				Updated
					
				• 
			
			678
	
				• 
					
					12