Update README.md
Browse files
README.md
CHANGED
|
@@ -92,7 +92,7 @@ if __name__ == "__main__":
|
|
| 92 |
Script to test the vision model
|
| 93 |
```python
|
| 94 |
import base64
|
| 95 |
-
import
|
| 96 |
import json
|
| 97 |
import os
|
| 98 |
|
|
@@ -109,51 +109,44 @@ def encode_image_to_base64(image_path):
|
|
| 109 |
print(f"Error encoding image: {str(e)}")
|
| 110 |
return None
|
| 111 |
|
| 112 |
-
def
|
| 113 |
-
"""Send base64 image to vision model server using
|
| 114 |
try:
|
| 115 |
-
# Prepare JSON payload
|
| 116 |
payload = {
|
| 117 |
"prompt": "Describe this image in detail.",
|
| 118 |
"image": base64_image,
|
| 119 |
"max_tokens": 300
|
| 120 |
}
|
| 121 |
-
payload_json = json.dumps(payload)
|
| 122 |
|
| 123 |
-
|
| 124 |
-
"curl",
|
| 125 |
-
"-X", "POST",
|
| 126 |
"http://localhost:8000/v1/completions",
|
| 127 |
-
"
|
| 128 |
-
|
| 129 |
-
|
| 130 |
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
if result.returncode == 0:
|
| 134 |
print("Server response:")
|
| 135 |
-
print(
|
| 136 |
else:
|
| 137 |
-
print(f"
|
| 138 |
-
print(
|
| 139 |
|
| 140 |
-
except
|
| 141 |
-
print(f"Error
|
| 142 |
except Exception as e:
|
| 143 |
print(f"Unexpected error: {str(e)}")
|
| 144 |
|
| 145 |
def main():
|
| 146 |
-
image_path = "/path/to/
|
| 147 |
|
| 148 |
base64_image = encode_image_to_base64(image_path)
|
| 149 |
if base64_image:
|
| 150 |
-
|
| 151 |
-
send_curl_request(base64_image)
|
| 152 |
|
| 153 |
if __name__ == "__main__":
|
| 154 |
main()
|
| 155 |
```
|
| 156 |
-
The quantized model can be loaded using a single GPU with VRAM larger than
|
| 157 |
```bash
|
| 158 |
+-----------------------------------------------------------------------------------------+
|
| 159 |
| NVIDIA-SMI 570.133.20 Driver Version: 570.133.20 CUDA Version: 12.8 |
|
|
|
|
| 92 |
Script to test the vision model
|
| 93 |
```python
|
| 94 |
import base64
|
| 95 |
+
import requests
|
| 96 |
import json
|
| 97 |
import os
|
| 98 |
|
|
|
|
| 109 |
print(f"Error encoding image: {str(e)}")
|
| 110 |
return None
|
| 111 |
|
| 112 |
+
def send_request(base64_image):
    """Send a base64-encoded image to the vision model server using requests.

    Args:
        base64_image: Base64-encoded image data (string) to embed in the
            JSON payload.

    Prints the server response (or an error message) to stdout; returns None.
    """
    try:
        # JSON payload expected by the /v1/completions endpoint.
        payload = {
            "prompt": "Describe this image in detail.",
            "image": base64_image,
            "max_tokens": 300
        }

        response = requests.post(
            "http://localhost:8000/v1/completions",
            headers={"Content-Type": "application/json"},
            json=payload,
            # Without a timeout, requests can block indefinitely if the
            # server never responds; fail after 60 seconds instead.
            timeout=60
        )

        if response.status_code == 200:
            print("Server response:")
            print(response.text)
        else:
            print(f"Request failed with status code {response.status_code}:")
            print(response.text)

    except requests.RequestException as e:
        print(f"Error sending request: {str(e)}")
    except Exception as e:
        print(f"Unexpected error: {str(e)}")
|
| 138 |
|
| 139 |
def main():
    """Encode the sample image and, on success, send it to the server."""
    # Replace with the path to a real image file before running this script.
    image_path = "/path/to/sample.jpeg"

    base64_image = encode_image_to_base64(image_path)
    if base64_image:
        send_request(base64_image)


if __name__ == "__main__":
    main()
|
| 148 |
```
|
| 149 |
+
The quantized model can be loaded using a single GPU with VRAM larger than 12GB (Tested on Tesla T10)
|
| 150 |
```bash
|
| 151 |
+-----------------------------------------------------------------------------------------+
|
| 152 |
| NVIDIA-SMI 570.133.20 Driver Version: 570.133.20 CUDA Version: 12.8 |
|