import warnings

import flask
import torch
from flask import jsonify, request
from transformers import AutoTokenizer, pipeline

# Silence noisy third-party deprecation/FutureWarnings emitted at model load.
warnings.filterwarnings("ignore")

app = flask.Flask(__name__)

# Small chat model, loaded once at import time so every request reuses it.
model_id = "HuggingFaceTB/SmolLM-1.7B"
print("Loading model...")

# HF pipelines: device 0 = first GPU, -1 = CPU.
device = 0 if torch.cuda.is_available() else -1
# bfloat16 halves GPU memory; CPU kernels are most reliable in float32.
dtype = torch.float32 if device == -1 else torch.bfloat16

try:
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    # Some causal-LM tokenizers ship without a pad token; reuse EOS so
    # padding during generation does not fail.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    ai = pipeline(
        "text-generation",
        model=model_id,
        tokenizer=tokenizer,
        max_new_tokens=200,
        device=device,
        torch_dtype=dtype,
        trust_remote_code=True,
    )
    print("Model loaded!")
except Exception as e:
    # Keep the server importable even if the model fails to load;
    # /chat checks `ai is None` and reports a 500.
    print(f"Error loading model: {e}")
    ai = None
| |
|
| | |
| | |
| | |
@app.route('/chat', methods=['POST'])
def chat():
    """Handle one chat turn.

    Expects JSON ``{"message": "..."}``; wraps it in a User/Assistant
    prompt, generates with the pipeline, and returns ``{"reply": "..."}``.
    Returns 400 for missing/invalid input, 500 on model failure.
    """
    if ai is None:
        return jsonify({"error": "Model initialization failed."}), 500

    try:
        # silent=True -> None (instead of raising) on a non-JSON body, so a
        # malformed request yields a 400 here rather than a 500 below.
        data = request.get_json(silent=True) or {}
        msg = data.get("message", "")
        if not msg:
            return jsonify({"error": "No message sent"}), 400

        prompt = f"User: {msg}\nAssistant:"
        output = ai(prompt)[0]["generated_text"]
        return jsonify({"reply": _extract_reply(output, msg)})
    except Exception as e:
        return jsonify({"error": str(e)}), 500


def _extract_reply(output, msg):
    """Strip prompt scaffolding from raw generated text.

    The pipeline echoes the prompt and the model may keep generating
    fabricated extra turns; keep only the first assistant reply.
    """
    if "Assistant:" in output:
        # Bug fix: take text after the FIRST "Assistant:" (the original
        # [-1] took the LAST, which selects a hallucinated later turn).
        reply = output.split("Assistant:", 1)[-1]
    elif "User:" in output:
        reply = output.split("User:")[0]
    else:
        reply = output
    # Bug fix: truncate at the next fabricated "User:" turn, which the
    # original left in the reply.
    reply = reply.split("\nUser:")[0].strip()
    # Some models re-echo the user's message verbatim before answering.
    if reply.startswith(msg):
        reply = reply[len(msg):].strip()
    return reply
| |
|
| | |
| | |
| | |
if __name__ == "__main__":
    # Bind on all interfaces; 7860 is the conventional HF Spaces port.
    app.run(port=7860, host="0.0.0.0")