amosnbn committed on
Commit
9961081
·
1 Parent(s): ffcccde
Files changed (1) hide show
  1. app.py +108 -46
app.py CHANGED
@@ -1,6 +1,6 @@
1
  # app.py
2
- # PapuaTranslate — Flask + SQLAlchemy (Supabase/SQLite) + mT5-LoRA (lazy)
3
- import os, re, logging, threading
4
  from datetime import datetime, timezone, timedelta
5
  from functools import wraps
6
  from flask import Flask, render_template, request, redirect, url_for, session, jsonify, flash
@@ -10,27 +10,26 @@ from werkzeug.middleware.proxy_fix import ProxyFix
10
  logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s")
11
  log = logging.getLogger("papua-app")
12
 
13
- # ===== Flask =====
14
- # Template HTML kamu ada di 'frontend/'
15
  app = Flask(__name__, template_folder="frontend", static_folder="static")
16
-
17
- # Trust reverse proxy di Hugging Face -> Flask tahu proto HTTPS & host asli (perlu untuk cookie/redirect)
18
  app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1)
19
 
20
- # Session config: aman untuk iframe/third-party, dan bekerja baik saat dibuka di tab langsung hf.space
21
  app.config.update(
22
  SECRET_KEY=os.getenv("SECRET_KEY", "dev-secret-change-me"),
23
  SESSION_COOKIE_NAME="hfspace_session",
24
- SESSION_COOKIE_SAMESITE="None", # penting untuk iframe / third-party
25
- SESSION_COOKIE_SECURE=True, # wajib True kalau SAMESITE=None
26
  SESSION_COOKIE_HTTPONLY=True,
27
  SESSION_COOKIE_PATH="/",
28
  PREFERRED_URL_SCHEME="https",
29
  )
30
- # Lama hidup session (untuk session.permanent=True)
31
  app.permanent_session_lifetime = timedelta(hours=8)
32
 
33
- # ===== DB: SQLAlchemy (Supabase Postgres / SQLite fallback) =====
 
 
 
 
34
  from sqlalchemy import create_engine, Column, Integer, Text, DateTime, ForeignKey, func
35
  from sqlalchemy.orm import declarative_base, sessionmaker, scoped_session, relationship
36
 
@@ -39,12 +38,10 @@ if not DATABASE_URL:
39
  DATABASE_URL = "sqlite:////tmp/app.db"
40
  log.warning("[DB] DATABASE_URL tidak diset; pakai SQLite /tmp/app.db")
41
  else:
42
- # normalisasi ke psycopg2 driver
43
  if DATABASE_URL.startswith("postgres://"):
44
  DATABASE_URL = DATABASE_URL.replace("postgres://", "postgresql+psycopg2://", 1)
45
  elif DATABASE_URL.startswith("postgresql://"):
46
  DATABASE_URL = DATABASE_URL.replace("postgresql://", "postgresql+psycopg2://", 1)
47
- # tambahkan sslmode kalau belum ada
48
  if DATABASE_URL.startswith("postgresql+psycopg2") and "sslmode=" not in DATABASE_URL:
49
  sep = "&" if "?" in DATABASE_URL else "?"
50
  DATABASE_URL = f"{DATABASE_URL}{sep}sslmode=require"
@@ -90,7 +87,7 @@ def login_required(fn):
90
  return fn(*args, **kwargs)
91
  return _wrap
92
 
93
- # ===== Prenorm (heuristik ringan) =====
94
  PAPUA_MAP = {
95
  r"\bsa\b": "saya", r"\bko\b": "kamu", r"\btra\b": "tidak", r"\bndak\b": "tidak",
96
  r"\bmo\b": "mau", r"\bpu\b": "punya", r"\bsu\b": "sudah", r"\bkong\b": "kemudian",
@@ -101,30 +98,45 @@ def prenorm(text: str) -> str:
101
  for pat, repl in PAPUA_MAP.items(): t = re.sub(pat, repl, t, flags=re.IGNORECASE)
102
  return t
103
 
104
- # ===== Model (lazy-load LoRA) =====
105
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
106
- from peft import PeftModel
107
-
108
- # Mulai dengan model kecil untuk uji UI; setelah lancar, ganti ke model kamu
109
- BASE_MODEL_ID = os.getenv("BASE_MODEL_ID", "google/mt5-small")
110
- ADAPTER_ID = os.getenv("ADAPTER_ID", "")
111
  DEVICE = "cuda" if os.getenv("DEVICE", "cpu") == "cuda" else "cpu"
112
 
113
  TOK = None
114
  MODEL = None
115
  _MODEL_LOCK = threading.Lock()
 
 
116
 
117
  def _load_model():
118
- global TOK, MODEL
119
- log.info("[MODEL] loading base=%s adapter=%s", BASE_MODEL_ID, ADAPTER_ID or "-")
120
- TOK = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
121
- base = AutoModelForSeq2SeqLM.from_pretrained(BASE_MODEL_ID)
122
- MODEL = PeftModel.from_pretrained(base, ADAPTER_ID) if ADAPTER_ID else base
123
- MODEL.eval().to(DEVICE)
124
- log.info("[MODEL] ready on %s", DEVICE)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
  def get_model():
127
- global MODEL
128
  if MODEL is None:
129
  with _MODEL_LOCK:
130
  if MODEL is None:
@@ -132,17 +144,36 @@ def get_model():
132
  return TOK, MODEL
133
 
134
  def translate_with_model(text: str, max_new_tokens: int = 48) -> str:
 
135
  tok, m = get_model()
136
- inputs = tok([text], return_tensors="pt").to(DEVICE)
137
- outputs = m.generate(
138
- **inputs,
139
- max_new_tokens=max_new_tokens,
 
 
 
 
 
 
 
140
  num_beams=4,
141
  length_penalty=0.9,
142
  no_repeat_ngram_size=3,
143
  early_stopping=True,
144
  )
145
- return tok.decode(outputs[0], skip_special_tokens=True)
 
 
 
 
 
 
 
 
 
 
 
146
 
147
  # ===== Utils / logging =====
148
  @app.before_request
@@ -155,7 +186,30 @@ def _err(e):
155
  log.exception("Unhandled error")
156
  return "Internal Server Error", 500
157
 
158
- # ===== Debug endpoints (cek cookie session) =====
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  @app.get("/debug/session/set")
160
  def dbg_set():
161
  session.permanent = True
@@ -187,11 +241,8 @@ def login_post():
187
  u = s.query(User).filter_by(email=email).first()
188
  if not u or not verify_password(u, pwd):
189
  flash("Email atau password salah", "error"); return redirect(url_for("login_get"))
190
-
191
- # penting: buat session 'permanent' supaya cookie ditulis dengan benar
192
  session.permanent = True
193
  session["uid"], session["email"] = u.id, u.email
194
-
195
  return redirect(url_for("index"))
196
 
197
  @app.get("/register")
@@ -237,23 +288,34 @@ def about_page():
237
  @app.post("/translate")
238
  def api_translate():
239
  if not session.get("uid"):
240
- return jsonify({"error": "Unauthorized"}), 401
 
241
  payload = request.get_json(silent=True) or {}
242
  text = (payload.get("text") or "").strip()
243
  max_new = int(payload.get("max_new_tokens", 48))
244
  if not text:
245
- return jsonify({"error": "Empty text"}), 400
 
246
  try:
247
  clean = prenorm(text)
248
- mt = translate_with_model(clean, max_new_tokens=max_new)
 
 
 
 
 
 
 
249
  with SessionLocal() as s:
250
  s.add(Translation(user_id=session["uid"], src=text, mt=mt))
251
  s.commit()
252
- return jsonify({"mt": mt})
253
- except Exception:
254
- log.exception("translate error")
255
- return jsonify({"error": "server error"}), 500
 
 
 
256
 
257
  if __name__ == "__main__":
258
- # Jalankan lokal: python app.py, atau di HF pakai gunicorn
259
  app.run(host="0.0.0.0", port=int(os.getenv("PORT", "7860")), debug=False)
 
1
  # app.py
2
+ # PapuaTranslate — Flask + SQLAlchemy (Supabase/SQLite) + mT5-LoRA (lazy) + diag + preload
3
+ import os, re, logging, threading, traceback
4
  from datetime import datetime, timezone, timedelta
5
  from functools import wraps
6
  from flask import Flask, render_template, request, redirect, url_for, session, jsonify, flash
 
10
  logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s")
11
  log = logging.getLogger("papua-app")
12
 
# ===== Flask / Template =====
# HTML templates are loaded from 'frontend/', static files from 'static/'.
app = Flask(__name__, template_folder="frontend", static_folder="static")

# Trust one reverse-proxy hop for the forwarded client address, scheme and
# host so Flask sees the original HTTPS scheme/host (needed for cookies and
# redirects).
app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1)

_DEFAULT_SECRET_KEY = "dev-secret-change-me"
_secret_key = os.getenv("SECRET_KEY", _DEFAULT_SECRET_KEY)
if _secret_key == _DEFAULT_SECRET_KEY:
    # The fallback value is visible in the source, so sessions signed with it
    # can be forged. It still works for local runs, but must not go unnoticed.
    log.warning("[APP] SECRET_KEY is not set; using the insecure built-in default")

app.config.update(
    SECRET_KEY=_secret_key,
    SESSION_COOKIE_NAME="hfspace_session",
    # SameSite=None lets the cookie be sent in an iframe / third-party context;
    # Secure must be True whenever SameSite=None is used.
    SESSION_COOKIE_SAMESITE="None",
    SESSION_COOKIE_SECURE=True,
    SESSION_COOKIE_HTTPONLY=True,
    SESSION_COOKIE_PATH="/",
    PREFERRED_URL_SCHEME="https",
)
# Lifetime of sessions marked with session.permanent = True.
app.permanent_session_lifetime = timedelta(hours=8)


# ===== Feature flags via ENV =====
def _env_flag(name: str, default: str) -> bool:
    """Return True when env var *name* (or *default*) is "1", "true" or "yes"."""
    return os.getenv(name, default).lower() in ("1", "true", "yes")


# Start loading the model in a background thread at import time.
PRELOAD_MODEL = _env_flag("PRELOAD_MODEL", "true")
# Bypass the model in /translate and return a "[FAKE] ..." echo instead.
FALLBACK_TRANSLATE = _env_flag("FALLBACK_TRANSLATE", "false")
+
32
+ # ===== DB: SQLAlchemy =====
33
  from sqlalchemy import create_engine, Column, Integer, Text, DateTime, ForeignKey, func
34
  from sqlalchemy.orm import declarative_base, sessionmaker, scoped_session, relationship
35
 
 
38
  DATABASE_URL = "sqlite:////tmp/app.db"
39
  log.warning("[DB] DATABASE_URL tidak diset; pakai SQLite /tmp/app.db")
40
  else:
 
41
  if DATABASE_URL.startswith("postgres://"):
42
  DATABASE_URL = DATABASE_URL.replace("postgres://", "postgresql+psycopg2://", 1)
43
  elif DATABASE_URL.startswith("postgresql://"):
44
  DATABASE_URL = DATABASE_URL.replace("postgresql://", "postgresql+psycopg2://", 1)
 
45
  if DATABASE_URL.startswith("postgresql+psycopg2") and "sslmode=" not in DATABASE_URL:
46
  sep = "&" if "?" in DATABASE_URL else "?"
47
  DATABASE_URL = f"{DATABASE_URL}{sep}sslmode=require"
 
87
  return fn(*args, **kwargs)
88
  return _wrap
89
 
90
+ # ===== Prenorm =====
91
  PAPUA_MAP = {
92
  r"\bsa\b": "saya", r"\bko\b": "kamu", r"\btra\b": "tidak", r"\bndak\b": "tidak",
93
  r"\bmo\b": "mau", r"\bpu\b": "punya", r"\bsu\b": "sudah", r"\bkong\b": "kemudian",
 
98
  for pat, repl in PAPUA_MAP.items(): t = re.sub(pat, repl, t, flags=re.IGNORECASE)
99
  return t
100
 
# ===== Model (lazy) =====
# Defaults to a small base model for testing; override with BASE_MODEL_ID.
BASE_MODEL_ID = os.getenv("BASE_MODEL_ID", "google/mt5-small")
# Optional adapter to apply on top of the base model; empty means "none".
ADAPTER_ID = os.getenv("ADAPTER_ID", "")
# Only the value "cuda" selects the GPU; anything else falls back to CPU.
# The comparison ignores case and surrounding whitespace so that "CUDA" or
# " cuda" no longer silently selects the CPU.
DEVICE = "cuda" if os.getenv("DEVICE", "cpu").strip().lower() == "cuda" else "cpu"

# Shared model state, filled in by _load_model().
TOK = None
MODEL = None
_MODEL_LOCK = threading.Lock()
_MODEL_READY = False  # True once a load has completed successfully
_MODEL_ERROR = None   # "<ExceptionType>: <message>" of the last failed load
111
 
def _load_model():
    """Load the tokenizer and model and publish them in the module globals.

    Never raises. On failure the exception is logged, its summary is stored
    in ``_MODEL_ERROR`` and ``_MODEL_READY`` is set to ``False``; ``TOK`` and
    ``MODEL`` are left untouched so a later call can retry from a clean state.
    """
    global TOK, MODEL, _MODEL_READY, _MODEL_ERROR
    try:
        log.info("[MODEL] loading base=%s adapter=%s", BASE_MODEL_ID, ADAPTER_ID or "-")
        # Imported here rather than at module top so that an import failure is
        # recorded in _MODEL_ERROR (and reported by /diag) instead of
        # preventing the app from starting.
        import torch  # noqa: F401 -- imported only to surface a broken install
        from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
        from peft import PeftModel

        # Build everything in locals first: nothing is published until the
        # model is fully prepared on the target device.
        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
        model = AutoModelForSeq2SeqLM.from_pretrained(BASE_MODEL_ID)
        if ADAPTER_ID:
            model = PeftModel.from_pretrained(model, ADAPTER_ID)
        model.eval().to(DEVICE)
    except Exception as e:
        _MODEL_READY = False
        _MODEL_ERROR = f"{type(e).__name__}: {e}"
        log.exception("[MODEL] load error")
        return

    # Publish MODEL last: get_model() tests `MODEL is None` before taking the
    # lock, so once MODEL is visible the tokenizer and flags must already be.
    TOK = tokenizer
    _MODEL_ERROR = None
    _MODEL_READY = True
    MODEL = model
    log.info("[MODEL] ready on %s", DEVICE)
137
 
138
  def get_model():
139
+ global MODEL, _MODEL_READY
140
  if MODEL is None:
141
  with _MODEL_LOCK:
142
  if MODEL is None:
 
144
  return TOK, MODEL
145
 
def translate_with_model(text: str, max_new_tokens: int = 48) -> str:
    """Translate *text* with the loaded model and return the decoded string.

    The input is truncated to 256 tokens before generation. Generation uses
    beam search with 4 beams and at most ``max_new_tokens`` new tokens.

    Raises:
        RuntimeError: if the model is not ready (the message carries the
            recorded load error, or "unknown error").
    """
    import torch  # noqa: F401

    tokenizer, model = get_model()
    model_usable = _MODEL_READY and model is not None
    if not model_usable:
        reason = _MODEL_ERROR or "unknown error"
        raise RuntimeError(f"Model not ready: {reason}")

    # Cap the input length so an oversized request cannot exhaust memory.
    encoded = tokenizer([text], return_tensors="pt", truncation=True, max_length=256)
    batch = {}
    for field, tensor in encoded.items():
        batch[field] = tensor.to(DEVICE)

    input_len = int(batch["input_ids"].shape[-1])
    log.info("[GEN] start (len=%d)", input_len)
    generation_options = dict(
        max_new_tokens=int(max_new_tokens),
        num_beams=4,
        length_penalty=0.9,
        no_repeat_ngram_size=3,
        early_stopping=True,
    )
    generated = model.generate(**batch, **generation_options)
    log.info("[GEN] done")

    best_sequence = generated[0]
    return tokenizer.decode(best_sequence, skip_special_tokens=True)
167
+
# Optional preload so the first request does not pay the model-load latency.
def _preload_thread():
    """Thread target: load the model once in the background.

    Takes ``_MODEL_LOCK`` (the lock get_model() also acquires) and re-checks
    ``MODEL`` under it, so a request arriving during preload waits for this
    load instead of starting a second, concurrent one.
    """
    try:
        with _MODEL_LOCK:
            if MODEL is None:
                _load_model()
    except Exception:
        # _load_model() reports its own failures; this is only reached for
        # something unexpected, which should be visible rather than dropped.
        log.exception("[MODEL] preload failed")


if PRELOAD_MODEL:
    threading.Thread(target=_preload_thread, daemon=True).start()
177
 
178
  # ===== Utils / logging =====
179
  @app.before_request
 
186
  log.exception("Unhandled error")
187
  return "Internal Server Error", 500
188
 
# ===== Debug / Diag =====
@app.get("/diag")
def diag():
    """Return a JSON snapshot of the runtime: model state and library versions.

    Each of torch / transformers / peft is probed on its own, so one package
    that fails to import does not hide the versions of the others.
    """
    import importlib
    import sys

    def _version_of(package: str) -> str:
        # Report the failure as a string instead of raising: this endpoint
        # exists precisely to diagnose a broken environment.
        try:
            return importlib.import_module(package).__version__
        except Exception as e:
            return f"import error: {e}"

    versions = {"python": sys.version}
    for package in ("torch", "transformers", "peft"):
        versions[package] = _version_of(package)

    return jsonify({
        "ok": True,
        "time": datetime.now(timezone.utc).isoformat(),
        "device": DEVICE,
        "base_model": BASE_MODEL_ID,
        "adapter": ADAPTER_ID or None,
        "model_ready": _MODEL_READY,
        "model_error": _MODEL_ERROR,
        "versions": versions,
        "preload": PRELOAD_MODEL,
    })
212
+
213
  @app.get("/debug/session/set")
214
  def dbg_set():
215
  session.permanent = True
 
241
  u = s.query(User).filter_by(email=email).first()
242
  if not u or not verify_password(u, pwd):
243
  flash("Email atau password salah", "error"); return redirect(url_for("login_get"))
 
 
244
  session.permanent = True
245
  session["uid"], session["email"] = u.id, u.email
 
246
  return redirect(url_for("index"))
247
 
248
  @app.get("/register")
 
@app.post("/translate")
def api_translate():
    """Translate the posted text and record it in the user's history.

    Expects a JSON object with ``text`` (required, string) and
    ``max_new_tokens`` (optional integer, default 48, clamped to 1..256).

    Responses:
        200: ``{"ok": True, "result": <translation>, "mt": <translation>}``
        400: empty/missing ``text`` or a non-integer ``max_new_tokens``
        401: no logged-in user in the session
        500: translation or database failure
    """
    if not session.get("uid"):
        return jsonify({"ok": False, "error": "Unauthorized"}), 401

    # A valid JSON body that is not an object (e.g. a list) has no .get();
    # treat it like an empty payload instead of failing with a 500.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}

    raw_text = payload.get("text")
    text = raw_text.strip() if isinstance(raw_text, str) else ""
    if not text:
        return jsonify({"ok": False, "error": "Empty text"}), 400

    try:
        max_new = int(payload.get("max_new_tokens", 48))
    except (TypeError, ValueError, OverflowError):
        return jsonify({"ok": False, "error": "max_new_tokens must be an integer"}), 400
    # Client-supplied value: bound it so a single request cannot ask for an
    # arbitrarily long (or non-positive) generation.
    max_new = max(1, min(max_new, 256))

    try:
        clean = prenorm(text)

        # FALLBACK_TRANSLATE lets the UI/DB flow be verified without the model.
        if FALLBACK_TRANSLATE:
            mt = f"[FAKE] {clean}"
        else:
            mt = translate_with_model(clean, max_new_tokens=max_new)

        # Save to history.
        with SessionLocal() as s:
            s.add(Translation(user_id=session["uid"], src=text, mt=mt))
            s.commit()

        # "mt" is the key the previous response shape used; it is kept as an
        # alias of "result" so existing clients keep working.
        return jsonify({"ok": True, "result": mt, "mt": mt})
    except Exception as e:
        log.exception("[API] translate error: %s", e)
        # Deliberately returns the exception summary to make failures easier
        # to diagnose from the UI.
        # NOTE(review): this exposes internal error text to the client;
        # consider a generic message once the deployment is stable.
        return jsonify({"ok": False, "error": f"{type(e).__name__}: {e}"}), 500
319
 
if __name__ == "__main__":
    # Direct execution (`python app.py`): listen on all interfaces, on the
    # port given by the PORT environment variable (7860 when unset).
    listen_port = int(os.getenv("PORT", "7860"))
    app.run(host="0.0.0.0", port=listen_port, debug=False)