amosnbn committed on
Commit
25f414e
·
1 Parent(s): 9740d1e
Files changed (1) hide show
  1. app.py +43 -46
app.py CHANGED
@@ -1,7 +1,7 @@
1
  # app.py
2
  # PapuaTranslate — Flask + SQLAlchemy (Supabase/SQLite) + mT5/LoRA (lazy)
3
  # Fitur: ProxyFix, cookie Spaces, session permanent, /diag, preload, fallback,
4
- # strip BOM JSON, sanitize adapter_config untuk kompatibilitas PEFT lama.
5
 
6
  import os, re, json, codecs, pathlib, logging, threading, traceback, inspect
7
  from datetime import datetime, timezone, timedelta
@@ -10,13 +10,16 @@ from flask import Flask, render_template, request, redirect, url_for, session, j
10
  from werkzeug.middleware.proxy_fix import ProxyFix
11
  from huggingface_hub import snapshot_download
12
 
13
- # ========== Logging ==========
14
  logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s")
15
  log = logging.getLogger("papua-app")
16
 
17
- # ========== Flask ==========
18
  app = Flask(__name__, template_folder="frontend", static_folder="static")
 
19
  app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1)
 
 
20
  app.config.update(
21
  SECRET_KEY=os.getenv("SECRET_KEY", "dev-secret-change-me"),
22
  SESSION_COOKIE_NAME="hfspace_session",
@@ -28,11 +31,11 @@ app.config.update(
28
  )
29
  app.permanent_session_lifetime = timedelta(hours=8)
30
 
31
- # ========== Feature Flags via ENV ==========
32
  PRELOAD_MODEL = os.getenv("PRELOAD_MODEL", "true").lower() in ("1","true","yes")
33
  FALLBACK_TRANSLATE = os.getenv("FALLBACK_TRANSLATE", "false").lower() in ("1","true","yes")
34
 
35
- # ========== Database (SQLAlchemy) ==========
36
  from sqlalchemy import create_engine, Column, Integer, Text, DateTime, ForeignKey, func
37
  from sqlalchemy.orm import declarative_base, sessionmaker, scoped_session, relationship
38
 
@@ -75,7 +78,7 @@ try:
75
  except Exception as e:
76
  log.exception("[DB] init error: %s", e)
77
 
78
- # ========== Auth Helpers ==========
79
  from werkzeug.security import generate_password_hash, check_password_hash
80
  def set_password(user: User, raw: str): user.pass_hash = generate_password_hash(raw)
81
  def verify_password(user: User, raw: str) -> bool:
@@ -90,7 +93,7 @@ def login_required(fn):
90
  return fn(*args, **kwargs)
91
  return _wrap
92
 
93
- # ========== Prenorm ==========
94
  PAPUA_MAP = {
95
  r"\bsa\b": "saya", r"\bko\b": "kamu", r"\btra\b": "tidak", r"\bndak\b": "tidak",
96
  r"\bmo\b": "mau", r"\bpu\b": "punya", r"\bsu\b": "sudah", r"\bkong\b": "kemudian",
@@ -101,7 +104,7 @@ def prenorm(text: str) -> str:
101
  for pat, repl in PAPUA_MAP.items(): t = re.sub(pat, repl, t, flags=re.IGNORECASE)
102
  return t
103
 
104
- # ========== Model (lazy) + Strip BOM + Sanitize adapter_config ==========
105
  BASE_MODEL_ID = os.getenv("BASE_MODEL_ID", "google/mt5-small")
106
  ADAPTER_ID = os.getenv("ADAPTER_ID", "")
107
  DEVICE = "cuda" if os.getenv("DEVICE", "cpu") == "cuda" else "cpu"
@@ -125,17 +128,11 @@ def _strip_bom_in_dir(root_dir: str):
125
  log.warning(f"[BOM] skip {p}: {e}")
126
 
127
  def _sanitize_adapter_config(adapter_dir: str):
128
- """
129
- Buang kunci tidak dikenal oleh peft.LoraConfig saat ini.
130
- Menangani error seperti: LoraConfig.__init__() got an unexpected keyword argument 'e_rank_pattern'
131
- """
132
  try:
133
- from peft import LoraConfig # versi yang sedang terpasang di runtime
134
  except Exception as e:
135
  log.warning(f"[SAN] Tidak bisa import LoraConfig: {e}")
136
  return
137
-
138
- sig = None
139
  try:
140
  sig = inspect.signature(LoraConfig.__init__)
141
  allowed = set(p.name for p in sig.parameters.values())
@@ -145,12 +142,10 @@ def _sanitize_adapter_config(adapter_dir: str):
145
 
146
  cfg_path = pathlib.Path(adapter_dir) / "adapter_config.json"
147
  if not cfg_path.exists():
148
- # beberapa repo pakai nama lain; normalnya adapter_config.json
149
- for alt in ("adapter_config.json", "adapter_config_0.json", "config.json"):
150
- candidate = pathlib.Path(adapter_dir) / alt
151
- if candidate.exists():
152
- cfg_path = candidate
153
- break
154
  if not cfg_path.exists():
155
  log.warning(f"[SAN] adapter_config tidak ditemukan di {adapter_dir}")
156
  return
@@ -162,7 +157,6 @@ def _sanitize_adapter_config(adapter_dir: str):
162
  log.warning(f"[SAN] gagal baca adapter_config: {e}")
163
  return
164
 
165
- # filter hanya key yang diizinkan oleh LoraConfig saat ini
166
  cleaned = {k: v for k, v in cfg.items() if k in allowed}
167
  dropped = [k for k in cfg.keys() if k not in allowed]
168
  if dropped:
@@ -171,7 +165,7 @@ def _sanitize_adapter_config(adapter_dir: str):
171
  json.dump(cleaned, f, ensure_ascii=False, indent=2)
172
 
173
  def _load_model():
174
- """Download ke /tmp, strip BOM, sanitize adapter_config, lalu load dari path lokal."""
175
  global TOK, MODEL, _MODEL_READY, _MODEL_ERROR
176
  try:
177
  log.info("[MODEL] downloading base=%s adapter=%s", BASE_MODEL_ID, ADAPTER_ID or "-")
@@ -199,19 +193,12 @@ def _load_model():
199
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
200
  from peft import PeftModel
201
 
202
- log.info("[MODEL] loading tokenizer from %s", base_dir)
203
  TOK = AutoTokenizer.from_pretrained(base_dir)
204
-
205
- log.info("[MODEL] loading base model from %s", base_dir)
206
  base = AutoModelForSeq2SeqLM.from_pretrained(base_dir)
207
 
208
- if adapter_dir:
209
- log.info("[MODEL] attaching adapter from %s", adapter_dir)
210
- MODEL = PeftModel.from_pretrained(base, adapter_dir)
211
- else:
212
- MODEL = base
213
-
214
  MODEL.eval().to(DEVICE)
 
215
  _MODEL_READY = True
216
  _MODEL_ERROR = None
217
  log.info("[MODEL] ready on %s", DEVICE)
@@ -221,7 +208,7 @@ def _load_model():
221
  log.exception("[MODEL] load error")
222
 
223
  def get_model():
224
- global MODEL, _MODEL_READY
225
  if MODEL is None:
226
  with _MODEL_LOCK:
227
  if MODEL is None:
@@ -246,18 +233,16 @@ def translate_with_model(text: str, max_new_tokens: int = 48) -> str:
246
  return tok.decode(out[0], skip_special_tokens=True)
247
 
248
  def _preload_thread():
249
- try:
250
- _load_model()
251
- except Exception:
252
- pass
253
 
254
  if PRELOAD_MODEL:
255
  threading.Thread(target=_preload_thread, daemon=True).start()
256
 
257
- # ========== Utils / Logging ==========
258
  @app.before_request
259
  def _log_req():
260
- if request.path not in ("/health", "/ping", "/favicon.ico"):
261
  log.info("[REQ] %s %s", request.method, request.path)
262
 
263
  @app.errorhandler(Exception)
@@ -265,7 +250,7 @@ def _err(e):
265
  log.exception("Unhandled error")
266
  return "Internal Server Error", 500
267
 
268
- # ========== Debug & Diag ==========
269
  @app.get("/diag")
270
  def diag():
271
  import sys
@@ -276,7 +261,6 @@ def diag():
276
  peft_v = peft.__version__
277
  except Exception as e:
278
  torch_v = tf_v = peft_v = f"import error: {e}"
279
-
280
  return jsonify({
281
  "ok": True,
282
  "time": datetime.now(timezone.utc).isoformat(),
@@ -300,7 +284,7 @@ def dbg_set():
300
  def dbg_get():
301
  return {"uid": session.get("uid"), "email": session.get("email")}
302
 
303
- # ========== Routes ==========
304
  @app.get("/health")
305
  @app.get("/ping")
306
  def health():
@@ -357,24 +341,36 @@ def index():
357
  .filter(Translation.user_id == uid)
358
  .order_by(Translation.id.desc())
359
  .limit(10).all())
360
- data = [{"src": it.src, "mt": it.mt, "created_at": it.created_at} for it in items]
361
- return render_template("index.html", user=session.get("email"), data=data, device=DEVICE)
362
 
363
  @app.get("/about")
364
  def about_page():
365
  return render_template("about.html")
366
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
  @app.post("/translate")
368
  def api_translate():
369
  if not session.get("uid"):
370
  return jsonify({"ok": False, "error": "Unauthorized"}), 401
371
-
372
  payload = request.get_json(silent=True) or {}
373
  text = (payload.get("text") or "").strip()
374
  max_new = int(payload.get("max_new_tokens", 48))
375
  if not text:
376
  return jsonify({"ok": False, "error": "Empty text"}), 400
377
-
378
  try:
379
  clean = prenorm(text)
380
  mt = f"[FAKE] {clean}" if FALLBACK_TRANSLATE else translate_with_model(clean, max_new_tokens=max_new)
@@ -387,5 +383,6 @@ def api_translate():
387
  log.error(traceback.format_exc())
388
  return jsonify({"ok": False, "error": f"{type(e).__name__}: {e}"}), 500
389
 
 
390
  if __name__ == "__main__":
391
  app.run(host="0.0.0.0", port=int(os.getenv("PORT", "7860")), debug=False)
 
1
  # app.py
2
  # PapuaTranslate — Flask + SQLAlchemy (Supabase/SQLite) + mT5/LoRA (lazy)
3
  # Fitur: ProxyFix, cookie Spaces, session permanent, /diag, preload, fallback,
4
+ # strip BOM JSON, sanitize adapter_config, endpoint /history + homepage responsive.
5
 
6
  import os, re, json, codecs, pathlib, logging, threading, traceback, inspect
7
  from datetime import datetime, timezone, timedelta
 
10
  from werkzeug.middleware.proxy_fix import ProxyFix
11
  from huggingface_hub import snapshot_download
12
 
13
+ # ---------- Logging ----------
14
  logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s")
15
  log = logging.getLogger("papua-app")
16
 
17
+ # ---------- Flask ----------
18
  app = Flask(__name__, template_folder="frontend", static_folder="static")
19
+ # trust the HF proxy headers (HTTPS/host); important for Secure cookies and redirects
20
  app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1)
21
+
22
+ # session config (HF Spaces iframe-friendly)
23
  app.config.update(
24
  SECRET_KEY=os.getenv("SECRET_KEY", "dev-secret-change-me"),
25
  SESSION_COOKIE_NAME="hfspace_session",
 
31
  )
32
  app.permanent_session_lifetime = timedelta(hours=8)
33
 
34
+ # ---------- Feature Flags ----------
35
  PRELOAD_MODEL = os.getenv("PRELOAD_MODEL", "true").lower() in ("1","true","yes")
36
  FALLBACK_TRANSLATE = os.getenv("FALLBACK_TRANSLATE", "false").lower() in ("1","true","yes")
37
 
38
+ # ---------- Database ----------
39
  from sqlalchemy import create_engine, Column, Integer, Text, DateTime, ForeignKey, func
40
  from sqlalchemy.orm import declarative_base, sessionmaker, scoped_session, relationship
41
 
 
78
  except Exception as e:
79
  log.exception("[DB] init error: %s", e)
80
 
81
+ # ---------- Auth helpers ----------
82
  from werkzeug.security import generate_password_hash, check_password_hash
83
def set_password(user: User, raw: str):
    """Hash ``raw`` and store the result on ``user.pass_hash``."""
    hashed = generate_password_hash(raw)
    user.pass_hash = hashed
84
  def verify_password(user: User, raw: str) -> bool:
 
93
  return fn(*args, **kwargs)
94
  return _wrap
95
 
96
+ # ---------- Prenorm ----------
97
  PAPUA_MAP = {
98
  r"\bsa\b": "saya", r"\bko\b": "kamu", r"\btra\b": "tidak", r"\bndak\b": "tidak",
99
  r"\bmo\b": "mau", r"\bpu\b": "punya", r"\bsu\b": "sudah", r"\bkong\b": "kemudian",
 
104
  for pat, repl in PAPUA_MAP.items(): t = re.sub(pat, repl, t, flags=re.IGNORECASE)
105
  return t
106
 
107
+ # ---------- Model (lazy) + strip BOM + sanitize adapter ----------
108
  BASE_MODEL_ID = os.getenv("BASE_MODEL_ID", "google/mt5-small")
109
  ADAPTER_ID = os.getenv("ADAPTER_ID", "")
110
  DEVICE = "cuda" if os.getenv("DEVICE", "cpu") == "cuda" else "cpu"
 
128
  log.warning(f"[BOM] skip {p}: {e}")
129
 
130
  def _sanitize_adapter_config(adapter_dir: str):
 
 
 
 
131
  try:
132
+ from peft import LoraConfig
133
  except Exception as e:
134
  log.warning(f"[SAN] Tidak bisa import LoraConfig: {e}")
135
  return
 
 
136
  try:
137
  sig = inspect.signature(LoraConfig.__init__)
138
  allowed = set(p.name for p in sig.parameters.values())
 
142
 
143
  cfg_path = pathlib.Path(adapter_dir) / "adapter_config.json"
144
  if not cfg_path.exists():
145
+ for alt in ("adapter_config.json","adapter_config_0.json","config.json"):
146
+ c = pathlib.Path(adapter_dir)/alt
147
+ if c.exists():
148
+ cfg_path = c; break
 
 
149
  if not cfg_path.exists():
150
  log.warning(f"[SAN] adapter_config tidak ditemukan di {adapter_dir}")
151
  return
 
157
  log.warning(f"[SAN] gagal baca adapter_config: {e}")
158
  return
159
 
 
160
  cleaned = {k: v for k, v in cfg.items() if k in allowed}
161
  dropped = [k for k in cfg.keys() if k not in allowed]
162
  if dropped:
 
165
  json.dump(cleaned, f, ensure_ascii=False, indent=2)
166
 
167
  def _load_model():
168
+ """Download ke /tmp, strip BOM, sanitize adapter_config, lalu load."""
169
  global TOK, MODEL, _MODEL_READY, _MODEL_ERROR
170
  try:
171
  log.info("[MODEL] downloading base=%s adapter=%s", BASE_MODEL_ID, ADAPTER_ID or "-")
 
193
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
194
  from peft import PeftModel
195
 
 
196
  TOK = AutoTokenizer.from_pretrained(base_dir)
 
 
197
  base = AutoModelForSeq2SeqLM.from_pretrained(base_dir)
198
 
199
+ MODEL = PeftModel.from_pretrained(base, adapter_dir) if adapter_dir else base
 
 
 
 
 
200
  MODEL.eval().to(DEVICE)
201
+
202
  _MODEL_READY = True
203
  _MODEL_ERROR = None
204
  log.info("[MODEL] ready on %s", DEVICE)
 
208
  log.exception("[MODEL] load error")
209
 
210
  def get_model():
211
+ global MODEL
212
  if MODEL is None:
213
  with _MODEL_LOCK:
214
  if MODEL is None:
 
233
  return tok.decode(out[0], skip_special_tokens=True)
234
 
235
def _preload_thread():
    """Thread target that loads the model ahead of the first request.

    Best-effort: any exception raised by ``_load_model`` is logged and
    suppressed so the background thread ends quietly instead of propagating.
    """
    try:
        _load_model()
    except Exception:
        # Previously swallowed with ``pass``; keep the best-effort semantics
        # but leave a traceback so a failed preload is diagnosable.
        log.exception("[MODEL] preload failed")
 
 
238
 
239
  if PRELOAD_MODEL:
240
  threading.Thread(target=_preload_thread, daemon=True).start()
241
 
242
+ # ---------- Utils / Logging ----------
243
@app.before_request
def _log_req():
    """Log the method and path of each request, skipping probe/favicon paths."""
    quiet_paths = ("/health", "/ping", "/favicon.ico")
    if request.path in quiet_paths:
        return
    log.info("[REQ] %s %s", request.method, request.path)
247
 
248
  @app.errorhandler(Exception)
 
250
  log.exception("Unhandled error")
251
  return "Internal Server Error", 500
252
 
253
+ # ---------- Debug & Diag ----------
254
  @app.get("/diag")
255
  def diag():
256
  import sys
 
261
  peft_v = peft.__version__
262
  except Exception as e:
263
  torch_v = tf_v = peft_v = f"import error: {e}"
 
264
  return jsonify({
265
  "ok": True,
266
  "time": datetime.now(timezone.utc).isoformat(),
 
284
  def dbg_get():
285
  return {"uid": session.get("uid"), "email": session.get("email")}
286
 
287
+ # ---------- Auth & Pages ----------
288
  @app.get("/health")
289
  @app.get("/ping")
290
  def health():
 
341
  .filter(Translation.user_id == uid)
342
  .order_by(Translation.id.desc())
343
  .limit(10).all())
344
+ recent = [{"src": it.src, "mt": it.mt, "created_at": it.created_at.strftime("%Y-%m-%d %H:%M")} for it in items]
345
+ return render_template("index.html", logged_in=True, device=DEVICE, recent=recent)
346
 
347
@app.get("/about")
def about_page():
    """Render the ``about.html`` template with no extra context."""
    return render_template("about.html")
350
 
351
+ # ---------- API ----------
352
@app.get("/history")
def api_history():
    """Return the signed-in user's ten most recent translations as JSON.

    The response is ``{"items": [...]}``; each item carries ``src``, ``mt``
    and ``created_at`` formatted as ``YYYY-MM-DD HH:MM``. When the session has
    no ``uid``, an empty list is returned instead of an error response.
    """
    uid = session.get("uid")
    if not uid:
        return jsonify({"items": []})

    with SessionLocal() as db:
        query = (
            db.query(Translation)
            .filter(Translation.user_id == uid)
            .order_by(Translation.id.desc())  # newest first
            .limit(10)
        )
        history = []
        for row in query.all():
            history.append({
                "src": row.src,
                "mt": row.mt,
                "created_at": row.created_at.strftime("%Y-%m-%d %H:%M"),
            })
    return jsonify({"items": history})
364
+
365
  @app.post("/translate")
366
  def api_translate():
367
  if not session.get("uid"):
368
  return jsonify({"ok": False, "error": "Unauthorized"}), 401
 
369
  payload = request.get_json(silent=True) or {}
370
  text = (payload.get("text") or "").strip()
371
  max_new = int(payload.get("max_new_tokens", 48))
372
  if not text:
373
  return jsonify({"ok": False, "error": "Empty text"}), 400
 
374
  try:
375
  clean = prenorm(text)
376
  mt = f"[FAKE] {clean}" if FALLBACK_TRANSLATE else translate_with_model(clean, max_new_tokens=max_new)
 
383
  log.error(traceback.format_exc())
384
  return jsonify({"ok": False, "error": f"{type(e).__name__}: {e}"}), 500
385
 
386
+ # ---------- Run ----------
387
if __name__ == "__main__":
    # Listen on all interfaces; the port comes from the PORT env var (default 7860).
    app.run(host="0.0.0.0", port=int(os.getenv("PORT", "7860")), debug=False)