Spaces:

proyecto-meis
/

MEIS

Sleeping

App Files Files Community

proyecto-meis committed on Oct 10

Commit

27b1d64

verified ·

1 Parent(s): 1c5761b

Modelo y app.py

Browse files

Files changed (3) hide show

app.py +137 -0
model.h5 +3 -0
requirements.txt +43 -0

app.py ADDED Viewed

	@@ -0,0 +1,137 @@

+from keras.layers import Layer
+import keras.backend as K
+from transformers import TFAutoModel, AutoTokenizer
+from tensorflow.keras.layers import (
+    Softmax, GlobalAveragePooling1D, GlobalMaxPooling1D, Activation, Concatenate,
+    Conv1D, MultiHeadAttention, LayerNormalization, Input, LSTM, Embedding,
+    Lambda, Dense, Dropout, concatenate, SpatialDropout1D, Bidirectional
+)
+from keras.models import Model
+from tcn import TCN
+import keras.ops as ops
+from keras import initializers
+import tensorflow as tf
+import re
+import os
+import gradio as gr
+bert_model_name = "dccuchile/bert-base-spanish-wwm-uncased"
+MAX_LEN = 274
+WEIGHTS_PATH = os.getenv("WEIGHTS_PATH", "model.h5")
+THRESHOLD = float(os.getenv("THRESHOLD", "0.5"))
+tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
+bert_model = TFAutoModel.from_pretrained(
+    bert_model_name,
+    output_hidden_states=False,
+    output_attentions=False,
+)
+bert_model.trainable = False
+def tcn_model_with_bert(bert_model_name="google-bert/bert-base-multilingual-uncased", max_length=512):
+    """Build and compile the binary classifier that sits on top of BERT embeddings.
+
+    Graph, in order: BERT last hidden state -> SpatialDropout1D -> LSTM ->
+    LayerNormalization -> Bidirectional TCN -> concatenated global average and
+    max pooling -> Dense(64, gelu) -> Dropout -> Dense(1, sigmoid).
+
+    Args:
+        bert_model_name: Never read inside this function; the embeddings always
+            come from the module-level ``bert_model``, whatever name is passed.
+        max_length: Length of the ``input_ids`` and ``attention_mask`` inputs.
+
+    Returns:
+        A compiled Keras ``Model`` taking ``[input_ids, attention_mask]`` and
+        producing a single sigmoid output.
+    """
+    # NOTE(review): the order in which layers are created here presumably has to
+    # match the model that produced the saved weights file - confirm before reordering.
+    input_ids = Input(shape=(max_length,), dtype=tf.int32, name='input_ids')
+    attention_mask = Input(shape=(max_length,),
+                           dtype=tf.int32, name='attention_mask')
+    def extract_bert_embeddings(inputs):
+        # inputs is the [input_ids, attention_mask] pair passed to the Lambda layer below;
+        # the module-level bert_model is called and its last_hidden_state cast to float32.
+        return tf.cast(
+            bert_model(
+                {'input_ids': inputs[0], 'attention_mask': inputs[1]}).last_hidden_state,
+            tf.float32
+        )
+    # 768 is hard-coded; presumably the hidden size of the BERT checkpoint - TODO confirm
+    # if the checkpoint is ever changed.
+    bert_output = Lambda(extract_bert_embeddings, output_shape=(
+        max_length, 768))([input_ids, attention_mask])
+    x = SpatialDropout1D(0.15)(bert_output)
+    x = LSTM(128, activation='tanh', stateful=False,
+             return_sequences=True, dropout=0.1)(x)
+    x = LayerNormalization()(x)
+    x = Bidirectional(TCN(128, dilations=[
+                      1, 2, 4, 8], kernel_size=5, return_sequences=True, activation='gelu', name='tcn1'))(x)
+    # Both pooled views of the sequence are concatenated before the dense head.
+    gap = GlobalAveragePooling1D()(x)
+    gmp = GlobalMaxPooling1D()(x)
+    head = Concatenate()([gap, gmp])
+    head = Dense(64, activation="gelu")(head)
+    head = Dropout(0.2)(head)
+    outp = Dense(1, activation="sigmoid")(head)
+    model = Model(inputs=[input_ids, attention_mask], outputs=outp)
+    # Compiled with AdamW (gradient-norm clipping at 1.0) and binary cross-entropy.
+    model.compile(
+        optimizer=tf.keras.optimizers.AdamW(
+            learning_rate=1e-4, weight_decay=0.01, clipnorm=1.0),
+        loss="binary_crossentropy",
+        metrics=['accuracy']
+    )
+    return model
+def preprocessing(text):
+    if not isinstance(text, str) or not text:
+        return ""
+    text = re.sub(r'\s*https?://\S+(\s+|$)', ' ', text).strip()
+    text = re.sub(r'\S*@\S*\s?', ' ', text).strip()
+    text = re.sub(r'#\S*\s?', ' ', text).strip()
+    text = re.sub(r'[.?!¡¿]+$', '', text)
+    text = text.lower()
+    text = text.strip()
+    return text
+model = tcn_model_with_bert(
+    bert_model_name=bert_model_name, max_length=MAX_LEN)
+_loaded = False
+if os.path.exists(WEIGHTS_PATH):
+    try:
+        model.load_weights(WEIGHTS_PATH)
+        _loaded = True
+    except Exception:
+        try:
+            from tensorflow.keras.models import load_model
+            model = load_model(WEIGHTS_PATH, custom_objects={"TCN": TCN})
+            _loaded = True
+        except Exception:
+            pass
+def predict_text(text: str, max_len: int = MAX_LEN, threshold: float = THRESHOLD):
+    preprocessed_text = preprocessing(text)
+    enc = tokenizer(
+        preprocessed_text,
+        truncation=True,
+        padding='max_length',
+        max_length=max_len,
+        return_tensors='tf'
+    )
+    probs = model.predict(
+        {'input_ids': enc['input_ids'],
+            'attention_mask': enc['attention_mask']},
+        verbose=0
+    )
+    score = float(probs[0][0])
+    label = int(score >= threshold)
+    return {
+        "text": text,
+        "preprocessed": preprocessed_text,
+        "score": score,
+        "label": label
+    }
+def predict_fn(texto):
+    if not isinstance(texto, list):
+        texto = [texto]
+    details = []
+    for t in texto:
+        result = predict_text(t)
+        details.append({
+            "txt": t,
+            "probability": round(float(result["score"]), 3),
+            "risk": "ALTO" if result["label"] == 1 else "BAJO"
+        })
+    return details
+iface = gr.Interface(fn=predict_fn, inputs="text", outputs="json")
+if __name__ == "__main__":
+    iface.launch()

model.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0c4d1f868a5464614f1a4530426aa076d8fb254e4b29a9e5c0986599415c90e9
+size 21791632

requirements.txt ADDED Viewed

	@@ -0,0 +1,43 @@

+tensorflow==2.20.0
+tf-keras==2.20.1
+keras-tcn>=3.5.6
+transformers>=4.44.0
+huggingface_hub
+sentencepiece
+annotated-types==0.7.0
+anyio==4.9.0
+async-timeout==5.0.1
+asyncpg==0.30.0
+bcrypt==4.3.0
+certifi==2025.8.3
+charset-normalizer==3.4.3
+click==8.1.8
+dnspython==2.7.0
+ecdsa==0.19.1
+email_validator==2.2.0
+exceptiongroup==1.2.2
+fastapi==0.115.12
+greenlet==3.1.1
+h11==0.16.0
+httpcore==1.0.9
+httpx==0.28.1
+idna==3.10
+passlib==1.7.4
+psycopg2-binary==2.9.10
+pyasn1==0.6.1
+pydantic==2.11.2
+pydantic_core==2.33.1
+PyJWT==2.10.1
+python-dotenv==1.1.0
+python-jose==3.5.0
+requests==2.32.5
+resend==2.16.0
+rsa==4.9.1
+six==1.17.0
+sniffio==1.3.1
+SQLAlchemy==2.0.40
+starlette==0.46.1
+typing-inspection==0.4.0
+typing_extensions==4.13.1
+urllib3==2.5.0
+uvicorn==0.34.0