submission-template-frugal-ai

Sleeping

App Files Files Community

Oriaz committed on Jan 7

Commit

41b7068

verified ·

1 Parent(s): d77b584

Update tasks/text.py

Browse files

Files changed (1) hide show

tasks/text.py +50 -16

tasks/text.py CHANGED Viewed

@@ -8,14 +8,21 @@ from .utils.evaluation import TextEvaluationRequest
 from .utils.emissions import tracker, clean_emissions_data, get_space_info
 ## add-on imports
 from sentence_transformers import SentenceTransformer
 from sklearn.preprocessing import MinMaxScaler
-import numpy as np
 import skops.io as sio
 router = APIRouter()
-DESCRIPTION = "Embedding + Logistic Regression"
 ROUTE = "/text"
 @router.post(ROUTE, tags=["Text Task"],
@@ -62,23 +69,50 @@ async def evaluate_text(request: TextEvaluationRequest):
     # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
     #--------------------------------------------------------------------------------------------
-    ## Models loading
-    # Embedding model
-    query_prompt_name = "s2s_query"
-    model = SentenceTransformer("dunzhang/stella_en_400M_v5",trust_remote_code=True).cuda()
-    # Pre-trained Logistic Regression model
-    trusted_types = ['sklearn.feature_selection._univariate_selection.f_classif']
-    disp = sio.load('./tasks/logistic_regression_model.skops',trusted=trusted_types)
-    ## Data prep
-    embeddings = model.encode(list(test_dataset['quote']), prompt_name=query_prompt_name)
-    scaler = MinMaxScaler()
-    X_scaled = scaler.fit_transform(embeddings)
-    ## Predictions
-    predictions = disp.predict(X_scaled)
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE
     #--------------------------------------------------------------------------------------------

 from .utils.emissions import tracker, clean_emissions_data, get_space_info
 ## add-on imports
+import numpy as np
+# Logistic REG reqs
 from sentence_transformers import SentenceTransformer
 from sklearn.preprocessing import MinMaxScaler
 import skops.io as sio
+# BERT reqs
+from transformers import AutoTokenizer,BertForSequenceClassification,AutoModelForSequenceClassification,Trainer, TrainingArguments,DataCollatorWithPadding
+from datasets import Dataset
+import torch
 router = APIRouter()
+DESCRIPTION = "Simple BERT classif"
 ROUTE = "/text"
 @router.post(ROUTE, tags=["Text Task"],
     # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
     #--------------------------------------------------------------------------------------------
+    ######################## LOG REG  ########################
+    # ## Models loading
+    # # Embedding model
+    # query_prompt_name = "s2s_query"
+    # model = SentenceTransformer("dunzhang/stella_en_400M_v5",trust_remote_code=True).cuda()
+    # # Pre-trained Logistic Regression model
+    # trusted_types = ['sklearn.feature_selection._univariate_selection.f_classif']
+    # disp = sio.load('./tasks/logistic_regression_model.skops',trusted=trusted_types)
+    # ## Data prep
+    # embeddings = model.encode(list(test_dataset['quote']), prompt_name=query_prompt_name)
+    # scaler = MinMaxScaler()
+    # X_scaled = scaler.fit_transform(embeddings)
+    # ## Predictions
+    # predictions = disp.predict(X_scaled)
+    ######################## BERT  ########################
+    ## Model loading
+    model = BertForSequenceClassification.from_pretrained("Oriaz/climate_change_bert_classif")
+    tokenizer = AutoTokenizer.from_pretrained("Oriaz/climate_change_bert_classif")
+    ## Data prep
+    def preprocess_function(df):
+        return tokenizer(df["quote"], truncation=True)
+    tokenized_test = test_dataset.map(preprocess_function, batched=True)
+    ## Modify inference model
+    training_args = torch.load("./tasks/utils/training_args.bin")
+    training_args.eval_strategy='no'
+    trainer = Trainer(
+        model=model,
+        args=training_args,
+        tokenizer=tokenizer
+    )
+    ## prediction
+    preds = trainer.predict(tokenized_test)
+    predictions = np.array([np.argmax(x) for x in preds[0]])
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE
     #--------------------------------------------------------------------------------------------