---
license: mit
datasets:
- manueltonneau/arabic-hate-speech-superset
language:
- ar
metrics:
- f1
- accuracy
base_model:
- aubmindlab/bert-base-arabertv02
pipeline_tag: text-classification
library_name: transformers
---
AraBERT (aubmindlab/bert-base-arabertv02) fine-tuned for binary hate speech detection in Arabic on the manueltonneau/arabic-hate-speech-superset dataset.

Model loading:
```py
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

model_name = "AyaHazem61/araBERT-For-Hate-Speech-Detection"

# Load the tokenizer and the fine-tuned binary classifier (num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
)

# Run on GPU when available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
```
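Alternatively, since the card sets `pipeline_tag: text-classification`, the checkpoint should also work with the high-level `pipeline` API, which wraps tokenization, inference, and label mapping in one call. A minimal sketch (it loads its own copy of the tokenizer and model):

```py
from transformers import pipeline

# The pipeline tokenizes, runs the model, and applies softmax internally;
# it returns [{'label': ..., 'score': ...}] per input text
# (label names depend on how the checkpoint's config was saved)
classifier = pipeline(
    "text-classification",
    model="AyaHazem61/araBERT-For-Hate-Speech-Detection",
)
print(classifier("السلام عليكم و رحمة الله و بركاته"))
```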
Model prediction:
```py
# Example input: a common Arabic greeting ("Peace be upon you and the mercy and blessings of Allah")
texts = ["السلام عليكم و رحمة الله و بركاته"]

# Tokenize and move the tensors to the same device as the model
inputs = tokenizer(texts, return_tensors="pt", padding="max_length", truncation=True, max_length=512)
inputs = {k: v.to(device) for k, v in inputs.items()}

# Inference mode: disable dropout and gradient tracking
model.eval()
with torch.no_grad():
    outputs = model(**inputs)

logits = outputs.logits
```
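To turn the logits into class probabilities and readable labels, apply a softmax and look the predicted ids up in the model config. A minimal sketch; note that the names in `model.config.id2label` depend on how the checkpoint was saved (they may simply be LABEL_0/LABEL_1):

```py
import torch.nn.functional as F

# Convert logits to probabilities and pick the most likely class per text
probs = F.softmax(logits, dim=-1)
pred_ids = probs.argmax(dim=-1)

for text, pred_id, prob in zip(texts, pred_ids, probs):
    label = model.config.id2label[pred_id.item()]  # e.g. LABEL_0 / LABEL_1
    print(f"{text} -> {label} ({prob[pred_id].item():.3f})")
```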