import torch
import torch.nn as nn

# `transformers` is only needed by TextClassifier. Guarding the import keeps
# this module importable (and MultimodalClassifier usable) in environments
# that only have torch installed; TextClassifier reports the missing package
# when it is actually constructed.
try:
    from transformers import AutoModel
except ImportError:
    AutoModel = None


# Model architecture A (multimodal)
class MultimodalClassifier(nn.Module):
    """Two-layer MLP head over concatenated text and audio feature vectors.

    Args:
        num_classes: Size of the output (logit) dimension.
        text_feature_dim: Width of the text feature vector.
        audio_feature_dim: Width of the audio feature vector.
        hidden_dim: Width of the hidden layer.
        dropout_p: Dropout probability applied after the hidden activation.
    """

    def __init__(
        self,
        num_classes: int,
        text_feature_dim: int = 768,
        audio_feature_dim: int = 768,
        hidden_dim: int = 512,
        *,
        dropout_p: float = 0.5,
    ) -> None:
        super().__init__()
        # Attribute names are kept stable so existing state_dict checkpoints
        # continue to load.
        self.fc1 = nn.Linear(text_feature_dim + audio_feature_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_p)
        self.fc2 = nn.Linear(hidden_dim, num_classes)

    def forward(
        self, text_features: torch.Tensor, audio_features: torch.Tensor
    ) -> torch.Tensor:
        """Return unnormalised class scores for a batch of feature pairs.

        Args:
            text_features: Tensor of shape ``(batch, text_feature_dim)``.
            audio_features: Tensor of shape ``(batch, audio_feature_dim)``.

        Returns:
            Tensor of shape ``(batch, num_classes)``; no softmax is applied.
        """
        # Fuse both modalities along the feature axis before the MLP.
        fused = torch.cat((text_features, audio_features), dim=1)
        hidden = self.dropout(self.relu(self.fc1(fused)))
        return self.fc2(hidden)


# Model architecture B (text-only)
class TextClassifier(nn.Module):
    """Pretrained transformer encoder followed by dropout and a linear head.

    Args:
        n_classes: Size of the output (logit) dimension.
        pretrained_name: Checkpoint name or path passed to
            ``AutoModel.from_pretrained``. The loaded model must expose
            ``pooler_output``, which ``forward`` reads.
        dropout_p: Dropout probability applied to the pooled output.

    Raises:
        ImportError: If the ``transformers`` package is not installed.
    """

    def __init__(
        self,
        n_classes: int,
        *,
        pretrained_name: str = "vinai/phobert-base",
        dropout_p: float = 0.3,
    ) -> None:
        super().__init__()
        if AutoModel is None:
            raise ImportError(
                "TextClassifier requires the 'transformers' package"
            )
        # Load the backbone model when the class is instantiated.
        self.bert = AutoModel.from_pretrained(pretrained_name)
        self.drop = nn.Dropout(p=dropout_p)
        self.out = nn.Linear(self.bert.config.hidden_size, n_classes)

    def forward(
        self, input_ids: torch.Tensor, attention_mask: torch.Tensor
    ) -> torch.Tensor:
        """Return unnormalised class scores for a batch of token sequences.

        Args:
            input_ids: Token ids forwarded to the backbone.
            attention_mask: Attention mask forwarded to the backbone.

        Returns:
            Output of the linear head applied to the backbone's pooled
            output; no softmax is applied.
        """
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled = self.drop(encoded.pooler_output)
        return self.out(pooled)