Antonio committed · adad62e · 1 Parent(s): 374f948 · "Change"

app.py CHANGED
@@ -25,6 +25,8 @@ def get_emotion_from_filename(filename):
 
 def separate_video_audio(file_path):
     output_dir = './temp/'
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
     video_path = os.path.join(output_dir, os.path.basename(file_path).replace('.mp4', '_video.mp4'))
     audio_path = os.path.join(output_dir, os.path.basename(file_path).replace('.mp4', '_audio.wav'))
 
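For reference, the added existence check can be collapsed into a single call: os.makedirs accepts an exist_ok flag, which also closes the race window between the check and the creation if two requests hit the app at once. A minimal sketch of the equivalent idiom:

import os

output_dir = './temp/'                  # same directory the app uses
os.makedirs(output_dir, exist_ok=True)  # no-op if it already exists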
@@ -81,7 +83,7 @@ def video_label_to_emotion(label):
 def predict_video(file_path, video_model, image_processor):
     video = process_video(file_path)
     inputs = image_processor(list(video), return_tensors="pt")
-    device = torch.device("
+    device = torch.device("cpu")
     inputs = inputs.to(device)
 
     with torch.no_grad():
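This hunk pins inference to the CPU, which matches the map_location changes further down. A common alternative, shown here only as a sketch and not as what the truncated removed line contained, picks the device at runtime so the same code runs on GPU or CPU-only hosts:

import torch

# Assumption: fall back to CPU when no CUDA device is visible,
# e.g. on a CPU-only Space.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")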
@@ -100,7 +102,7 @@ def audio_label_to_emotion(label):
 def preprocess_and_predict_audio(file_path, model, processor):
     audio_array, _ = librosa.load(file_path, sr=16000)
     inputs = processor(audio_array, sampling_rate=16000, return_tensors="pt", padding=True, max_length=75275)
-    device = torch.device("
+    device = torch.device("cpu")
     model = model.to(device)
     inputs = {k: v.to(device) for k, v in inputs.items()}
 
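Note that the audio path moves both the model and every tensor in the processor output onto the device, since feature-extractor outputs behave like plain dicts of tensors. A small helper in the same spirit (hypothetical name to_device, not part of app.py):

import torch

def to_device(batch, device):
    # Mirrors the dict comprehension in the diff: each tensor value
    # in the processor output is moved individually.
    return {k: v.to(device) for k, v in batch.items()}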
@@ -182,13 +184,13 @@ decision_frameworks = {
 def predict(video_file, video_model_name, audio_model_name, framework_name):
 
     image_processor = VivitImageProcessor.from_pretrained("google/vivit-b-16x2-kinetics400")
-    video_model = torch.load('./' + video_model_name)
+    video_model = torch.load('./' + video_model_name, map_location=torch.device('cpu'))
 
     model_id = "facebook/wav2vec2-large"
     config = AutoConfig.from_pretrained(model_id, num_labels=6)
     audio_processor = AutoFeatureExtractor.from_pretrained(model_id)
     audio_model = Wav2Vec2ForSequenceClassification.from_pretrained(model_id, config=config)
-    audio_model.load_state_dict(torch.load('./' + audio_model_name))
+    audio_model.load_state_dict(torch.load('./' + audio_model_name, map_location=torch.device('cpu')))
     audio_model.eval()
 
     delete_directory_path = "./temp/"
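The map_location argument is what lets these checkpoint loads succeed on a CPU-only machine: tensors saved from a CUDA device are remapped onto the CPU at load time instead of raising during deserialization. A minimal sketch, with a placeholder checkpoint path:

import torch

# './checkpoint.pt' is a hypothetical path, not a file from this repo.
state_dict = torch.load('./checkpoint.pt', map_location=torch.device('cpu'))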