Spaces:

Anudeep05
/

VideoMAE-API

Sleeping

App Files Files Community

Anudeep05 committed on Sep 4

Commit

a38e917

verified ·

1 Parent(s): dab5eb8

Update app.py

Browse files

Files changed (1) hide show

app.py +7 -32

app.py CHANGED Viewed

@@ -3,14 +3,9 @@ from transformers import VideoMAEForVideoClassification, VideoMAEImageProcessor
 from decord import VideoReader, cpu
 import gradio as gr
-# -------------------------------
-# Device
-# -------------------------------
 device = "cuda" if torch.cuda.is_available() else "cpu"
-# -------------------------------
 # Load processor and model
-# -------------------------------
 processor = VideoMAEImageProcessor.from_pretrained("MCG-NJU/videomae-small-finetuned-ssv2")
 model = VideoMAEForVideoClassification.from_pretrained(
     "MCG-NJU/videomae-small-finetuned-ssv2",
@@ -22,29 +17,15 @@ model.load_state_dict(checkpoint["model_state_dict"])
 model.to(device)
 model.eval()
-# -------------------------------
 # Class mapping
-# -------------------------------
 id2class = {
-    0: "AFGHANISTAN",
-    1: "AFRICA",
-    2: "ANDHRA_PRADESH",
-    3: "ARGENTINA",
-    4: "DELHI",
-    5: "DENMARK",
-    6: "ENGLAND",
-    7: "GANGTOK",
-    8: "GOA",
-    9: "GUJARAT",
-    10: "HARYANA",
-    11: "HIMACHAL_PRADESH",
-    12: "JAIPUR",
-    13: "JAMMU_AND_KASHMIR"
 }
-# -------------------------------
 # Video preprocessing
-# -------------------------------
 def preprocess_video(video_path, processor, num_frames=16):
     vr = VideoReader(video_path, ctx=cpu(0))
     total_frames = len(vr)
@@ -56,11 +37,8 @@ def preprocess_video(video_path, processor, num_frames=16):
     inputs = processor(list(video), return_tensors="pt")
     return inputs["pixel_values"][0]
-# -------------------------------
 # Prediction function
-# -------------------------------
 def predict_video(video_file):
-    # video_file is a file-like object from Gradio
     video_path = video_file.name
     pixel_values = preprocess_video(video_path, processor)
     pixel_values = pixel_values.unsqueeze(0).to(device)
@@ -69,16 +47,13 @@ def predict_video(video_file):
         pred_index = torch.argmax(logits, dim=1).item()
     return id2class[pred_index]
-# -------------------------------
-# Gradio Interface
-# -------------------------------
 iface = gr.Interface(
     fn=predict_video,
-    inputs=gr.Video(source="upload"),  # corrected argument
     outputs="text",
     title="VideoMAE Classification API",
     description="Upload a video and get the predicted class."
 )
-# Expose API
-iface.launch(server_name="0.0.0.0", server_port=7860, share=True)

 from decord import VideoReader, cpu
 import gradio as gr
 device = "cuda" if torch.cuda.is_available() else "cpu"
 # Load processor and model
 processor = VideoMAEImageProcessor.from_pretrained("MCG-NJU/videomae-small-finetuned-ssv2")
 model = VideoMAEForVideoClassification.from_pretrained(
     "MCG-NJU/videomae-small-finetuned-ssv2",
 model.to(device)
 model.eval()
 # Class mapping
 id2class = {
+    0: "AFGHANISTAN", 1: "AFRICA", 2: "ANDHRA_PRADESH", 3: "ARGENTINA",
+    4: "DELHI", 5: "DENMARK", 6: "ENGLAND", 7: "GANGTOK",
+    8: "GOA", 9: "GUJARAT", 10: "HARYANA", 11: "HIMACHAL_PRADESH",
+    12: "JAIPUR", 13: "JAMMU_AND_KASHMIR"
 }
 # Video preprocessing
 def preprocess_video(video_path, processor, num_frames=16):
     vr = VideoReader(video_path, ctx=cpu(0))
     total_frames = len(vr)
     inputs = processor(list(video), return_tensors="pt")
     return inputs["pixel_values"][0]
 # Prediction function
 def predict_video(video_file):
     video_path = video_file.name
     pixel_values = preprocess_video(video_path, processor)
     pixel_values = pixel_values.unsqueeze(0).to(device)
         pred_index = torch.argmax(logits, dim=1).item()
     return id2class[pred_index]
+# Gradio interface
 iface = gr.Interface(
     fn=predict_video,
+    inputs=gr.Video(),  # just this
     outputs="text",
     title="VideoMAE Classification API",
     description="Upload a video and get the predicted class."
 )
+iface.launch(share=True)