Spaces:

Fer14
/

coffee_machine_captioning

Sleeping

vicgalle committed on Jun 10, 2024

Commit

be1d27c

1 Parent(s): 7931ee0

update

Files changed (2) hide show

app.py CHANGED Viewed

@@ -1,8 +1,9 @@
 import streamlit as st
 from PIL import Image
 from transformers import PaliGemmaForConditionalGeneration, PaliGemmaProcessor
-st.title("Image to Text Converter")
 uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
@@ -12,6 +13,18 @@ model_id = "Fer14/paligemma_coffe_machine_caption"
 model = PaliGemmaForConditionalGeneration.from_pretrained(model_id)
 processor = PaliGemmaProcessor.from_pretrained(model_id)
 prompt  = (
             f"Generate a caption for the following coffee maker image. The caption has to be of the following structure:\n"
             "\"A <color> <type>, <accessories>, <shape> shaped, with <screen> and <number> <b_color> butons\"\n\n"
@@ -37,22 +50,16 @@ if uploaded_image is not None:
             padding="longest",
         )
-    output = model.generate(**inputs, max_length=1000)
     out = processor.decode(output[0], skip_special_tokens=True)[len(prompt) :]
-    # Extract text from the image
-    st.write("Extracting text from the image...")
     # Display the extracted text
     st.text_area("Coffe machine description", out, height=300)
-# Instructions for Tesseract OCR
-st.sidebar.title("Instructions")
-st.sidebar.write(
-    """
-    1. Upload an image using the file uploader.
-    2. Wait for the app to process and extract text from the image.
-    3. The extracted text will be displayed in the text area.
-    """
-)

 import streamlit as st
 from PIL import Image
 from transformers import PaliGemmaForConditionalGeneration, PaliGemmaProcessor
+from tqdm import tqdm
+st.title("Coffe machine captioning app")
 uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
 model = PaliGemmaForConditionalGeneration.from_pretrained(model_id)
 processor = PaliGemmaProcessor.from_pretrained(model_id)
+# Instructions for Tesseract OCR
+st.sidebar.title("Instructions")
+st.sidebar.write(
+    """
+    1. Upload an image using the file uploader.
+    2. Wait for the app to process and extract text from the image.
+    3. The extracted text will be displayed in the text area.
+    """
+)
 prompt  = (
             f"Generate a caption for the following coffee maker image. The caption has to be of the following structure:\n"
             "\"A <color> <type>, <accessories>, <shape> shaped, with <screen> and <number> <b_color> butons\"\n\n"
             padding="longest",
         )
+    st.write("Generating caption for the image...")
+    with tqdm(total=100) as pbar:
+        output = model.generate(**inputs, max_length=1000)
+        pbar.update(100)
     out = processor.decode(output[0], skip_special_tokens=True)[len(prompt) :]
     # Display the extracted text
     st.text_area("Coffe machine description", out, height=300)

requirements.txt CHANGED Viewed

@@ -1,3 +1,4 @@
 transformers @ git+https://github.com/huggingface/transformers.git
 datasets
 accelerate

 transformers @ git+https://github.com/huggingface/transformers.git
 datasets
 accelerate
+tqdm