Update app.py
app.py
CHANGED
@@ -33,8 +33,6 @@ import line_cor
 import altair as alt
 #pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
 from PIL import Image
-API_URL = "https://api-inference.huggingface.co/models/gpt2-large"
-headers = {"Authorization": "Bearer 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"}
 API_URL0 = "https://api-inference.huggingface.co/models/csebuetnlp/mT5_multilingual_XLSum"
 headers0 = {"Authorization": "Bearer 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"}
 
@@ -65,11 +63,29 @@ def read_pdf(file):
 # # Display the extracted text
 # #st.text(extracted_text)
 # return extracted_text
-
+def engsum(output):
+    API_URL1 = "https://api-inference.huggingface.co/models/Michael-Vptn/text-summarization-t5-base"
+    headers1 = {"Authorization": "Bearer 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"}
+
+    def query(payload):
+        response = requests.post(API_URL1, headers=headers1, json=payload)
+        return response.json()
+
+    output = query({
+        "inputs": output,
+    })
+    st.success(output)
+def bansum(text):
+    def query(payload):
+        response = requests.post(API_URL0, headers=headers0, json=payload)
+        return response.json()
+    output = query({"inputs": text})
+    st.success(output)
 
 st.title("NLP APPLICATION")
 #@st.cache_resource(experimental_allow_widgets=True)
 def main():
+    b=0
     #global tokenizer, model
     #tokenizer = AutoTokenizer.from_pretrained('t5-base')
     #model = AutoModelWithLMHead.from_pretrained('t5-base', return_dict=True)
@@ -114,13 +130,24 @@ def main():
         #ret,thresh1 = cv2.threshold(imge,120,255,cv2.THRESH_BINARY)
         # pytesseract image to string to get results
         #text = str(pytesseract.image_to_string(img, config='--psm 6',lang="ben")) if st.checkbox("Bangla") else str(pytesseract.image_to_string(thresh1, config='--psm 6'))
-
+        if st.checkbox("Bangla"):
+            b=1
+            text = pytesseract.image_to_string(img, lang="ben")
+        else:
+            b=0
+            text=pytesseract.image_to_string(img)
         #st.success(text)
     elif camera_photo:
         img = Image.open(camera_photo)
         img = img.save("img.png")
         img = cv2.imread("img.png")
-        text = pytesseract.image_to_string(img) if st.checkbox("Bangla") else pytesseract.image_to_string(img, lang="ben")
+        #text = pytesseract.image_to_string(img) if st.checkbox("Bangla") else pytesseract.image_to_string(img, lang="ben")
+        if st.checkbox("Bangla"):
+            b=1
+            text = pytesseract.image_to_string(img, lang="ben")
+        else:
+            b=0
+            text=pytesseract.image_to_string(img)
         #st.success(text)
     elif uploaded_photo==None and camera_photo==None:
         #our_image=load_image("image.jpg")
@@ -128,6 +155,9 @@ def main():
         text = message
 
     if st.checkbox("English Text Generation"):
+        API_URL = "https://api-inference.huggingface.co/models/gpt2"
+        headers = {"Authorization": "Bearer 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"}
+
         def query(payload):
             response = requests.post(API_URL, headers=headers, json=payload)
             return response.json()
@@ -136,12 +166,15 @@ def main():
             "inputs": text,
         })
        st.success(output)
-
-
-
-
-
-
-
+        if st.checkbox("Summarize generated text"):
+            engsum(output)
+
+
+    if st.checkbox("Mark for Text Summarization"):
+        if b==1:
+            bansum(text)
+        else:
+            engsum(text)
+    #END
 if __name__ == '__main__':
     main()
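
For reference, a minimal standalone sketch (not part of the commit) of the Inference API call pattern that the new engsum/bansum helpers wrap. It assumes only the requests library; the endpoint is the mT5 XLSum URL already used in app.py, and the token value below is a placeholder.

import requests

# Same endpoint bansum() posts to in app.py; replace the placeholder token with a real one.
API_URL0 = "https://api-inference.huggingface.co/models/csebuetnlp/mT5_multilingual_XLSum"
headers0 = {"Authorization": "Bearer hf_xxx"}

def query(payload):
    # POST the payload as JSON and return the decoded JSON response from the Inference API.
    response = requests.post(API_URL0, headers=headers0, json=payload)
    return response.json()

if __name__ == "__main__":
    print(query({"inputs": "Some text to summarize."}))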