Update app.py
app.py
CHANGED
@@ -33,8 +33,6 @@ import line_cor
 import altair as alt
 #pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
 from PIL import Image
-API_URL = "https://api-inference.huggingface.co/models/gpt2-large"
-headers = {"Authorization": "Bearer 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"}
 API_URL0 = "https://api-inference.huggingface.co/models/csebuetnlp/mT5_multilingual_XLSum"
 headers0 = {"Authorization": "Bearer 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"}
 
@@ -65,11 +63,29 @@ def read_pdf(file):
 # # Display the extracted text
 # #st.text(extracted_text)
 # return extracted_text
-
+def engsum(output):
+    API_URL1 = "https://api-inference.huggingface.co/models/Michael-Vptn/text-summarization-t5-base"
+    headers1 = {"Authorization": "Bearer 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"}
+
+    def query(payload):
+        response = requests.post(API_URL1, headers=headers1, json=payload)
+        return response.json()
+
+    output = query({
+        "inputs": output,
+    })
+    st.success(output)
+def bansum(text):
+    def query(payload):
+        response = requests.post(API_URL0, headers=headers0, json=payload)
+        return response.json()
+    output = query({"inputs": text})
+    st.success(output)
 
 st.title("NLP APPLICATION")
 #@st.cache_resource(experimental_allow_widgets=True)
 def main():
+    b=0
     #global tokenizer, model
     #tokenizer = AutoTokenizer.from_pretrained('t5-base')
     #model = AutoModelWithLMHead.from_pretrained('t5-base', return_dict=True)
@@ -114,13 +130,24 @@ def main():
         #ret,thresh1 = cv2.threshold(imge,120,255,cv2.THRESH_BINARY)
         # pytesseract image to string to get results
         #text = str(pytesseract.image_to_string(img, config='--psm 6',lang="ben")) if st.checkbox("Bangla") else str(pytesseract.image_to_string(thresh1, config='--psm 6'))
-
+        if st.checkbox("Bangla"):
+            b=1
+            text = pytesseract.image_to_string(img, lang="ben")
+        else:
+            b=0
+            text=pytesseract.image_to_string(img)
         #st.success(text)
     elif camera_photo:
         img = Image.open(camera_photo)
         img = img.save("img.png")
         img = cv2.imread("img.png")
-        text = pytesseract.image_to_string(img) if st.checkbox("Bangla") else pytesseract.image_to_string(img, lang="ben")
+        #text = pytesseract.image_to_string(img) if st.checkbox("Bangla") else pytesseract.image_to_string(img, lang="ben")
+        if st.checkbox("Bangla"):
+            b=1
+            text = pytesseract.image_to_string(img, lang="ben")
+        else:
+            b=0
+            text=pytesseract.image_to_string(img)
         #st.success(text)
     elif uploaded_photo==None and camera_photo==None:
         #our_image=load_image("image.jpg")
@@ -128,6 +155,9 @@ def main():
         text = message
 
     if st.checkbox("English Text Generation"):
+        API_URL = "https://api-inference.huggingface.co/models/gpt2"
+        headers = {"Authorization": "Bearer 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"}
+
         def query(payload):
             response = requests.post(API_URL, headers=headers, json=payload)
             return response.json()
@@ -136,12 +166,15 @@ def main():
             "inputs": text,
         })
        st.success(output)
-
-
-
-
-
-
-
+        if st.checkbox("Summarize generated text"):
+            engsum(output)
+
+
+    if st.checkbox("Mark for Text Summarization"):
+        if b==1:
+            bansum(text)
+        else:
+            engsum(text)
+    #END
 if __name__ == '__main__':
     main()
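
For reference, a minimal standalone sketch (not part of the commit) of the Inference API call pattern that the new engsum/bansum helpers wrap. It assumes only the requests library; the endpoint is the mT5 XLSum URL already used in app.py, and the token value below is a placeholder.

import requests

# Same endpoint bansum() posts to in app.py; replace the placeholder token with a real one.
API_URL0 = "https://api-inference.huggingface.co/models/csebuetnlp/mT5_multilingual_XLSum"
headers0 = {"Authorization": "Bearer hf_xxx"}

def query(payload):
    # POST the payload as JSON and return the decoded JSON response from the Inference API.
    response = requests.post(API_URL0, headers=headers0, json=payload)
    return response.json()

if __name__ == "__main__":
    print(query({"inputs": "Some text to summarize."}))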