Update app.py
Browse files
app.py
CHANGED
|
@@ -55,22 +55,17 @@ def read_pdf(file):
|
|
| 55 |
# text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
|
| 56 |
all_page_text += page.extractText()+" "
|
| 57 |
return all_page_text
|
| 58 |
-
|
| 59 |
-
#
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
#
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
# # get co-ordinates to cr
|
| 70 |
-
# # # get co-ordinates to cr
|
| 71 |
-
# text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
|
| 72 |
-
# all_page_text += text + " " #page.extractText()
|
| 73 |
-
# return all_page_text
|
| 74 |
st.title("NLP APPLICATION")
|
| 75 |
#@st.cache_resource(experimental_allow_widgets=True)
|
| 76 |
def main():
|
|
@@ -97,7 +92,7 @@ def main():
|
|
| 97 |
#file = uploaded_photo.read() # Read the data
|
| 98 |
#image_result = open(uploaded_photo.name, 'wb') # creates a writable image and later we can write the decoded result
|
| 99 |
#image_result.write(file)
|
| 100 |
-
tet =
|
| 101 |
#tet = pytesseract.image_to_string(img, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(img)
|
| 102 |
values = st.slider('Select a approximate number of lines to see and summarize',value=[0, len(tet)//(7*10)])
|
| 103 |
text = tet[values[0]*7*10:values[1]*7*10] if values[0]!=len(tet)//(7*10) else tet[len(tet)//(7*10):]
|
|
|
|
| 55 |
# text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
|
| 56 |
all_page_text += page.extractText()+" "
|
| 57 |
return all_page_text
|
| 58 |
+
def read_pdf_with_pdfplumber(file):
    """Extract the text of every page of a PDF, display it, and return it.

    Args:
        file: Whatever ``pdfplumber.open`` accepts (the caller passes the
            uploaded file object).

    Returns:
        str: The text of all pages joined with a single space. An empty
        string when no page yields any text.
    """
    # Open the uploaded PDF file with pdfplumber; the context manager closes
    # it once all pages have been read.
    with pdfplumber.open(file) as pdf:
        # ``or ""`` guards against extract_text() yielding None for a page
        # without extractable text (possible in some pdfplumber versions),
        # which would otherwise break string concatenation.
        page_texts = [page.extract_text() or "" for page in pdf.pages]

    # Separate pages with a space so the last word of one page is not glued
    # to the first word of the next.
    extracted_text = " ".join(page_texts)

    # Display the extracted text
    st.text(extracted_text)

    # Return the string itself: callers take len() of the result and slice
    # it, so returning nothing (None) makes them fail with a TypeError.
    return extracted_text
|
| 67 |
+
|
| 68 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
st.title("NLP APPLICATION")
|
| 70 |
#@st.cache_resource(experimental_allow_widgets=True)
|
| 71 |
def main():
|
|
|
|
| 92 |
#file = uploaded_photo.read() # Read the data
|
| 93 |
#image_result = open(uploaded_photo.name, 'wb') # creates a writable image and later we can write the decoded result
|
| 94 |
#image_result.write(file)
|
| 95 |
+
tet = read_pdf_with_pdfplumber(uploaded_photo)
|
| 96 |
#tet = pytesseract.image_to_string(img, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(img)
|
| 97 |
values = st.slider('Select a approximate number of lines to see and summarize',value=[0, len(tet)//(7*10)])
|
| 98 |
text = tet[values[0]*7*10:values[1]*7*10] if values[0]!=len(tet)//(7*10) else tet[len(tet)//(7*10):]
|