Spaces:
Runtime error
Runtime error
Update modules/index_func.py
Browse files
- modules/index_func.py +3 -2
modules/index_func.py
CHANGED
|
@@ -38,8 +38,9 @@ def get_documents(file_src):
|
|
| 38 |
with open(filepath, "rb") as pdfFileObj:
|
| 39 |
pdfReader = PyPDF2.PdfReader(pdfFileObj) #pdfReader.pages 有多少頁
|
| 40 |
for page in tqdm(pdfReader.pages):
|
| 41 |
-
|
| 42 |
-
logging.info(f"
|
|
|
|
| 43 |
texts = [Document(page_content=pdftext,
|
| 44 |
metadata={"source": filepath})]
|
| 45 |
elif file_type == ".docx":
|
|
|
|
| 38 |
with open(filepath, "rb") as pdfFileObj:
|
| 39 |
pdfReader = PyPDF2.PdfReader(pdfFileObj) #pdfReader.pages 有多少頁
|
| 40 |
for page in tqdm(pdfReader.pages):
|
| 41 |
+
pdftmp = page.extract_text() #每頁的文字加起來
|
| 42 |
+
logging.info(f"pdftmp:{pdftmp}")
|
| 43 |
+
pdftext += pdftmp
|
| 44 |
texts = [Document(page_content=pdftext,
|
| 45 |
metadata={"source": filepath})]
|
| 46 |
elif file_type == ".docx":
|