Spaces:
Runtime error
Runtime error
Update modules/index_func.py
Browse files
- modules/index_func.py +2 -4
modules/index_func.py
CHANGED
|
@@ -38,10 +38,8 @@ def get_documents(file_src):
|
|
| 38 |
with open(filepath, "rb") as pdfFileObj:
|
| 39 |
pdfReader = PyPDF2.PdfReader(pdfFileObj) #pdfReader.pages 有多少頁
|
| 40 |
for page in tqdm(pdfReader.pages):
|
| 41 |
-
|
| 42 |
-
logging.info(f"
|
| 43 |
-
pdftext += pdftmp
|
| 44 |
-
|
| 45 |
texts = [Document(page_content=pdftext,
|
| 46 |
metadata={"source": filepath})]
|
| 47 |
elif file_type == ".docx":
|
|
|
|
| 38 |
with open(filepath, "rb") as pdfFileObj:
|
| 39 |
pdfReader = PyPDF2.PdfReader(pdfFileObj) #pdfReader.pages 有多少頁
|
| 40 |
for page in tqdm(pdfReader.pages):
|
| 41 |
+
pdftext += page.extract_text() #每頁的文字加起來
|
| 42 |
+
logging.info(f"pdftext:{pdftext}")
|
|
|
|
|
|
|
| 43 |
texts = [Document(page_content=pdftext,
|
| 44 |
metadata={"source": filepath})]
|
| 45 |
elif file_type == ".docx":
|