Spaces:
Sleeping
Sleeping
darisdzakwanhoesien2
committed on
Commit
·
faad49a
1
Parent(s):
eab69c3
label new
Browse files
- create_triplets.py +3 -3
create_triplets.py
CHANGED
|
@@ -10,15 +10,15 @@ def create_triplets(corpus_path="data/esg_corpus.csv", output_path="data/esg_tri
|
|
| 10 |
|
| 11 |
try:
|
| 12 |
df = pd.read_csv(corpus_path)
|
| 13 |
-
if '
|
| 14 |
-
print(f"Error: Corpus file at {corpus_path} must have a '
|
| 15 |
return
|
| 16 |
except FileNotFoundError:
|
| 17 |
print(f"Error: Processed corpus file not found at {corpus_path}.")
|
| 18 |
print("Please run the preprocessing step in the main application first.")
|
| 19 |
return
|
| 20 |
|
| 21 |
-
sentences = df['
|
| 22 |
if len(sentences) < 3:
|
| 23 |
print("Error: Not enough sentences in the corpus to generate triplets.")
|
| 24 |
return
|
|
|
|
| 10 |
|
| 11 |
try:
|
| 12 |
df = pd.read_csv(corpus_path)
|
| 13 |
+
if 'markdown' not in df.columns:
|
| 14 |
+
print(f"Error: Corpus file at {corpus_path} must have a 'markdown' column.")
|
| 15 |
return
|
| 16 |
except FileNotFoundError:
|
| 17 |
print(f"Error: Processed corpus file not found at {corpus_path}.")
|
| 18 |
print("Please run the preprocessing step in the main application first.")
|
| 19 |
return
|
| 20 |
|
| 21 |
+
sentences = df['markdown'].dropna().tolist()
|
| 22 |
if len(sentences) < 3:
|
| 23 |
print("Error: Not enough sentences in the corpus to generate triplets.")
|
| 24 |
return
|