darisdzakwanhoesien2 committed on
Commit
faad49a
·
1 Parent(s): eab69c3
Files changed (1) hide show
  1. create_triplets.py +3 -3
create_triplets.py CHANGED
@@ -10,15 +10,15 @@ def create_triplets(corpus_path="data/esg_corpus.csv", output_path="data/esg_tri
10
 
11
  try:
12
  df = pd.read_csv(corpus_path)
13
- if 'text' not in df.columns:
14
- print(f"Error: Corpus file at {corpus_path} must have a 'text' column.")
15
  return
16
  except FileNotFoundError:
17
  print(f"Error: Processed corpus file not found at {corpus_path}.")
18
  print("Please run the preprocessing step in the main application first.")
19
  return
20
 
21
- sentences = df['text'].dropna().tolist()
22
  if len(sentences) < 3:
23
  print("Error: Not enough sentences in the corpus to generate triplets.")
24
  return
 
10
 
11
  try:
12
  df = pd.read_csv(corpus_path)
13
+ if 'markdown' not in df.columns:
14
+ print(f"Error: Corpus file at {corpus_path} must have a 'markdown' column.")
15
  return
16
  except FileNotFoundError:
17
  print(f"Error: Processed corpus file not found at {corpus_path}.")
18
  print("Please run the preprocessing step in the main application first.")
19
  return
20
 
21
+ sentences = df['markdown'].dropna().tolist()
22
  if len(sentences) < 3:
23
  print("Error: Not enough sentences in the corpus to generate triplets.")
24
  return