darisdzakwanhoesien2 committed on
Commit
962d966
·
1 Parent(s): 75e5ca4
data/esg_corpus.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee5a416e3fffc64a1ecc99a28a3acf174aeec5fc5ee9692adb4018f2426f8242
3
- size 18920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b58253bef0d7537600eced9cab9ef062f93e4f9151fdb5a706252e0d085162c
3
+ size 19002
data/esg_corpus_original.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee5a416e3fffc64a1ecc99a28a3acf174aeec5fc5ee9692adb4018f2426f8242
3
+ size 18920
sanitize_csv.py CHANGED
@@ -3,8 +3,8 @@ import re
3
  import os
4
 
5
  # Use relative paths for portability
6
- input_file_path = 'data/esg_corpus.csv'
7
- output_file_path = 'data/esg_corpus_sanitized.csv'
8
 
9
  def sanitize_csv(input_path, output_path):
10
  """
 
3
  import os
4
 
5
  # Use relative paths for portability
6
+ input_file_path = 'data/esg_corpus_original.csv'
7
+ output_file_path = 'data/esg_corpus.csv'
8
 
9
  def sanitize_csv(input_path, output_path):
10
  """