Julian Bilcke
committed on
Commit
·
347756a
1
Parent(s):
5a793ee
small fix
Browse files
vms/utils/finetrainers_utils.py
CHANGED
|
@@ -136,7 +136,7 @@ def copy_files_to_training_dir(prompt_prefix: str, training_videos_path=None) ->
|
|
| 136 |
Number of copied pairs
|
| 137 |
"""
|
| 138 |
|
| 139 |
-
gr.Info("Copying assets to the training dataset..")
|
| 140 |
|
| 141 |
# Get project ID from global config
|
| 142 |
config = load_global_config()
|
|
@@ -162,8 +162,17 @@ def copy_files_to_training_dir(prompt_prefix: str, training_videos_path=None) ->
|
|
| 162 |
all_files = video_files + image_files
|
| 163 |
|
| 164 |
nb_copied_pairs = 0
|
|
|
|
| 165 |
|
| 166 |
for file_path in all_files:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
|
| 168 |
caption = ""
|
| 169 |
file_caption_path = file_path.with_suffix('.txt')
|
|
@@ -181,10 +190,6 @@ def copy_files_to_training_dir(prompt_prefix: str, training_videos_path=None) ->
|
|
| 181 |
logger.debug(f"Found parent caption file: {parent_caption_path}")
|
| 182 |
parent_caption = parent_caption_path.read_text().strip()
|
| 183 |
|
| 184 |
-
target_file_path = training_videos_path / file_path.name
|
| 185 |
-
|
| 186 |
-
target_caption_path = target_file_path.with_suffix('.txt')
|
| 187 |
-
|
| 188 |
if parent_caption and not caption.endswith(parent_caption):
|
| 189 |
caption = f"{caption}\n{parent_caption}"
|
| 190 |
|
|
@@ -213,7 +218,10 @@ def copy_files_to_training_dir(prompt_prefix: str, training_videos_path=None) ->
|
|
| 213 |
training_path, _, _, _ = get_project_paths(project_id)
|
| 214 |
prepare_finetrainers_dataset(training_path, training_videos_path)
|
| 215 |
|
| 216 |
-
|
|
|
|
|
|
|
|
|
|
| 217 |
|
| 218 |
return nb_copied_pairs
|
| 219 |
|
|
|
|
| 136 |
Number of copied pairs
|
| 137 |
"""
|
| 138 |
|
| 139 |
+
gr.Info("Copying new assets to the training dataset..")
|
| 140 |
|
| 141 |
# Get project ID from global config
|
| 142 |
config = load_global_config()
|
|
|
|
| 162 |
all_files = video_files + image_files
|
| 163 |
|
| 164 |
nb_copied_pairs = 0
|
| 165 |
+
nb_skipped_pairs = 0
|
| 166 |
|
| 167 |
for file_path in all_files:
|
| 168 |
+
target_file_path = training_videos_path / file_path.name
|
| 169 |
+
target_caption_path = target_file_path.with_suffix('.txt')
|
| 170 |
+
|
| 171 |
+
# Skip if both file and caption already exist in training directory
|
| 172 |
+
if target_file_path.exists() and target_caption_path.exists():
|
| 173 |
+
logger.debug(f"Skipping {file_path.name} - already exists in training directory")
|
| 174 |
+
nb_skipped_pairs += 1
|
| 175 |
+
continue
|
| 176 |
|
| 177 |
caption = ""
|
| 178 |
file_caption_path = file_path.with_suffix('.txt')
|
|
|
|
| 190 |
logger.debug(f"Found parent caption file: {parent_caption_path}")
|
| 191 |
parent_caption = parent_caption_path.read_text().strip()
|
| 192 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
if parent_caption and not caption.endswith(parent_caption):
|
| 194 |
caption = f"{caption}\n{parent_caption}"
|
| 195 |
|
|
|
|
| 218 |
training_path, _, _, _ = get_project_paths(project_id)
|
| 219 |
prepare_finetrainers_dataset(training_path, training_videos_path)
|
| 220 |
|
| 221 |
+
if nb_skipped_pairs > 0:
|
| 222 |
+
gr.Info(f"Successfully updated the training dataset ({nb_copied_pairs} new pairs, {nb_skipped_pairs} already existed)")
|
| 223 |
+
else:
|
| 224 |
+
gr.Info(f"Successfully generated the training dataset ({nb_copied_pairs} pairs)")
|
| 225 |
|
| 226 |
return nb_copied_pairs
|
| 227 |
|