fx PATH
Browse files- api.py +15 -14
- landscape2soundscape.py +2 -2
api.py
CHANGED
|
@@ -21,7 +21,8 @@ from audiocraft.audiogen import AudioGen, audio_write
|
|
| 21 |
sound_generator = AudioGen.get_pretrained('facebook/audiogen-medium')
|
| 22 |
sound_generator.set_generation_params(duration=6)
|
| 23 |
|
| 24 |
-
|
|
|
|
| 25 |
|
| 26 |
# SSH AGENT
|
| 27 |
# eval $(ssh-agent -s)
|
|
@@ -127,15 +128,15 @@ def serve_wav():
|
|
| 127 |
|
| 128 |
# Physically Save Client Files
|
| 129 |
for filename, obj in request.files.items():
|
| 130 |
-
obj.save(f'
|
| 131 |
|
| 132 |
print('Saved all files on Server Side\n\n')
|
| 133 |
|
| 134 |
-
args = SimpleNamespace(text=None if r.get('text') is None else
|
| 135 |
-
video=None if r.get('video') is None else
|
| 136 |
-
image=None if r.get('image') is None else
|
| 137 |
voice=r.get('voice')[0],
|
| 138 |
-
native=None if r.get('native') is None else
|
| 139 |
affective = r.get('affective')[0],
|
| 140 |
scene=r.get('scene')[0]
|
| 141 |
)
|
|
@@ -291,7 +292,7 @@ def serve_wav():
|
|
| 291 |
# ==== TTS .srt ====
|
| 292 |
|
| 293 |
if do_video_dub:
|
| 294 |
-
OUT_FILE = '
|
| 295 |
subtitles = text
|
| 296 |
MAX_LEN = int(subtitles[-1][2] + 17) * 24000
|
| 297 |
# 17 extra seconds fail-safe for long-last-segment
|
|
@@ -321,7 +322,7 @@ def serve_wav():
|
|
| 321 |
(.64 * total + .27 * x_native)[:, None],
|
| 322 |
24000)
|
| 323 |
else: # Video from plain (.txt)
|
| 324 |
-
OUT_FILE = '
|
| 325 |
x = tts_multi_sentence(text=text,
|
| 326 |
precomputed_style_vector=precomputed_style_vector,
|
| 327 |
voice=args.voice,
|
|
@@ -333,7 +334,7 @@ def serve_wav():
|
|
| 333 |
if args.image is not None:
|
| 334 |
|
| 335 |
STATIC_FRAME = args.image # 'assets/image_from_T31.jpg'
|
| 336 |
-
OUT_FILE = '
|
| 337 |
|
| 338 |
# SILENT CLIP
|
| 339 |
|
|
@@ -346,7 +347,7 @@ def serve_wav():
|
|
| 346 |
scene=args.scene
|
| 347 |
)
|
| 348 |
soundfile.write(AUDIO_TRACK, x, 24000)
|
| 349 |
-
|
| 350 |
# write final output video
|
| 351 |
subprocess.call(
|
| 352 |
["ffmpeg",
|
|
@@ -361,7 +362,7 @@ def serve_wav():
|
|
| 361 |
"0:v:0",
|
| 362 |
"-map",
|
| 363 |
" 1:a:0",
|
| 364 |
-
OUT_FILE])
|
| 365 |
|
| 366 |
print(f'\noutput video is saved as {OUT_FILE}')
|
| 367 |
|
|
@@ -372,8 +373,8 @@ def serve_wav():
|
|
| 372 |
precomputed_style_vector=precomputed_style_vector,
|
| 373 |
voice=args.voice,
|
| 374 |
scene=args.scene)
|
| 375 |
-
OUT_FILE = '
|
| 376 |
-
soundfile.write(OUT_FILE, x, 24000)
|
| 377 |
|
| 378 |
|
| 379 |
|
|
@@ -393,7 +394,7 @@ def serve_wav():
|
|
| 393 |
|
| 394 |
# send server's output as default file -> srv_result.xx
|
| 395 |
print(f'\n=SERVER saved as {OUT_FILE=}\n')
|
| 396 |
-
response = send_from_directory(
|
| 397 |
response.headers['suffix-file-type'] = OUT_FILE
|
| 398 |
return response
|
| 399 |
|
|
|
|
| 21 |
sound_generator = AudioGen.get_pretrained('facebook/audiogen-medium')
|
| 22 |
sound_generator.set_generation_params(duration=6)
|
| 23 |
|
| 24 |
+
CACHE_DIR = 'flask_cache/'
|
| 25 |
+
Path(CACHE_DIR).mkdir(parents=True, exist_ok=True)
|
| 26 |
|
| 27 |
# SSH AGENT
|
| 28 |
# eval $(ssh-agent -s)
|
|
|
|
| 128 |
|
| 129 |
# Physically Save Client Files
|
| 130 |
for filename, obj in request.files.items():
|
| 131 |
+
obj.save(f'{CACHE_DIR}{filename.replace("/","")}')
|
| 132 |
|
| 133 |
print('Saved all files on Server Side\n\n')
|
| 134 |
|
| 135 |
+
args = SimpleNamespace(text=None if r.get('text') is None else CACHE_DIR + r.get('text')[0].replace("/",""),
|
| 136 |
+
video=None if r.get('video') is None else CACHE_DIR + r.get('video')[0].replace("/",""),
|
| 137 |
+
image=None if r.get('image') is None else CACHE_DIR + r.get('image')[0].replace("/",""),
|
| 138 |
voice=r.get('voice')[0],
|
| 139 |
+
native=None if r.get('native') is None else CACHE_DIR + r.get('native')[0].replace("/",""),
|
| 140 |
affective = r.get('affective')[0],
|
| 141 |
scene=r.get('scene')[0]
|
| 142 |
)
|
|
|
|
| 292 |
# ==== TTS .srt ====
|
| 293 |
|
| 294 |
if do_video_dub:
|
| 295 |
+
OUT_FILE = 'tmp.mp4' #args.out_file + '_video_dub.mp4'
|
| 296 |
subtitles = text
|
| 297 |
MAX_LEN = int(subtitles[-1][2] + 17) * 24000
|
| 298 |
# 17 extra seconds fail-safe for long-last-segment
|
|
|
|
| 322 |
(.64 * total + .27 * x_native)[:, None],
|
| 323 |
24000)
|
| 324 |
else: # Video from plain (.txt)
|
| 325 |
+
OUT_FILE = 'tmp.mp4'
|
| 326 |
x = tts_multi_sentence(text=text,
|
| 327 |
precomputed_style_vector=precomputed_style_vector,
|
| 328 |
voice=args.voice,
|
|
|
|
| 334 |
if args.image is not None:
|
| 335 |
|
| 336 |
STATIC_FRAME = args.image # 'assets/image_from_T31.jpg'
|
| 337 |
+
OUT_FILE = 'tmp.mp4' #args.out_file + '_image_to_speech.mp4'
|
| 338 |
|
| 339 |
# SILENT CLIP
|
| 340 |
|
|
|
|
| 347 |
scene=args.scene
|
| 348 |
)
|
| 349 |
soundfile.write(AUDIO_TRACK, x, 24000)
|
| 350 |
+
if args.video or args.image:
|
| 351 |
# write final output video
|
| 352 |
subprocess.call(
|
| 353 |
["ffmpeg",
|
|
|
|
| 362 |
"0:v:0",
|
| 363 |
"-map",
|
| 364 |
" 1:a:0",
|
| 365 |
+
CACHE_DIR + OUT_FILE])
|
| 366 |
|
| 367 |
print(f'\noutput video is saved as {OUT_FILE}')
|
| 368 |
|
|
|
|
| 373 |
precomputed_style_vector=precomputed_style_vector,
|
| 374 |
voice=args.voice,
|
| 375 |
scene=args.scene)
|
| 376 |
+
OUT_FILE = 'tmp.wav'
|
| 377 |
+
soundfile.write(CACHE_DIR + OUT_FILE, x, 24000)
|
| 378 |
|
| 379 |
|
| 380 |
|
|
|
|
| 394 |
|
| 395 |
# send server's output as default file -> srv_result.xx
|
| 396 |
print(f'\n=SERVER saved as {OUT_FILE=}\n')
|
| 397 |
+
response = send_from_directory(CACHE_DIR, path=OUT_FILE)
|
| 398 |
response.headers['suffix-file-type'] = OUT_FILE
|
| 399 |
return response
|
| 400 |
|
landscape2soundscape.py
CHANGED
|
@@ -56,7 +56,7 @@ DESCRIPTIONS = [
|
|
| 56 |
'01_Schick_AII840_001.jpg', # image
|
| 57 |
'01_Schick_AII840_001.txt', # text
|
| 58 |
'Statue in shire hill on autumn beach.', # audiocraft
|
| 59 |
-
'Gottlieb
|
| 60 |
'en_US/m-ailabs_low#mary_ann',
|
| 61 |
],
|
| 62 |
# 2
|
|
@@ -156,7 +156,7 @@ SILENT_VIDEO = '_silent_video.mp4'
|
|
| 156 |
# SILENT CLIP
|
| 157 |
|
| 158 |
|
| 159 |
-
for _img_, _text_, soundscape_text, _title_, _voice_ in DESCRIPTIONS[:
|
| 160 |
|
| 161 |
# cv2put txt
|
| 162 |
im = cv2.imread(PIC_DIR + _img_) # IMG must have EVEN shape
|
|
|
|
| 56 |
'01_Schick_AII840_001.jpg', # image
|
| 57 |
'01_Schick_AII840_001.txt', # text
|
| 58 |
'Statue in shire hill on autumn beach.', # audiocraft
|
| 59 |
+
'Gottlieb Schick - Bildnis der Heinrike Dannecker - 1802', # cv2 puttext title
|
| 60 |
'en_US/m-ailabs_low#mary_ann',
|
| 61 |
],
|
| 62 |
# 2
|
|
|
|
| 156 |
# SILENT CLIP
|
| 157 |
|
| 158 |
|
| 159 |
+
for _img_, _text_, soundscape_text, _title_, _voice_ in DESCRIPTIONS[:20]:
|
| 160 |
|
| 161 |
# cv2put txt
|
| 162 |
im = cv2.imread(PIC_DIR + _img_) # IMG must have EVEN shape
|