Spaces:

gpt-omni
/

mini-omni

Running

gpt-omni committed on Sep 5, 2024

Commit

7ba9b1d

1 Parent(s): e1adc1c

update

Files changed (1) hide show

app.py CHANGED Viewed

@@ -128,7 +128,7 @@ def get_input_ids_whisper_ATBatch(mel, leng, whispermodel, device):
     stacked_inputids = [torch.stack(tensors) for tensors in stacked_inputids]
     return torch.stack([audio_feature, audio_feature]), stacked_inputids
 @spaces.GPU
 def next_token_batch(
     model: GPT,
@@ -156,7 +156,7 @@ def next_token_batch(
     next_t = sample(logit_t, **kwargs).to(dtype=input_ids[0].dtype)
     return next_audio_tokens, next_t
 def load_audio(path):
     audio = whisper.load_audio(path)
     duration_ms = (len(audio) / 16000) * 1000
@@ -164,7 +164,7 @@ def load_audio(path):
     mel = whisper.log_mel_spectrogram(audio)
     return mel, int(duration_ms / 20) + 1
 @spaces.GPU
 def generate_audio_data(snac_tokens, snacmodel, device=None):
     audio = reconstruct_tensors(snac_tokens, device)
@@ -190,7 +190,7 @@ def run_AT_batch_stream(
     assert os.path.exists(audio_path), f"audio file {audio_path} not found"
-    model.set_kv_cache(batch_size=2)
     mel, leng = load_audio(audio_path)
     audio_feature, input_ids = get_input_ids_whisper_ATBatch(mel, leng, whispermodel, device)
@@ -295,7 +295,7 @@ def run_AT_batch_stream(
     model.clear_kv_cache()
     return list_output
 for chunk in run_AT_batch_stream('./data/samples/output1.wav'):
     pass
@@ -326,4 +326,4 @@ demo = gr.Interface(
     # live=True,
 )
 demo.queue()
-demo.launch()

     stacked_inputids = [torch.stack(tensors) for tensors in stacked_inputids]
     return torch.stack([audio_feature, audio_feature]), stacked_inputids
 @spaces.GPU
 def next_token_batch(
     model: GPT,
     next_t = sample(logit_t, **kwargs).to(dtype=input_ids[0].dtype)
     return next_audio_tokens, next_t
 def load_audio(path):
     audio = whisper.load_audio(path)
     duration_ms = (len(audio) / 16000) * 1000
     mel = whisper.log_mel_spectrogram(audio)
     return mel, int(duration_ms / 20) + 1
 @spaces.GPU
 def generate_audio_data(snac_tokens, snacmodel, device=None):
     audio = reconstruct_tensors(snac_tokens, device)
     assert os.path.exists(audio_path), f"audio file {audio_path} not found"
+    model.set_kv_cache(batch_size=2, device=device)
     mel, leng = load_audio(audio_path)
     audio_feature, input_ids = get_input_ids_whisper_ATBatch(mel, leng, whispermodel, device)
     model.clear_kv_cache()
     return list_output
 for chunk in run_AT_batch_stream('./data/samples/output1.wav'):
     pass
     # live=True,
 )
 demo.queue()
+demo.launch()