index out of bounds 2048 @ dequantize()

Browse files

Files changed (5) hide show

audiocraft/audiogen.py +2 -14
audiocraft/codebooks_patterns.py +118 -91
audiocraft/genmodel.py +5 -16
audiocraft/lm.py +37 -20
audiocraft/vq.py +2 -0

audiocraft/audiogen.py CHANGED Viewed

@@ -12,24 +12,12 @@ and provide easy access to the generation API.
 import typing as tp
 import torch
-from audiocraft.encodec import CompressionModel
 from audiocraft.genmodel import BaseGenModel
-from audiocraft.lm import LMModel
 from audiocraft.loaders import load_compression_model, load_lm_model
 class AudioGen(BaseGenModel):
-    """AudioGen main model with convenient generation API.
-    Args:
-        name (str): name of the model.
-        compression_model (CompressionModel): Compression model
-            used to map audio to invertible discrete representations.
-        lm (LMModel): Language model over discrete representations.
-        max_duration (float, optional): maximum duration the model can produce,
-            otherwise, inferred from the training params.
-    """
-    def __init__(self, name: str, compression_model: CompressionModel, lm: LMModel,
-                 max_duration: tp.Optional[float] = None):
         # print(f'Using {compression_model=}\n-----=-----')
         super().__init__(name, compression_model, lm, max_duration)
         self.set_generation_params(duration=5)  # default duration

 import typing as tp
 import torch
 from audiocraft.genmodel import BaseGenModel
 from audiocraft.loaders import load_compression_model, load_lm_model
 class AudioGen(BaseGenModel):
+    def __init__(self, name, compression_model, lm, max_duration=None):
         # print(f'Using {compression_model=}\n-----=-----')
         super().__init__(name, compression_model, lm, max_duration)
         self.set_generation_params(duration=5)  # default duration

audiocraft/codebooks_patterns.py CHANGED Viewed

@@ -6,11 +6,9 @@
 from collections import namedtuple
 from dataclasses import dataclass
-from functools import lru_cache
 import logging
 import typing as tp
-from abc import ABC, abstractmethod
 import torch
 LayoutCoord = namedtuple('LayoutCoord', ['t', 'q'])  # (timestep, codebook index)
@@ -50,8 +48,8 @@ class Pattern:
     def __post_init__(self):
         assert len(self.layout) > 0
         self._validate_layout()
-        self._build_reverted_sequence_scatter_indexes = lru_cache(100)(self._build_reverted_sequence_scatter_indexes)
-        self._build_pattern_sequence_scatter_indexes = lru_cache(100)(self._build_pattern_sequence_scatter_indexes)
         print("New pattern, time steps: %d, sequence steps: %d", self.timesteps, len(self.layout))
     def _validate_layout(self):
@@ -74,6 +72,53 @@ class Pattern:
                 # each sequence step contains at max 1 coordinate per codebook
                 assert len(qs) == len(seq_coords), \
                     f"Multiple entries for a same codebook are found at step {s}"
     @property
     def num_sequence_steps(self):
@@ -151,22 +196,10 @@ class Pattern:
         mask = torch.from_numpy(mask).to(device)
         return indexes, mask
-    def build_pattern_sequence(self, z: torch.Tensor, special_token: int, keep_only_valid_steps: bool = False):
-        """Build sequence corresponding to the pattern from the input tensor z.
-        The sequence is built using up to sequence_steps if specified, and non-pattern
-        coordinates are filled with the special token.
-        Args:
-            z (torch.Tensor): Input tensor of multi-codebooks sequence, of shape [B, K, T].
-            special_token (int): Special token used to fill non-pattern coordinates in the new sequence.
-            keep_only_valid_steps (bool): Build a sequence from the pattern up to valid (= fully defined) steps.
-                Steps that are beyond valid steps will be replaced by the special_token in that case.
-        Returns:
-            values (torch.Tensor): Interleaved sequence matching the pattern, of shape [B, K, S] with S
-                corresponding either to the sequence_steps if provided, otherwise to the length of the pattern.
-            indexes (torch.Tensor): Indexes corresponding to the interleaved sequence, of shape [K, S].
-            mask (torch.Tensor): Mask corresponding to indexes that matches valid indexes of shape [K, S].
-        """
         B, K, T = z.shape
         indexes, mask = self._build_pattern_sequence_scatter_indexes(
             T, K, keep_only_valid_steps=keep_only_valid_steps, device=str(z.device)
@@ -176,6 +209,11 @@ class Pattern:
         z = torch.cat([z, torch.zeros_like(z[:, :1]) + special_token], dim=1)
         values = z[:, indexes.view(-1)]
         values = values.view(B, K, indexes.shape[-1])
         return values, indexes, mask
     def _build_reverted_sequence_scatter_indexes(self, sequence_steps: int, n_q: int,
@@ -216,25 +254,26 @@ class Pattern:
             if s < sequence_steps:
                 for code in sequence_codes:
                     if code.t < timesteps:
-                        indexes[code.q, code.t] = s + code.q * sequence_steps
                         mask[code.q, code.t] = 1
         indexes = torch.from_numpy(indexes).to(device)
         mask = torch.from_numpy(mask).to(device)
         return indexes, mask
-    def revert_pattern_sequence(self, s: torch.Tensor, special_token: int, keep_only_valid_steps: bool = False):
-        """Revert a sequence built from the pattern back to the original multi-codebook sequence without interleaving.
-        The sequence is reverted using up to timesteps if specified, and non-pattern coordinates
-        are filled with the special token.
         Args:
             s (torch.Tensor): Interleaved sequence tensor obtained from the pattern, of shape [B, K, S].
             special_token (int or float): Special token used to fill non-pattern coordinates in the new sequence.
         Returns:
-            values (torch.Tensor): Interleaved sequence matching the pattern, of shape [B, K, T] with T
-                corresponding either to the timesteps if provided, or the total timesteps in pattern otherwise.
             indexes (torch.Tensor): Indexes corresponding to the interleaved sequence, of shape [K, T].
-            mask (torch.Tensor): Mask corresponding to indexes that matches valid indexes of shape [K, T].
         """
         B, K, S = s.shape
         indexes, mask = self._build_reverted_sequence_scatter_indexes(
@@ -245,64 +284,44 @@ class Pattern:
         s = torch.cat([s, torch.zeros_like(s[:, :1]) + special_token], dim=1)
         values = s[:, indexes.view(-1)]
         values = values.view(B, K, indexes.shape[-1])
         return values, indexes, mask
-    def revert_pattern_logits(self, logits: torch.Tensor, special_token: float, keep_only_valid_steps: bool = False):
-        """Revert model logits obtained on a sequence built from the pattern
-        back to a tensor matching the original sequence.
-        This method is similar to ``revert_pattern_sequence`` with the following specificities:
-        1. It is designed to work with the extra cardinality dimension
-        2. We return the logits for the first sequence item that matches the special_token and
-        which matching target in the original sequence is the first item of the sequence,
-        while we skip the last logits as there is no matching target
-        """
-        B, card, K, S = logits.shape
-        indexes, mask = self._build_reverted_sequence_scatter_indexes(
-            S, K, keep_only_valid_steps, is_model_output=True, device=logits.device
-        )
-        logits = logits.reshape(B, card, -1)
-        # we append the special token as the last index of our flattened z tensor
-        logits = torch.cat([logits, torch.zeros_like(logits[:, :, :1]) + special_token], dim=-1)  # [B, card, K x S]
-        values = logits[:, :, indexes.view(-1)]
-        values = values.view(B, card, K, indexes.shape[-1])
-        return values, indexes, mask
-class CodebooksPatternProvider(ABC):
-    """Abstraction around providing pattern for interleaving codebooks.
-    The CodebooksPatternProvider abstraction allows to implement various strategies to
-    define interleaving pattern of sequences composed of multiple codebooks. For a given
-    number of codebooks `n_q`, the pattern provider can generate a specified pattern
-    corresponding to a sequence of `T` timesteps with `n_q` parallel codebooks. This pattern
-    can be used to construct a new sequence from the original codes respecting the specified
-    pattern. The pattern is defined as a list of list of code coordinates, code coordinate
-    being a tuple with the original timestep and codebook to build the new sequence.
-    Note that all patterns must start with an empty list that is then used to insert a first
-    sequence step of special tokens in the newly generated sequence.
-    Args:
-        n_q (int): number of codebooks.
-        cached (bool): if True, patterns for a given length are cached. In general
-            that should be true for efficiency reason to avoid synchronization points.
-    """
-    def __init__(self, n_q: int, cached: bool = True):
-        assert n_q > 0
-        self.n_q = n_q
-        self.get_pattern = lru_cache(100)(self.get_pattern)  # type: ignore
-    @abstractmethod
-    def get_pattern(self, timesteps: int) -> Pattern:
-        """Builds pattern with specific interleaving between codebooks.
-        Args:
-            timesteps (int): Total number of timesteps.
-        """
-        raise NotImplementedError()
-class DelayedPatternProvider(CodebooksPatternProvider):
     """Provider for delayed pattern across delayed codebooks.
     Codebooks are delayed in the sequence and sequence steps will contain codebooks
     from different timesteps.
@@ -325,9 +344,12 @@ class DelayedPatternProvider(CodebooksPatternProvider):
         flatten_first (int): Flatten the first N timesteps.
         empty_initial (int): Prepend with N empty list of coordinates.
     """
-    def __init__(self, n_q: int, delays: tp.Optional[tp.List[int]] = None,
-                 flatten_first: int = 0, empty_initial: int = 0):
-        super().__init__(n_q)
         if delays is None:
             delays = list(range(n_q))
         self.delays = delays
@@ -336,8 +358,12 @@ class DelayedPatternProvider(CodebooksPatternProvider):
         assert len(self.delays) == self.n_q
         assert sorted(self.delays) == self.delays
-    def get_pattern(self, timesteps: int) -> Pattern:
-        omit_special_token = self.empty_initial < 0
         out: PatternLayout = [] if omit_special_token else [[]]
         max_delay = max(self.delays)
         if self.empty_initial:
@@ -353,6 +379,7 @@ class DelayedPatternProvider(CodebooksPatternProvider):
                 if t_for_q >= self.flatten_first:
                     v.append(LayoutCoord(t_for_q, q))
             out.append(v)
         return Pattern(out, n_q=self.n_q, timesteps=timesteps)

 from collections import namedtuple
 from dataclasses import dataclass
 import logging
 import typing as tp
 import torch
 LayoutCoord = namedtuple('LayoutCoord', ['t', 'q'])  # (timestep, codebook index)
     def __post_init__(self):
         assert len(self.layout) > 0
         self._validate_layout()
+        self._build_reverted_sequence_scatter_indexes = self._build_reverted_sequence_scatter_indexes
+        self._build_pattern_sequence_scatter_indexes = self._build_pattern_sequence_scatter_indexes
         print("New pattern, time steps: %d, sequence steps: %d", self.timesteps, len(self.layout))
     def _validate_layout(self):
                 # each sequence step contains at max 1 coordinate per codebook
                 assert len(qs) == len(seq_coords), \
                     f"Multiple entries for a same codebook are found at step {s}"
+                print(f'{qs=}\n\n\n\n QS VALIDATE LAYOUT')  # this prints 0,1,2,3 although
+                # if q_timesteps contains special_index, does this show something other than 0,1,2,3?
+        # =======================================================
+        # QS VALIDATE LAYOUT
+        # qs={0, 1}
+        #  QS VALIDATE LAYOUT
+        # qs={0, 1, 2}
+        #  QS VALIDATE LAYOUT
+        # qs={0, 1, 2, 3}
+        #  QS VALIDATE LAYOUT
+        # qs={0, 1, 2, 3}
+        #  QS VALIDATE LAYOUT
+        # qs={0, 1, 2, 3}
+        #  QS VALIDATE LAYOUT
+        # qs={0, 1, 2, 3}
+        #  QS VALIDATE LAYOUT
+        # qs={0, 1, 2, 3}
+        #  QS VALIDATE LAYOUT
+        # qs={0, 1, 2, 3}
+        #  QS VALIDATE LAYOUT
+        # qs={0, 1, 2, 3}
     @property
     def num_sequence_steps(self):
         mask = torch.from_numpy(mask).to(device)
         return indexes, mask
+    def build_pattern_sequence(self,
+                               z,
+                               special_token,
+                               keep_only_valid_steps=False):
         B, K, T = z.shape
         indexes, mask = self._build_pattern_sequence_scatter_indexes(
             T, K, keep_only_valid_steps=keep_only_valid_steps, device=str(z.device)
         z = torch.cat([z, torch.zeros_like(z[:, :1]) + special_token], dim=1)
         values = z[:, indexes.view(-1)]
         values = values.view(B, K, indexes.shape[-1])
+        # print(values.shape, indexes.shape, mask.shape, 'BUILD PATTERN')
+        # --
+        # torch.Size([1, 4, 39]) torch.Size([4, 39]) torch.Size([4, 39]) BUILD PATTERN
         return values, indexes, mask
     def _build_reverted_sequence_scatter_indexes(self, sequence_steps: int, n_q: int,
             if s < sequence_steps:
                 for code in sequence_codes:
                     if code.t < timesteps:
+                        indexes[code.q, code.t] = s + code.q * sequence_steps  # oh the jump - so are the codes linearised
                         mask[code.q, code.t] = 1
         indexes = torch.from_numpy(indexes).to(device)
         mask = torch.from_numpy(mask).to(device)
         return indexes, mask
+    def revert_pattern_sequence(self,
+                                s,
+                                special_token,
+                                keep_only_valid_steps=False):
+        """SPECIAL TOKEN NOT DELETED HERE !!!!
         Args:
             s (torch.Tensor): Interleaved sequence tensor obtained from the pattern, of shape [B, K, S].
             special_token (int or float): Special token used to fill non-pattern coordinates in the new sequence.
         Returns:
+            values (torch.Tensor) : Interleaved sequence matching the pattern, of shape [B, K, T] with T
             indexes (torch.Tensor): Indexes corresponding to the interleaved sequence, of shape [K, T].
+            mask (torch.Tensor)   : Mask corresponding to indexes that matches valid indexes of shape [K, T].
+                                    Open question: should this mask also remove the special token id?
         """
         B, K, S = s.shape
         indexes, mask = self._build_reverted_sequence_scatter_indexes(
         s = torch.cat([s, torch.zeros_like(s[:, :1]) + special_token], dim=1)
         values = s[:, indexes.view(-1)]
         values = values.view(B, K, indexes.shape[-1])
         return values, indexes, mask
+    # def revert_pattern_logits(self, logits,
+    #                           special_token,
+    #                           keep_only_valid_steps=False):
+    #     """similar to ``revert_pattern_sequence`` with the following specificities:
+    #     1. It is designed to work with the extra cardinality dimension
+    #     2. We return the logits for the first sequence item that matches the special_token and
+    #     which matching target in the original sequence is the first item of the sequence,
+    #     while we skip the last logits as there is no matching target
+    #     """
+    #     B, card, K, S = logits.shape
+    #     indexes, mask = self._build_reverted_sequence_scatter_indexes(
+    #         S, K, keep_only_valid_steps, is_model_output=True, device=logits.device
+    #     )
+    #     logits = logits.reshape(B, card, -1)
+    #     # we append the special token as the last index of our flattened z tensor
+    #     logits = torch.cat([logits, torch.zeros_like(logits[:, :, :1]) + special_token], dim=-1)  # [B, card, K x S]
+    #     values = logits[:, :, indexes.view(-1)]
+    #     values = values.view(B, card, K, indexes.shape[-1])
+    #     return values, indexes, mask
+class DelayedPatternProvider():
     """Provider for delayed pattern across delayed codebooks.
     Codebooks are delayed in the sequence and sequence steps will contain codebooks
     from different timesteps.
         flatten_first (int): Flatten the first N timesteps.
         empty_initial (int): Prepend with N empty list of coordinates.
     """
+    def __init__(self,
+                 n_q,
+                 delays,
+                 flatten_first=0,
+                 empty_initial=0):
+        self.n_q = n_q
         if delays is None:
             delays = list(range(n_q))
         self.delays = delays
         assert len(self.delays) == self.n_q
         assert sorted(self.delays) == self.delays
+    def get_pattern(self, timesteps):
+        # get_pattern for desired length?
+        # print(f'{timesteps=} GET_PATTERn')   # 35
+        # print(f'{self.empty_initial=}')
+        omit_special_token = self.empty_initial < 0   # False as initial = 0 unset
         out: PatternLayout = [] if omit_special_token else [[]]
         max_delay = max(self.delays)
         if self.empty_initial:
                 if t_for_q >= self.flatten_first:
                     v.append(LayoutCoord(t_for_q, q))
             out.append(v)
+        # print(self.n_q, 'N_Q in PATTERN')  # 4 N_Q in PATTERN
         return Pattern(out, n_q=self.n_q, timesteps=timesteps)

audiocraft/genmodel.py CHANGED Viewed

@@ -44,7 +44,7 @@ class BaseGenModel(ABC):
         self.duration = self.max_duration
         self.device = next(iter(lm.parameters())).device
         self.generation_params={}
-        self._progress_callback=None
         if self.device.type == 'cpu':
             self.autocast = TorchAutocast(enabled=False)
         else:
@@ -68,9 +68,7 @@ class BaseGenModel(ABC):
         """Audio channels of the generated audio."""
         return self.compression_model.channels
-    def set_custom_progress_callback(self, progress_callback: tp.Optional[tp.Callable[[int, int], None]] = None):
-        """Override the default progress callback."""
-        self._progress_callback = progress_callback
     @abstractmethod
     def set_generation_params(self, *args, **kwargs):
@@ -119,25 +117,16 @@ class BaseGenModel(ABC):
         max_prompt_len = int(min(self.duration, self.max_duration) * self.frame_rate)
         current_gen_offset: int = 0
-        def _progress_callback(generated_tokens: int, tokens_to_generate: int):
-            generated_tokens += current_gen_offset
-            if self._progress_callback is not None:
-                # Note that total_gen_len might be quite wrong depending on the
-                # codebook pattern used, but with delay it is almost accurate.
-                self._progress_callback(generated_tokens, tokens_to_generate)
-            else:
-                print(f'{generated_tokens: 6d} / {tokens_to_generate: 6d}', end='\r')
-        callback = None
-        if progress:
-            callback = _progress_callback
         if self.duration <= self.max_duration:
             # generate by sampling from LM, simple case.
             with self.autocast:
                 gen_tokens = self.lm.generate(conditions=attributes,
-                                                callback=callback,
                                                 max_gen_len=total_gen_len,
                                                 **self.generation_params)
         else:

         self.duration = self.max_duration
         self.device = next(iter(lm.parameters())).device
         self.generation_params={}
         if self.device.type == 'cpu':
             self.autocast = TorchAutocast(enabled=False)
         else:
         """Audio channels of the generated audio."""
         return self.compression_model.channels
     @abstractmethod
     def set_generation_params(self, *args, **kwargs):
         max_prompt_len = int(min(self.duration, self.max_duration) * self.frame_rate)
         current_gen_offset: int = 0
         if self.duration <= self.max_duration:
             # generate by sampling from LM, simple case.
             with self.autocast:
                 gen_tokens = self.lm.generate(conditions=attributes,
+                                                callback=None,
                                                 max_gen_len=total_gen_len,
                                                 **self.generation_params)
         else:

audiocraft/lm.py CHANGED Viewed

@@ -373,8 +373,8 @@ class LMModel(StreamingModule):
         gen_codes = torch.full((B, K, max_gen_len), unknown_token, dtype=torch.long, device=device)
-        gen_codes[..., :start_offset] = prompt
-        # create the gen_sequence with proper interleaving from the pattern: [B, K, S]
         gen_sequence, _, mask = pattern.build_pattern_sequence(gen_codes, self.special_token_id)
         start_offset_sequence = pattern.get_first_step_with_timesteps(start_offset)
@@ -397,26 +397,26 @@ class LMModel(StreamingModule):
                 curr_sequence = gen_sequence[..., prev_offset:offset]
                 curr_mask = mask[None, ..., prev_offset:offset].expand(B, -1, -1)
-                if check:
-                    # check coherence between mask and sequence
-                    assert (curr_sequence == torch.where(curr_mask, curr_sequence, self.special_token_id)).all()
-                    # should never happen as gen_sequence is filled progressively
-                    assert not (curr_sequence == unknown_token).any()
-                # sample next token from the model, next token shape is [B, K, 1]
                 next_token = self._sample_next_token(
                     curr_sequence, cfg_conditions, unconditional_state, use_sampling, temp, top_k, top_p,
                     cfg_coef=cfg_coef, two_step_cfg=two_step_cfg)
                 # ensure the tokens that should be masked are properly set to special_token_id
                 # as the model never output special_token_id
-                valid_mask = mask[..., offset:offset+1].expand(B, -1, -1)
                 # next_token[~valid_mask] = self.special_token_id
                 # print(f'{unconditional_state=} \n
                 # print('Set All to Special')
-                # RUNS with = 2047 just different of self.special_token_id  -> 2047 is drill noise
-                # next_token[:] = self.special_token_id
@@ -427,17 +427,34 @@ class LMModel(StreamingModule):
                     next_token, gen_sequence[..., offset:offset+1]
                 )
                 prev_offset = offset
-                if callback is not None:
-                    callback(1 + offset - start_offset_sequence, gen_sequence_len - start_offset_sequence)
         unconditional_state.clear()
         out_codes, _, _ = pattern.revert_pattern_sequence(gen_sequence, special_token=unknown_token)
-        print(f' <=> CODES {out_codes.shape=} {out_codes.min()}  {out_codes.max()}\n')   # ARRIVES here also if special
-        out_start_offset = start_offset if remove_prompts else 0
-        out_codes = out_codes[..., out_start_offset:max_gen_len]
-        # ensure the returned codes are all valid
-        # assert (out_codes >= 0).all() and (out_codes <= self.card).all()
-        return out_codes

         gen_codes = torch.full((B, K, max_gen_len), unknown_token, dtype=torch.long, device=device)
+        gen_codes[..., :start_offset] = prompt  # place 0
         gen_sequence, _, mask = pattern.build_pattern_sequence(gen_codes, self.special_token_id)
         start_offset_sequence = pattern.get_first_step_with_timesteps(start_offset)
                 curr_sequence = gen_sequence[..., prev_offset:offset]
                 curr_mask = mask[None, ..., prev_offset:offset].expand(B, -1, -1)
                 next_token = self._sample_next_token(
                     curr_sequence, cfg_conditions, unconditional_state, use_sampling, temp, top_k, top_p,
                     cfg_coef=cfg_coef, two_step_cfg=two_step_cfg)
                 # ensure the tokens that should be masked are properly set to special_token_id
                 # as the model never output special_token_id
+                # valid_mask = mask[..., offset:offset+1].expand(B, -1, -1)
                 # next_token[~valid_mask] = self.special_token_id
                 # print(f'{unconditional_state=} \n
                 # print('Set All to Special')
+                # RUNS with next_token[:] = 2047, i.e. a value different from self.special_token_id -> 2047 sounds like drill noise
+                # special_token_id is filler for CODEBOOK_PATTERN ?
+                # next_token[:] = self.special_token_id    # seanet.embed torch.embedding does not have this - out of bounds in detokenize
                     next_token, gen_sequence[..., offset:offset+1]
                 )
                 prev_offset = offset
         unconditional_state.clear()
+        # revert_pattern_logits ~ NOT CALLED EXPLICITLY
         out_codes, _, _ = pattern.revert_pattern_sequence(gen_sequence, special_token=unknown_token)
+        # set(out_codes.unique().tolist()) - set(gen_sequence.unique().tolist())  # set()
+        # UNIQUE values are the SAME -> is this only a rearrangement?
+        # Are some parts ignored, or just re-arranged?
+        # print(f'{unknown_token=} {gen_sequence.shape=}  {out_codes.shape=}')
+        # -> unknown token = -1 or 2048
+        # unknown_token=-1
+        # print(f' <=> CODES {out_codes.shape=} {out_codes.min()}  {out_codes.max()}\n')   # ARRIVES here also if special
+        # unknown_token=-1 gen_sequence.shape=torch.Size([1, 4, 39])  out_codes.shape=torch.Size([1, 4, 35])
+        # <=> CODES out_codes.shape=torch.Size([1, 4, 35]) 30  2024
+        return out_codes  # supposedly contains extra prompt

audiocraft/vq.py CHANGED Viewed

@@ -64,6 +64,8 @@ class EuclideanCodebook(nn.Module):
         return embed_ind.view(*shape[:-1])
     def dequantize(self, embed_ind):
         quantize = F.embedding(embed_ind, self.embed)
         # print('\n\nDE QUANT\n\n', quantize.shape)  # (1, 35, 128) -> also arrives here for special_token
         return quantize

         return embed_ind.view(*shape[:-1])
     def dequantize(self, embed_ind):
+        # embed_ind[0] = 2048
+        # print('MAX MAX MAX', embed_ind.shape)
         quantize = F.embedding(embed_ind, self.embed)
         # print('\n\nDE QUANT\n\n', quantize.shape)  # (1, 35, 128) -> also arrives here for special_token
         return quantize