Spaces:

Zw07
/

test1

Sleeping

App Files Files Community

Zw07 committed on Oct 14, 2024

Commit

430043a

verified ·

1 Parent(s): 19cea60

Delete src

Browse files

Files changed (14) hide show

src/audioseal/__init__.py +0 -21
src/audioseal/builder.py +0 -118
src/audioseal/cards/audioseal_detector_16bits.yaml +0 -33
src/audioseal/cards/audioseal_wm_16bits.yaml +0 -39
src/audioseal/libs/__init__.py +0 -5
src/audioseal/libs/audiocraft/__init__.py +0 -5
src/audioseal/libs/audiocraft/modules/__init__.py +0 -8
src/audioseal/libs/audiocraft/modules/conv.py +0 -337
src/audioseal/libs/audiocraft/modules/lstm.py +0 -28
src/audioseal/libs/audiocraft/modules/seanet.py +0 -426
src/audioseal/loader.py +0 -227
src/audioseal/models.py +0 -175
src/audioseal/py.typed +0 -0
src/scripts/checkpoints.py +0 -51

src/audioseal/__init__.py DELETED Viewed

@@ -1,21 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-"""
-Watermarking and detection for speech audios
-A Pytorch-based localized algorithm for proactive detection
-of the watermarkings in AI-generated audios, with very fast
-detector.
-"""
-__version__ = "0.1.4"
-from audioseal import builder
-from audioseal.loader import AudioSeal
-from audioseal.models import AudioSealDetector, AudioSealWM, MsgProcessor

src/audioseal/builder.py DELETED Viewed

@@ -1,118 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-from dataclasses import asdict, dataclass, field, is_dataclass
-from typing import Any, Dict, List, Optional
-from omegaconf import DictConfig, OmegaConf
-from torch import device, dtype
-from typing_extensions import TypeAlias
-from audioseal.libs import audiocraft
-from audioseal.models import AudioSealDetector, AudioSealWM, MsgProcessor
-Device: TypeAlias = device
-DataType: TypeAlias = dtype
-@dataclass
-class SEANetConfig:
-    """
-    Map common hparams of SEANet encoder and decoder.
-    """
-    channels: int
-    dimension: int
-    n_filters: int
-    n_residual_layers: int
-    ratios: List[int]
-    activation: str
-    activation_params: Dict[str, float]
-    norm: str
-    norm_params: Dict[str, Any]
-    kernel_size: int
-    last_kernel_size: int
-    residual_kernel_size: int
-    dilation_base: int
-    causal: bool
-    pad_mode: str
-    true_skip: bool
-    compress: int
-    lstm: int
-    disable_norm_outer_blocks: int
-@dataclass
-class DecoderConfig:
-    final_activation: Optional[str]
-    final_activation_params: Optional[dict]
-    trim_right_ratio: float
-@dataclass
-class DetectorConfig:
-    output_dim: int = 32
-@dataclass
-class AudioSealWMConfig:
-    nbits: int
-    seanet: SEANetConfig
-    decoder: DecoderConfig
-@dataclass
-class AudioSealDetectorConfig:
-    nbits: int
-    seanet: SEANetConfig
-    detector: DetectorConfig = field(default_factory=lambda: DetectorConfig())
-def as_dict(obj: Any) -> Dict[str, Any]:
-    if isinstance(obj, dict):
-        return obj
-    if is_dataclass(obj) and not isinstance(obj, type):
-        return asdict(obj)
-    elif isinstance(obj, DictConfig):
-        return OmegaConf.to_container(obj)  # type: ignore
-    else:
-        raise NotImplementedError(f"Unsupported type for config: {type(obj)}")
-def create_generator(
-    config: AudioSealWMConfig,
-    *,
-    device: Optional[Device] = None,
-    dtype: Optional[DataType] = None,
-) -> AudioSealWM:
-    """Create a generator from hparams"""
-    #  Currently the encoder hparams are the same as
-    # SEANet, but this can be changed in the future.
-    encoder = audiocraft.modules.SEANetEncoder(**as_dict(config.seanet))
-    encoder = encoder.to(device=device, dtype=dtype)
-    decoder_config = {**as_dict(config.seanet), **as_dict(config.decoder)}
-    decoder = audiocraft.modules.SEANetDecoder(**as_dict(decoder_config))
-    decoder = decoder.to(device=device, dtype=dtype)
-    msgprocessor = MsgProcessor(nbits=config.nbits, hidden_size=config.seanet.dimension)
-    msgprocessor = msgprocessor.to(device=device, dtype=dtype)
-    return AudioSealWM(encoder=encoder, decoder=decoder, msg_processor=msgprocessor)
-def create_detector(
-    config: AudioSealDetectorConfig,
-    *,
-    device: Optional[Device] = None,
-    dtype: Optional[DataType] = None,
-) -> AudioSealDetector:
-    detector_config = {**as_dict(config.seanet), **as_dict(config.detector)}
-    detector = AudioSealDetector(nbits=config.nbits, **detector_config)
-    detector = detector.to(device=device, dtype=dtype)
-    return detector

src/audioseal/cards/audioseal_detector_16bits.yaml DELETED Viewed

@@ -1,33 +0,0 @@
-# @package __global__
-name: audioseal_detector_16bits
-model_type: seanet
-checkpoint: "https://huggingface.co/facebook/audioseal/resolve/main/detector_base.pth"
-nbits: 16
-seanet:
-  activation: ELU
-  activation_params:
-    alpha: 1.0
-  causal: false
-  channels: 1
-  compress: 2
-  dilation_base: 2
-  dimension: 128
-  disable_norm_outer_blocks: 0
-  kernel_size: 7
-  last_kernel_size: 7
-  lstm: 2
-  n_filters: 32
-  n_residual_layers: 1
-  norm: weight_norm
-  norm_params: {}
-  pad_mode: constant
-  ratios:
-    - 8
-    - 5
-    - 4
-    - 2
-  residual_kernel_size: 3
-  true_skip: true
-detector:
-  output_dim: 32

src/audioseal/cards/audioseal_wm_16bits.yaml DELETED Viewed

@@ -1,39 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-name: audioseal_wm_16bits
-model_type: seanet
-checkpoint: "https://huggingface.co/facebook/audioseal/resolve/main/generator_base.pth"
-nbits: 16
-seanet:
-  activation: ELU
-  activation_params:
-    alpha: 1.0
-  causal: false
-  channels: 1
-  compress: 2
-  dilation_base: 2
-  dimension: 128
-  disable_norm_outer_blocks: 0
-  kernel_size: 7
-  last_kernel_size: 7
-  lstm: 2
-  n_filters: 32
-  n_residual_layers: 1
-  norm: weight_norm
-  norm_params: {}
-  pad_mode: constant
-  ratios:
-    - 8
-    - 5
-    - 4
-    - 2
-  residual_kernel_size: 3
-  true_skip: true
-decoder:
-  final_activation: null
-  final_activation_params: null
-  trim_right_ratio: 1.0

src/audioseal/libs/__init__.py DELETED Viewed

@@ -1,5 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.

src/audioseal/libs/audiocraft/__init__.py DELETED Viewed

@@ -1,5 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.

src/audioseal/libs/audiocraft/modules/__init__.py DELETED Viewed

@@ -1,8 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-from .seanet import SEANetDecoder, SEANetEncoder, SEANetEncoderKeepDimension

src/audioseal/libs/audiocraft/modules/conv.py DELETED Viewed

@@ -1,337 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-#
-# Vendor from https://github.com/facebookresearch/audiocraft
-import math
-import typing as tp
-import warnings
-import torch
-from torch import nn
-from torch.nn import functional as F
-from torch.nn.utils import spectral_norm
-try:
-    from torch.nn.utils.parametrizations import weight_norm
-except ImportError:
-    # Old Pytorch
-    from torch.nn.utils import weight_norm
-CONV_NORMALIZATIONS = frozenset(
-    ["none", "weight_norm", "spectral_norm", "time_group_norm"]
-)
-def apply_parametrization_norm(module: nn.Module, norm: str = "none"):
-    assert norm in CONV_NORMALIZATIONS
-    if norm == "weight_norm":
-        return weight_norm(module)
-    elif norm == "spectral_norm":
-        return spectral_norm(module)
-    else:
-        # We already checked that norm is in CONV_NORMALIZATIONS, so any other
-        # choice doesn't need reparametrization.
-        return module
-def get_norm_module(
-    module: nn.Module, causal: bool = False, norm: str = "none", **norm_kwargs
-):
-    """Return the proper normalization module. If causal is True, this will ensure the returned
-    module is causal, or return an error if the normalization doesn't support causal evaluation.
-    """
-    assert norm in CONV_NORMALIZATIONS
-    if norm == "time_group_norm":
-        if causal:
-            raise ValueError("GroupNorm doesn't support causal evaluation.")
-        assert isinstance(module, nn.modules.conv._ConvNd)
-        return nn.GroupNorm(1, module.out_channels, **norm_kwargs)
-    else:
-        return nn.Identity()
-def get_extra_padding_for_conv1d(
-    x: torch.Tensor, kernel_size: int, stride: int, padding_total: int = 0
-) -> int:
-    """See `pad_for_conv1d`."""
-    length = x.shape[-1]
-    n_frames = (length - kernel_size + padding_total) / stride + 1
-    ideal_length = (math.ceil(n_frames) - 1) * stride + (kernel_size - padding_total)
-    return ideal_length - length
-def pad_for_conv1d(
-    x: torch.Tensor, kernel_size: int, stride: int, padding_total: int = 0
-):
-    """Pad for a convolution to make sure that the last window is full.
-    Extra padding is added at the end. This is required to ensure that we can rebuild
-    an output of the same length, as otherwise, even with padding, some time steps
-    might get removed.
-    For instance, with total padding = 4, kernel size = 4, stride = 2:
-        0 0 1 2 3 4 5 0 0   # (0s are padding)
-        1   2   3           # (output frames of a convolution, last 0 is never used)
-        0 0 1 2 3 4 5 0     # (output of tr. conv., but pos. 5 is going to get removed as padding)
-            1 2 3 4         # once you removed padding, we are missing one time step !
-    """
-    extra_padding = get_extra_padding_for_conv1d(x, kernel_size, stride, padding_total)
-    return F.pad(x, (0, extra_padding))
-def pad1d(
-    x: torch.Tensor,
-    paddings: tp.Tuple[int, int],
-    mode: str = "constant",
-    value: float = 0.0,
-):
-    """Tiny wrapper around F.pad, just to allow for reflect padding on small input.
-    If this is the case, we insert extra 0 padding to the right before the reflection happens.
-    """
-    length = x.shape[-1]
-    padding_left, padding_right = paddings
-    assert padding_left >= 0 and padding_right >= 0, (padding_left, padding_right)
-    if mode == "reflect":
-        max_pad = max(padding_left, padding_right)
-        extra_pad = 0
-        if length <= max_pad:
-            extra_pad = max_pad - length + 1
-            x = F.pad(x, (0, extra_pad))
-        padded = F.pad(x, paddings, mode, value)
-        end = padded.shape[-1] - extra_pad
-        return padded[..., :end]
-    else:
-        return F.pad(x, paddings, mode, value)
-def unpad1d(x: torch.Tensor, paddings: tp.Tuple[int, int]):
-    """Remove padding from x, handling properly zero padding. Only for 1d!"""
-    padding_left, padding_right = paddings
-    assert padding_left >= 0 and padding_right >= 0, (padding_left, padding_right)
-    assert (padding_left + padding_right) <= x.shape[-1]
-    end = x.shape[-1] - padding_right
-    return x[..., padding_left:end]
-class NormConv1d(nn.Module):
-    """Wrapper around Conv1d and normalization applied to this conv
-    to provide a uniform interface across normalization approaches.
-    """
-    def __init__(
-        self,
-        *args,
-        causal: bool = False,
-        norm: str = "none",
-        norm_kwargs: tp.Dict[str, tp.Any] = {},
-        **kwargs,
-    ):
-        super().__init__()
-        self.conv = apply_parametrization_norm(nn.Conv1d(*args, **kwargs), norm)
-        self.norm = get_norm_module(self.conv, causal, norm, **norm_kwargs)
-        self.norm_type = norm
-    def forward(self, x):
-        x = self.conv(x)
-        x = self.norm(x)
-        return x
-class NormConv2d(nn.Module):
-    """Wrapper around Conv2d and normalization applied to this conv
-    to provide a uniform interface across normalization approaches.
-    """
-    def __init__(
-        self,
-        *args,
-        norm: str = "none",
-        norm_kwargs: tp.Dict[str, tp.Any] = {},
-        **kwargs,
-    ):
-        super().__init__()
-        self.conv = apply_parametrization_norm(nn.Conv2d(*args, **kwargs), norm)
-        self.norm = get_norm_module(self.conv, causal=False, norm=norm, **norm_kwargs)
-        self.norm_type = norm
-    def forward(self, x):
-        x = self.conv(x)
-        x = self.norm(x)
-        return x
-class NormConvTranspose1d(nn.Module):
-    """Wrapper around ConvTranspose1d and normalization applied to this conv
-    to provide a uniform interface across normalization approaches.
-    """
-    def __init__(
-        self,
-        *args,
-        causal: bool = False,
-        norm: str = "none",
-        norm_kwargs: tp.Dict[str, tp.Any] = {},
-        **kwargs,
-    ):
-        super().__init__()
-        self.convtr = apply_parametrization_norm(
-            nn.ConvTranspose1d(*args, **kwargs), norm
-        )
-        self.norm = get_norm_module(self.convtr, causal, norm, **norm_kwargs)
-        self.norm_type = norm
-    def forward(self, x):
-        x = self.convtr(x)
-        x = self.norm(x)
-        return x
-class NormConvTranspose2d(nn.Module):
-    """Wrapper around ConvTranspose2d and normalization applied to this conv
-    to provide a uniform interface across normalization approaches.
-    """
-    def __init__(
-        self,
-        *args,
-        norm: str = "none",
-        norm_kwargs: tp.Dict[str, tp.Any] = {},
-        **kwargs,
-    ):
-        super().__init__()
-        self.convtr = apply_parametrization_norm(
-            nn.ConvTranspose2d(*args, **kwargs), norm
-        )
-        self.norm = get_norm_module(self.convtr, causal=False, norm=norm, **norm_kwargs)
-    def forward(self, x):
-        x = self.convtr(x)
-        x = self.norm(x)
-        return x
-class StreamableConv1d(nn.Module):
-    """Conv1d with some builtin handling of asymmetric or causal padding
-    and normalization.
-    """
-    def __init__(
-        self,
-        in_channels: int,
-        out_channels: int,
-        kernel_size: int,
-        stride: int = 1,
-        dilation: int = 1,
-        groups: int = 1,
-        bias: bool = True,
-        causal: bool = False,
-        norm: str = "none",
-        norm_kwargs: tp.Dict[str, tp.Any] = {},
-        pad_mode: str = "reflect",
-    ):
-        super().__init__()
-        # warn user on unusual setup between dilation and stride
-        if stride > 1 and dilation > 1:
-            warnings.warn(
-                "StreamableConv1d has been initialized with stride > 1 and dilation > 1"
-                f" (kernel_size={kernel_size} stride={stride}, dilation={dilation})."
-            )
-        self.conv = NormConv1d(
-            in_channels,
-            out_channels,
-            kernel_size,
-            stride,
-            dilation=dilation,
-            groups=groups,
-            bias=bias,
-            causal=causal,
-            norm=norm,
-            norm_kwargs=norm_kwargs,
-        )
-        self.causal = causal
-        self.pad_mode = pad_mode
-    def forward(self, x):
-        B, C, T = x.shape
-        kernel_size = self.conv.conv.kernel_size[0]
-        stride = self.conv.conv.stride[0]
-        dilation = self.conv.conv.dilation[0]
-        kernel_size = (
-            kernel_size - 1
-        ) * dilation + 1  # effective kernel size with dilations
-        padding_total = kernel_size - stride
-        extra_padding = get_extra_padding_for_conv1d(
-            x, kernel_size, stride, padding_total
-        )
-        if self.causal:
-            # Left padding for causal
-            x = pad1d(x, (padding_total, extra_padding), mode=self.pad_mode)
-        else:
-            # Asymmetric padding required for odd strides
-            padding_right = padding_total // 2
-            padding_left = padding_total - padding_right
-            x = pad1d(
-                x, (padding_left, padding_right + extra_padding), mode=self.pad_mode
-            )
-        return self.conv(x)
-class StreamableConvTranspose1d(nn.Module):
-    """ConvTranspose1d with some builtin handling of asymmetric or causal padding
-    and normalization.
-    """
-    def __init__(
-        self,
-        in_channels: int,
-        out_channels: int,
-        kernel_size: int,
-        stride: int = 1,
-        causal: bool = False,
-        norm: str = "none",
-        trim_right_ratio: float = 1.0,
-        norm_kwargs: tp.Dict[str, tp.Any] = {},
-    ):
-        super().__init__()
-        self.convtr = NormConvTranspose1d(
-            in_channels,
-            out_channels,
-            kernel_size,
-            stride,
-            causal=causal,
-            norm=norm,
-            norm_kwargs=norm_kwargs,
-        )
-        self.causal = causal
-        self.trim_right_ratio = trim_right_ratio
-        assert (
-            self.causal or self.trim_right_ratio == 1.0
-        ), "`trim_right_ratio` != 1.0 only makes sense for causal convolutions"
-        assert self.trim_right_ratio >= 0.0 and self.trim_right_ratio <= 1.0
-    def forward(self, x):
-        kernel_size = self.convtr.convtr.kernel_size[0]
-        stride = self.convtr.convtr.stride[0]
-        padding_total = kernel_size - stride
-        y = self.convtr(x)
-        # We will only trim fixed padding. Extra padding from `pad_for_conv1d` would be
-        # removed at the very end, when keeping only the right length for the output,
-        # as removing it here would require also passing the length at the matching layer
-        # in the encoder.
-        if self.causal:
-            # Trim the padding on the right according to the specified ratio
-            # if trim_right_ratio = 1.0, trim everything from right
-            padding_right = math.ceil(padding_total * self.trim_right_ratio)
-            padding_left = padding_total - padding_right
-            y = unpad1d(y, (padding_left, padding_right))
-        else:
-            # Asymmetric padding required for odd strides
-            padding_right = padding_total // 2
-            padding_left = padding_total - padding_right
-            y = unpad1d(y, (padding_left, padding_right))
-        return y

src/audioseal/libs/audiocraft/modules/lstm.py DELETED Viewed

@@ -1,28 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-#
-# Vendor from https://github.com/facebookresearch/audiocraft
-from torch import nn
-class StreamableLSTM(nn.Module):
-    """LSTM without worrying about the hidden state, nor the layout of the data.
-    Expects input as convolutional layout.
-    """
-    def __init__(self, dimension: int, num_layers: int = 2, skip: bool = True):
-        super().__init__()
-        self.skip = skip
-        self.lstm = nn.LSTM(dimension, dimension, num_layers)
-    def forward(self, x):
-        x = x.permute(2, 0, 1)
-        y, _ = self.lstm(x)
-        if self.skip:
-            y = y + x
-        y = y.permute(1, 2, 0)
-        return y

src/audioseal/libs/audiocraft/modules/seanet.py DELETED Viewed

@@ -1,426 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-#
-# Vendor from https://github.com/facebookresearch/audiocraft
-import math
-import typing as tp
-import numpy as np
-import torch.nn as nn
-from audioseal.libs.audiocraft.modules.conv import (
-    StreamableConv1d,
-    StreamableConvTranspose1d,
-)
-from audioseal.libs.audiocraft.modules.lstm import StreamableLSTM
-class SEANetResnetBlock(nn.Module):
-    """Residual block from SEANet model.
-    Args:
-        dim (int): Dimension of the input/output.
-        kernel_sizes (list): List of kernel sizes for the convolutions.
-        dilations (list): List of dilations for the convolutions.
-        activation (str): Activation function.
-        activation_params (dict): Parameters to provide to the activation function.
-        norm (str): Normalization method.
-        norm_params (dict): Parameters to provide to the underlying normalization used along with the convolution.
-        causal (bool): Whether to use fully causal convolution.
-        pad_mode (str): Padding mode for the convolutions.
-        compress (int): Reduced dimensionality in residual branches (from Demucs v3).
-        true_skip (bool): Whether to use true skip connection or a simple
-            (streamable) convolution as the skip connection.
-    """
-    def __init__(
-        self,
-        dim: int,
-        kernel_sizes: tp.List[int] = [3, 1],
-        dilations: tp.List[int] = [1, 1],
-        activation: str = "ELU",
-        activation_params: dict = {"alpha": 1.0},
-        norm: str = "none",
-        norm_params: tp.Dict[str, tp.Any] = {},
-        causal: bool = False,
-        pad_mode: str = "reflect",
-        compress: int = 2,
-        true_skip: bool = True,
-    ):
-        super().__init__()
-        assert len(kernel_sizes) == len(
-            dilations
-        ), "Number of kernel sizes should match number of dilations"
-        act = getattr(nn, activation)
-        hidden = dim // compress
-        block = []
-        for i, (kernel_size, dilation) in enumerate(zip(kernel_sizes, dilations)):
-            in_chs = dim if i == 0 else hidden
-            out_chs = dim if i == len(kernel_sizes) - 1 else hidden
-            block += [
-                act(**activation_params),
-                StreamableConv1d(
-                    in_chs,
-                    out_chs,
-                    kernel_size=kernel_size,
-                    dilation=dilation,
-                    norm=norm,
-                    norm_kwargs=norm_params,
-                    causal=causal,
-                    pad_mode=pad_mode,
-                ),
-            ]
-        self.block = nn.Sequential(*block)
-        self.shortcut: nn.Module
-        if true_skip:
-            self.shortcut = nn.Identity()
-        else:
-            self.shortcut = StreamableConv1d(
-                dim,
-                dim,
-                kernel_size=1,
-                norm=norm,
-                norm_kwargs=norm_params,
-                causal=causal,
-                pad_mode=pad_mode,
-            )
-    def forward(self, x):
-        return self.shortcut(x) + self.block(x)
-class SEANetEncoder(nn.Module):
-    """SEANet encoder.
-    Args:
-        channels (int): Audio channels.
-        dimension (int): Intermediate representation dimension.
-        n_filters (int): Base width for the model.
-        n_residual_layers (int): nb of residual layers.
-        ratios (Sequence[int]): kernel size and stride ratios. The encoder uses downsampling ratios instead of
-            upsampling ratios, hence it will use the ratios in the reverse order to the ones specified here
-            that must match the decoder order. We use the decoder order as some models may only employ the decoder.
-        activation (str): Activation function.
-        activation_params (dict): Parameters to provide to the activation function.
-        norm (str): Normalization method.
-        norm_params (dict): Parameters to provide to the underlying normalization used along with the convolution.
-        kernel_size (int): Kernel size for the initial convolution.
-        last_kernel_size (int): Kernel size for the final convolution.
-        residual_kernel_size (int): Kernel size for the residual layers.
-        dilation_base (int): How much to increase the dilation with each layer.
-        causal (bool): Whether to use fully causal convolution.
-        pad_mode (str): Padding mode for the convolutions.
-        true_skip (bool): Whether to use true skip connection or a simple
-            (streamable) convolution as the skip connection in the residual network blocks.
-        compress (int): Reduced dimensionality in residual branches (from Demucs v3).
-        lstm (int): Number of LSTM layers at the end of the encoder.
-        disable_norm_outer_blocks (int): Number of blocks for which we don't apply norm.
-            For the encoder, it corresponds to the N first blocks.
-    """
-    def __init__(
-        self,
-        channels: int = 1,
-        dimension: int = 128,
-        n_filters: int = 32,
-        n_residual_layers: int = 3,
-        ratios: tp.List[int] = [8, 5, 4, 2],
-        activation: str = "ELU",
-        activation_params: dict = {"alpha": 1.0},
-        norm: str = "none",
-        norm_params: tp.Dict[str, tp.Any] = {},
-        kernel_size: int = 7,
-        last_kernel_size: int = 7,
-        residual_kernel_size: int = 3,
-        dilation_base: int = 2,
-        causal: bool = False,
-        pad_mode: str = "reflect",
-        true_skip: bool = True,
-        compress: int = 2,
-        lstm: int = 0,
-        disable_norm_outer_blocks: int = 0,
-    ):
-        super().__init__()
-        self.channels = channels
-        self.dimension = dimension
-        self.n_filters = n_filters
-        self.ratios = list(reversed(ratios))
-        del ratios
-        self.n_residual_layers = n_residual_layers
-        self.hop_length = np.prod(self.ratios)
-        self.n_blocks = len(self.ratios) + 2  # first and last conv + residual blocks
-        self.disable_norm_outer_blocks = disable_norm_outer_blocks
-        assert (
-            self.disable_norm_outer_blocks >= 0
-            and self.disable_norm_outer_blocks <= self.n_blocks
-        ), (
-            "Number of blocks for which to disable norm is invalid."
-            "It should be lower or equal to the actual number of blocks in the network and greater or equal to 0."
-        )
-        act = getattr(nn, activation)
-        mult = 1
-        model: tp.List[nn.Module] = [
-            StreamableConv1d(
-                channels,
-                mult * n_filters,
-                kernel_size,
-                norm="none" if self.disable_norm_outer_blocks >= 1 else norm,
-                norm_kwargs=norm_params,
-                causal=causal,
-                pad_mode=pad_mode,
-            )
-        ]
-        # Downsample to raw audio scale
-        for i, ratio in enumerate(self.ratios):
-            block_norm = "none" if self.disable_norm_outer_blocks >= i + 2 else norm
-            # Add residual layers
-            for j in range(n_residual_layers):
-                model += [
-                    SEANetResnetBlock(
-                        mult * n_filters,
-                        kernel_sizes=[residual_kernel_size, 1],
-                        dilations=[dilation_base**j, 1],
-                        norm=block_norm,
-                        norm_params=norm_params,
-                        activation=activation,
-                        activation_params=activation_params,
-                        causal=causal,
-                        pad_mode=pad_mode,
-                        compress=compress,
-                        true_skip=true_skip,
-                    )
-                ]
-            # Add downsampling layers
-            model += [
-                act(**activation_params),
-                StreamableConv1d(
-                    mult * n_filters,
-                    mult * n_filters * 2,
-                    kernel_size=ratio * 2,
-                    stride=ratio,
-                    norm=block_norm,
-                    norm_kwargs=norm_params,
-                    causal=causal,
-                    pad_mode=pad_mode,
-                ),
-            ]
-            mult *= 2
-        if lstm:
-            model += [StreamableLSTM(mult * n_filters, num_layers=lstm)]
-        model += [
-            act(**activation_params),
-            StreamableConv1d(
-                mult * n_filters,
-                dimension,
-                last_kernel_size,
-                norm=(
-                    "none" if self.disable_norm_outer_blocks == self.n_blocks else norm
-                ),
-                norm_kwargs=norm_params,
-                causal=causal,
-                pad_mode=pad_mode,
-            ),
-        ]
-        self.model = nn.Sequential(*model)
-    def forward(self, x):
-        return self.model(x)
-class SEANetEncoderKeepDimension(SEANetEncoder):
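-    """
-    Similar architecture to the SEANet encoder, but with an extra transposed
-    convolution (kernel size and stride equal to the product of the ratios) applied
-    to the encoder output; the result is then cropped to the input's last-axis length.
-    Args:
-        output_dim (int): Required keyword argument; output channels of the transposed convolution.
-    """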
-    def __init__(self, *args, **kwargs):
-        self.output_dim = kwargs.pop("output_dim")
-        super().__init__(*args, **kwargs)
-        # Adding a reverse convolution layer
-        self.reverse_convolution = nn.ConvTranspose1d(
-            in_channels=self.dimension,
-            out_channels=self.output_dim,
-            kernel_size=math.prod(self.ratios),
-            stride=math.prod(self.ratios),
-            padding=0,
-        )
-    def forward(self, x):
-        orig_nframes = x.shape[-1]
-        x = self.model(x)
-        x = self.reverse_convolution(x)
-        # make sure dim didn't change
-        return x[:, :, :orig_nframes]
-class SEANetDecoder(nn.Module):
-    """SEANet decoder.
-    The network is a single ``nn.Sequential``: an initial convolution, an optional LSTM,
-    one (activation, transposed convolution, residual blocks) stage per entry of ``ratios``,
-    a final activation + convolution, and an optional final activation.
-    Args:
-        channels (int): Audio channels (output channels of the final convolution).
-        dimension (int): Intermediate representation dimension (input channels of the initial convolution).
-        n_filters (int): Base width for the model.
-        n_residual_layers (int): nb of residual layers.
-        ratios (Sequence[int]): Upsampling ratio of each transposed convolution, applied in the given order
-            (stride = ratio, kernel size = 2 * ratio).
-        activation (str): Activation function (name of a class in ``torch.nn``).
-        activation_params (dict): Parameters to provide to the activation function.
-        final_activation (str): Final activation function after all convolutions.
-        final_activation_params (dict): Parameters to provide to the activation function.
-        norm (str): Normalization method.
-        norm_params (dict): Parameters to provide to the underlying normalization used along with the convolution.
-        kernel_size (int): Kernel size for the initial convolution.
-        last_kernel_size (int): Kernel size for the final convolution.
-        residual_kernel_size (int): Kernel size for the residual layers.
-        dilation_base (int): How much to increase the dilation with each layer
-            (the j-th residual layer of a stage uses dilation ``dilation_base ** j``).
-        causal (bool): Whether to use fully causal convolution.
-        pad_mode (str): Padding mode for the convolutions.
-        true_skip (bool): Whether to use true skip connection or a simple
-            (streamable) convolution as the skip connection in the residual network blocks.
-        compress (int): Reduced dimensionality in residual branches (from Demucs v3).
-        lstm (int): Number of LSTM layers inserted right after the initial convolution (0 adds no LSTM).
-        disable_norm_outer_blocks (int): Number of blocks for which we don't apply norm.
-            For the decoder, it corresponds to the N last blocks.
-        trim_right_ratio (float): Ratio for trimming at the right of the transposed convolution under the causal setup.
-            If equal to 1.0, it means that all the trimming is done at the right.
-    """
-    def __init__(
-        self,
-        channels: int = 1,
-        dimension: int = 128,
-        n_filters: int = 32,
-        n_residual_layers: int = 3,
-        ratios: tp.List[int] = [8, 5, 4, 2],
-        activation: str = "ELU",
-        activation_params: dict = {"alpha": 1.0},
-        final_activation: tp.Optional[str] = None,
-        final_activation_params: tp.Optional[dict] = None,
-        norm: str = "none",
-        norm_params: tp.Dict[str, tp.Any] = {},
-        kernel_size: int = 7,
-        last_kernel_size: int = 7,
-        residual_kernel_size: int = 3,
-        dilation_base: int = 2,
-        causal: bool = False,
-        pad_mode: str = "reflect",
-        true_skip: bool = True,
-        compress: int = 2,
-        lstm: int = 0,
-        disable_norm_outer_blocks: int = 0,
-        trim_right_ratio: float = 1.0,
-    ):
-        # NOTE(review): ``ratios``, ``activation_params`` and ``norm_params`` use mutable default
-        # values; this constructor only reads them, but they are shared between calls.
-        super().__init__()
-        self.dimension = dimension
-        self.channels = channels
-        self.n_filters = n_filters
-        self.ratios = ratios
-        # Drop the local name; everything below goes through ``self.ratios``
-        # (presumably to avoid using the two interchangeably).
-        del ratios
-        self.n_residual_layers = n_residual_layers
-        # Total upsampling factor: product of all ratios.
-        self.hop_length = np.prod(self.ratios)
-        self.n_blocks = len(self.ratios) + 2  # first and last conv + residual blocks
-        self.disable_norm_outer_blocks = disable_norm_outer_blocks
-        assert (
-            self.disable_norm_outer_blocks >= 0
-            and self.disable_norm_outer_blocks <= self.n_blocks
-        ), (
-            "Number of blocks for which to disable norm is invalid."
-            "It should be lower or equal to the actual number of blocks in the network and greater or equal to 0."
-        )
-        # Resolve the activation class by name from torch.nn.
-        act = getattr(nn, activation)
-        # Channel multiplier: starts at 2 ** len(ratios) and is halved after every
-        # upsampling stage, so the last stage outputs ``n_filters`` channels.
-        mult = int(2 ** len(self.ratios))
-        # The initial convolution is block 0; its norm is only disabled when norm is
-        # disabled for every block of the network.
-        model: tp.List[nn.Module] = [
-            StreamableConv1d(
-                dimension,
-                mult * n_filters,
-                kernel_size,
-                norm=(
-                    "none" if self.disable_norm_outer_blocks == self.n_blocks else norm
-                ),
-                norm_kwargs=norm_params,
-                causal=causal,
-                pad_mode=pad_mode,
-            )
-        ]
-        if lstm:
-            model += [StreamableLSTM(mult * n_filters, num_layers=lstm)]
-        # Upsample to raw audio scale
-        for i, ratio in enumerate(self.ratios):
-            # Stage i is block i + 1; norm is dropped when that block is among the
-            # last ``disable_norm_outer_blocks`` blocks.
-            block_norm = (
-                "none"
-                if self.disable_norm_outer_blocks >= self.n_blocks - (i + 1)
-                else norm
-            )
-            # Add upsampling layers
-            model += [
-                act(**activation_params),
-                StreamableConvTranspose1d(
-                    mult * n_filters,
-                    mult * n_filters // 2,
-                    kernel_size=ratio * 2,
-                    stride=ratio,
-                    norm=block_norm,
-                    norm_kwargs=norm_params,
-                    causal=causal,
-                    trim_right_ratio=trim_right_ratio,
-                ),
-            ]
-            # Add residual layers
-            for j in range(n_residual_layers):
-                model += [
-                    SEANetResnetBlock(
-                        mult * n_filters // 2,
-                        kernel_sizes=[residual_kernel_size, 1],
-                        dilations=[dilation_base**j, 1],
-                        activation=activation,
-                        activation_params=activation_params,
-                        norm=block_norm,
-                        norm_params=norm_params,
-                        causal=causal,
-                        pad_mode=pad_mode,
-                        compress=compress,
-                        true_skip=true_skip,
-                    )
-                ]
-            mult //= 2
-        # Add final layers
-        # The final convolution is the last block, so its norm is dropped as soon as
-        # at least one outer block has norm disabled.
-        model += [
-            act(**activation_params),
-            StreamableConv1d(
-                n_filters,
-                channels,
-                last_kernel_size,
-                norm="none" if self.disable_norm_outer_blocks >= 1 else norm,
-                norm_kwargs=norm_params,
-                causal=causal,
-                pad_mode=pad_mode,
-            ),
-        ]
-        # Add optional final activation to decoder (eg. tanh)
-        if final_activation is not None:
-            final_act = getattr(nn, final_activation)
-            final_activation_params = final_activation_params or {}
-            model += [final_act(**final_activation_params)]
-        self.model = nn.Sequential(*model)
-    def forward(self, z):
-        """Run ``z`` through the sequential model and return its output.
-        Assumes ``z`` is laid out as (batch, dimension, frames) - TODO confirm against callers.
-        """
-        y = self.model(z)
-        return y

src/audioseal/loader.py DELETED Viewed

@@ -1,227 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-import os
-from dataclasses import fields
-from hashlib import sha1
-from pathlib import Path
-from typing import (  # type: ignore[attr-defined]
-    Any,
-    Dict,
-    List,
-    Optional,
-    Tuple,
-    Type,
-    TypeVar,
-    Union,
-    cast,
-)
-from urllib.parse import urlparse  # noqa: F401
-import torch
-from omegaconf import DictConfig, OmegaConf
-import audioseal
-from audioseal.builder import (
-    AudioSealDetectorConfig,
-    AudioSealWMConfig,
-    create_detector,
-    create_generator,
-)
-from audioseal.models import AudioSealDetector, AudioSealWM
-AudioSealT = TypeVar("AudioSealT", AudioSealWMConfig, AudioSealDetectorConfig)
-class ModelLoadError(RuntimeError):
-    """Signals that a model checkpoint could not be resolved or loaded."""
-def _get_path_from_env(var_name: str) -> Optional[Path]:
-    """Read environment variable ``var_name`` and return its value as a ``Path``.
-    Returns ``None`` when the variable is unset or empty. A ``ValueError`` raised
-    while building the ``Path`` is re-raised as ``RuntimeError``.
-    """
-    pathname = os.getenv(var_name)
-    if pathname:
-        try:
-            return Path(pathname)
-        except ValueError as ex:
-            raise RuntimeError(f"Expect valid pathname, get '{pathname}'.") from ex
-    return None
-def _get_cache_dir(env_names: List[str]):
-    """Pick a base cache directory and return its ``audioseal`` sub-directory.
-    The environment variables in ``env_names`` are tried in order and the first one
-    that yields a path wins; when none does, ``~/.cache`` (expanded and resolved) is
-    used. The ``audioseal`` sub-directory is created if it does not exist yet.
-    """
-    # Lazy generator: later variables are not read once one has matched.
-    candidates = (_get_path_from_env(name) for name in env_names)
-    base_dir = next((path for path in candidates if path), None)
-    if base_dir is None:
-        base_dir = Path("~/.cache").expanduser().resolve()
-    # Use a dedicated sub-directory so the shared cache is left untouched.
-    audioseal_dir = base_dir / "audioseal"
-    audioseal_dir.mkdir(exist_ok=True, parents=True)
-    return audioseal_dir
-def load_model_checkpoint(
-    model_path: Union[Path, str],
-    device: Union[str, torch.device] = "cpu",
-):
-    """Load a checkpoint from a local file, an https URL or the AudioSeal Hugging Face repo.
-    Args:
-        model_path: Path of an existing local file, an ``https://`` URL, or a name of
-            the form ``facebook/audioseal/<filename>``.
-        device: Passed as ``map_location`` when the checkpoint is deserialized.
-    Returns:
-        The object returned by ``torch.load`` / ``torch.hub.load_state_dict_from_url``
-        for the resolved checkpoint.
-    Raises:
-        ModelLoadError: If ``model_path`` matches none of the supported forms, or if it
-            is a Hugging Face path and ``huggingface_hub`` is not installed.
-    """
-    # NOTE(review): torch.load unpickles the file - only load checkpoints you trust.
-    if Path(model_path).is_file():
-        return torch.load(model_path, map_location=device)
-    cache_dir = _get_cache_dir(
-        ["AUDIOSEAL_CACHE_DIR", "AUDIOCRAFT_CACHE_DIR", "XDG_CACHE_HOME"]
-    )
-    parts = urlparse(str(model_path))
-    if parts.scheme == "https":
-        # The cached file is named after a hash of the URL path.
-        hash_ = sha1(parts.path.encode()).hexdigest()[:24]
-        return torch.hub.load_state_dict_from_url(
-            str(model_path), model_dir=cache_dir, map_location=device, file_name=hash_
-        )
-    elif str(model_path).startswith("facebook/audioseal/"):
-        hf_filename = str(model_path)[len("facebook/audioseal/") :]
-        try:
-            from huggingface_hub import hf_hub_download
-        except ModuleNotFoundError as ex:
-            # Fail here with a clear error: only printing the hint would let the
-            # call below crash on the unbound name ``hf_hub_download``.
-            raise ModelLoadError(
-                f"The model path {model_path} seems to be a direct HF path, "
-                "but you do not install Huggingface_hub. Install with for example "
-                "`pip install huggingface_hub` to use this feature."
-            ) from ex
-        file = hf_hub_download(
-            repo_id="facebook/audioseal",
-            filename=hf_filename,
-            cache_dir=cache_dir,
-            library_name="audioseal",
-            library_version=audioseal.__version__,
-        )
-        return torch.load(file, map_location=device)
-    else:
-        raise ModelLoadError(f"Path or uri {model_path} is unknown or does not exist")
-def load_local_model_config(model_card: str) -> Optional[DictConfig]:
-    """Load ``cards/<model_card>.yaml`` located next to this module.
-    Returns the parsed configuration, or ``None`` when no such file exists.
-    """
-    card_path = Path(__file__).parent / "cards" / (model_card + ".yaml")
-    if not Path(card_path).is_file():
-        return None
-    return cast(DictConfig, OmegaConf.load(card_path.resolve()))
-class AudioSeal:
-    """Static helpers that turn a model card name or checkpoint path into a ready model."""
-    @staticmethod
-    def parse_model(
-        model_card_or_path: str,
-        model_type: Type[AudioSealT],
-        nbits: Optional[int] = None,
-    ) -> Tuple[Dict[str, Any], AudioSealT]:
-        """
-        Resolve ``model_card_or_path`` into a (state dict, config) pair, where the
-        config follows the schema given by ``model_type``.
-        A local model card is tried first; otherwise the argument is treated as a
-        checkpoint path or URI.
-        """
-        card_config = load_local_model_config(model_card_or_path)
-        if card_config:
-            # Local model card: it names the checkpoint and may carry config values
-            assert (
-                "checkpoint" in card_config
-            ), f"Checkpoint missing in {model_card_or_path}"
-            config_dict = OmegaConf.to_container(card_config)
-            assert isinstance(
-                config_dict, dict
-            ), f"Cannot parse config from {model_card_or_path}"
-            checkpoint = load_model_checkpoint(config_dict.pop("checkpoint"))
-        else:
-            # No card: the argument itself is the checkpoint location
-            config_dict = {}
-            checkpoint = load_model_checkpoint(model_card_or_path)
-        # Values from the model card take precedence over those stored in the checkpoint
-        if "xp.cfg" in checkpoint:
-            config_dict = {**checkpoint["xp.cfg"], **config_dict}  # type: ignore
-        model_config = AudioSeal.parse_config(config_dict, config_type=model_type, nbits=nbits)  # type: ignore
-        # Some checkpoints wrap the weights under a "model" key
-        state = checkpoint["model"] if "model" in checkpoint else checkpoint
-        return state, model_config
-    @staticmethod
-    def parse_config(
-        config: Dict[str, Any],
-        config_type: Type[AudioSealT],
-        nbits: Optional[int] = None,
-    ) -> AudioSealT:
-        """
-        Normalise a raw config dict and project it onto the dataclass ``config_type``.
-        Only the keys that are fields of ``config_type`` are kept.
-        """
-        assert "seanet" in config, f"missing seanet backbone config in {config}"
-        # Step 1: resolve the interpolation variables stored in the checkpoint config
-        omega_cfg = OmegaConf.create(config)  # type: ignore
-        OmegaConf.resolve(omega_cfg)  # type: ignore
-        plain_cfg = OmegaConf.to_container(omega_cfg)  # type: ignore
-        # Step 2: lift encoder / decoder / detector sections out of "seanet";
-        # on key clashes the values nested under "seanet" win
-        seanet_cfg = plain_cfg["seanet"]
-        for section in ("encoder", "decoder", "detector"):
-            if section not in seanet_cfg:
-                continue
-            outer_section = plain_cfg.get(section) or {}
-            plain_cfg[section] = {**outer_section, **seanet_cfg.pop(section)}
-        plain_cfg["seanet"] = seanet_cfg
-        # Step 3: use the caller's nbits only when the config does not define one
-        if nbits and "nbits" not in plain_cfg:
-            plain_cfg["nbits"] = nbits
-        assert plain_cfg, "Empty config"
-        # Keep only the attributes that belong to the requested model type
-        selected = {
-            spec.name: plain_cfg[spec.name]
-            for spec in fields(config_type)
-            if spec.name in plain_cfg
-        }
-        schema = OmegaConf.structured(config_type)
-        schema.merge_with(selected)
-        return schema
-    @staticmethod
-    def load_generator(
-        model_card_or_path: str,
-        nbits: Optional[int] = None,
-    ) -> AudioSealWM:
-        """Build the AudioSeal generator for a model card or checkpoint and load its weights"""
-        state, generator_config = AudioSeal.parse_model(
-            model_card_or_path,
-            AudioSealWMConfig,
-            nbits=nbits,
-        )
-        generator = create_generator(generator_config)
-        generator.load_state_dict(state)
-        return generator
-    @staticmethod
-    def load_detector(
-        model_card_or_path: str,
-        nbits: Optional[int] = None,
-    ) -> AudioSealDetector:
-        """Build the AudioSeal detector for a model card or checkpoint and load its weights"""
-        state, detector_config = AudioSeal.parse_model(
-            model_card_or_path,
-            AudioSealDetectorConfig,
-            nbits=nbits,
-        )
-        detector = create_detector(detector_config)
-        detector.load_state_dict(state)
-        return detector

src/audioseal/models.py DELETED Viewed

@@ -1,175 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-import logging
-from typing import Optional, Tuple
-import librosa
-import numpy as np
-import torch
-from audioseal.libs.audiocraft.modules.seanet import SEANetEncoderKeepDimension
-logger = logging.getLogger("Audioseal")
-COMPATIBLE_WARNING = """
-AudioSeal is designed to work at a sample rate 16khz.
-Implicit sampling rate usage is deprecated and will be removed in future version.
-To remove this warning please add this argument to the function call:
-sample_rate = your_sample_rate
-"""
-class MsgProcessor(torch.nn.Module):
-    """Adds a learned embedding of a binary message to a hidden representation.
-    The embedding table has ``2 * nbits`` rows: rows ``2 * i`` and ``2 * i + 1`` are
-    used for the values 0 and 1 of bit ``i``.
-    """
-    def __init__(self, nbits: int, hidden_size: int):
-        super().__init__()
-        assert nbits > 0, "MsgProcessor should not be built in 0bit watermarking"
-        self.hidden_size = hidden_size
-        self.nbits = nbits
-        self.msg_processor = torch.nn.Embedding(2 * nbits, hidden_size)
-    def forward(self, hidden: torch.Tensor, msg: torch.Tensor) -> torch.Tensor:
-        """Return ``hidden`` plus the summed bit embeddings, repeated along the last axis.
-        ``msg`` is indexed as (batch, bits) and ``hidden`` as (batch, hidden_size, frames).
-        """
-        batch_size, message_bits = msg.shape[0], msg.shape[-1]
-        # Row offset of every bit position inside the embedding table
-        bit_offsets = 2 * torch.arange(message_bits).to(msg.device)
-        rows = (bit_offsets.repeat(batch_size, 1) + msg).long()
-        # One vector per message: the sum of the embeddings of all its bits
-        message_vector = self.msg_processor(rows).sum(dim=-2)
-        per_frame = message_vector.unsqueeze(-1).repeat(1, 1, hidden.shape[2])
-        return hidden + per_frame
-def compute_stft_energy(audio: torch.Tensor, sr: int, n_fft: int = 2048, hop_length: int = 512) -> torch.Tensor:
-    """Compute the per-frame STFT energy of every item of a batch.
-    Args:
-        audio: Batch of waveforms; the first axis is the batch, the last axis is time.
-        sr: Sample rate. Not used by the computation; kept for interface compatibility.
-        n_fft: FFT size passed to ``librosa.stft``.
-        hop_length: Hop length passed to ``librosa.stft``.
-    Returns:
-        Tensor on ``audio.device`` holding, for every item, the sum over frequency
-        bins of the squared STFT magnitude of each frame (frames on the last axis).
-    """
-    frame_energies = []
-    for waveform in audio:
-        # detach() first: Tensor.numpy() raises on tensors that require grad
-        samples = waveform.detach().cpu().numpy()
-        magnitude = np.abs(librosa.stft(samples, n_fft=n_fft, hop_length=hop_length))
-        # librosa puts frequency on the second-to-last axis, so summing over axis -2
-        # also stays correct when an item has leading channel axes
-        frame_energy = torch.tensor(np.sum(magnitude**2, axis=-2), device=audio.device)
-        frame_energies.append(frame_energy)
-    return torch.stack(frame_energies, dim=0)
-def compute_adaptive_alpha_librosa(energy_values: torch.Tensor, min_alpha: float = 0.5, max_alpha: float = 1.5) -> torch.Tensor:
-    """Map energies to scaling factors in ``[min_alpha, max_alpha]``.
-    Every row is min-max normalised along dim 1 (a 1e-6 term in the denominator
-    guards against constant rows) and then rescaled linearly.
-    """
-    row_min = energy_values.min(dim=1, keepdim=True)[0]
-    row_max = energy_values.max(dim=1, keepdim=True)[0]
-    unit_scaled = (energy_values - row_min) / (row_max - row_min + 1e-6)
-    alpha_span = max_alpha - min_alpha
-    return min_alpha + unit_scaled * alpha_span
-class AudioSealWM(torch.nn.Module):
-    """Watermark generator: encodes the audio, optionally injects a message, decodes a
-    watermark and adds it to the input, scaled by an energy-dependent factor.
-    """
-    def __init__(self, encoder: torch.nn.Module, decoder: torch.nn.Module, msg_processor: Optional[torch.nn.Module] = None):
-        super().__init__()
-        self.encoder = encoder
-        self.decoder = decoder
-        self.msg_processor = msg_processor
-        # Default message used when forward() receives none
-        self._message: Optional[torch.Tensor] = None
-        # Message embedded by the most recent forward() call
-        self._original_payload: Optional[torch.Tensor] = None
-    @property
-    def message(self) -> Optional[torch.Tensor]:
-        return self._message
-    @message.setter
-    def message(self, message: torch.Tensor) -> None:
-        self._message = message
-    def get_original_payload(self) -> Optional[torch.Tensor]:
-        """Return the message embedded by the last forward() call, if any."""
-        return self._original_payload
-    def get_watermark(self, x: torch.Tensor, sample_rate: Optional[int] = None, message: Optional[torch.Tensor] = None) -> torch.Tensor:
-        """Delegate to forward().
-        NOTE(review): despite its name this returns the watermarked audio
-        (``x + alpha * watermark``), not the watermark alone.
-        """
-        return self.forward(x, sample_rate, message)
-    @staticmethod
-    def _resample(audio: torch.Tensor, orig_sr: int, target_sr: int) -> torch.Tensor:
-        """Resample ``audio`` with librosa; the result is detached from autograd."""
-        audio_np = audio.detach().cpu().numpy()
-        resampled = librosa.resample(audio_np, orig_sr=orig_sr, target_sr=target_sr)
-        return torch.tensor(resampled, device=audio.device)
-    def forward(self, x: torch.Tensor, sample_rate: Optional[int] = None, message: Optional[torch.Tensor] = None,
-                n_fft: int = 2048, hop_length: int = 512, min_alpha: float = 0.5, max_alpha: float = 1.5) -> torch.Tensor:
-        """Return ``x`` with an energy-scaled watermark added.
-        Args:
-            x: Input audio; time is the last axis.
-            sample_rate: Sample rate of ``x``. When omitted a warning is logged and
-                16 kHz is assumed.
-            message: Message to embed. Falls back to ``self.message`` and then to a
-                random binary message. Ignored when there is no message processor.
-            n_fft: FFT size of the STFT used for the energy estimate.
-            hop_length: Hop length of that STFT; each frame's factor is repeated
-                ``hop_length`` times to cover the samples.
-            min_alpha: Scaling factor applied to the lowest-energy frames.
-            max_alpha: Scaling factor applied to the highest-energy frames.
-        Returns:
-            Watermarked audio at the sample rate of ``x``.
-        """
-        if sample_rate is None:
-            logger.warning(COMPATIBLE_WARNING)
-            sample_rate = 16_000
-        length = x.size(-1)
-        # Keep ``x`` untouched: it is mixed with the watermark at its own sample rate
-        model_input = x
-        if sample_rate != 16000:
-            model_input = self._resample(x, sample_rate, 16000)
-        hidden = self.encoder(model_input)
-        if self.msg_processor is not None:
-            if message is None:
-                if self.message is None:
-                    message = torch.randint(0, 2, (x.shape[0], self.msg_processor.nbits), device=x.device)
-                else:
-                    message = self.message.to(device=x.device)
-            else:
-                message = message.to(device=x.device)
-            hidden = self.msg_processor(hidden, message)
-            self._original_payload = message
-        watermark = self.decoder(hidden)
-        if sample_rate != 16000:
-            watermark = self._resample(watermark, 16000, sample_rate)
-        # Trim to the input length (the watermark may come out longer) so the two can be added
-        watermark = watermark[..., :length]
-        # Energy is estimated per waveform: flatten every leading axis into rows
-        flat_audio = x.detach().reshape(-1, length)
-        energy_values = compute_stft_energy(flat_audio, sr=sample_rate, n_fft=n_fft, hop_length=hop_length)
-        adaptive_alpha = compute_adaptive_alpha_librosa(energy_values, min_alpha=min_alpha, max_alpha=max_alpha)
-        # Frame-level factors -> sample-level factors. Relies on librosa's default
-        # centered STFT, which yields 1 + length // hop_length frames, i.e. more than
-        # ``length`` samples once every frame is repeated hop_length times.
-        stretched_alpha = torch.repeat_interleave(adaptive_alpha, hop_length, dim=1)[:, :length]
-        stretched_alpha = stretched_alpha.reshape(x.shape)
-        watermarked_audio = x + stretched_alpha * watermark
-        return watermarked_audio
-class AudioSealDetector(torch.nn.Module):
-    """Watermark detector: a SEANet encoder followed by a 1x1 convolution that outputs
-    ``2 + nbits`` channels (two detection channels, then one channel per message bit).
-    """
-    def __init__(self, *args, nbits: int = 0, **kwargs):
-        super().__init__()
-        encoder = SEANetEncoderKeepDimension(*args, **kwargs)
-        last_layer = torch.nn.Conv1d(encoder.output_dim, 2 + nbits, 1)
-        self.detector = torch.nn.Sequential(encoder, last_layer)
-        self.nbits = nbits
-    def detect_watermark(self, x: torch.Tensor, sample_rate: Optional[int] = None, message_threshold: float = 0.5) -> Tuple[float, torch.Tensor]:
-        """Summarise forward() into a single detection score and a binary message.
-        Args:
-            x: Audio to analyse.
-            sample_rate: Sample rate of ``x`` (see forward()).
-            message_threshold: Bit probabilities above this value are decoded as 1.
-        Returns:
-            ``(detect_prob, message)``: the fraction of frames, over the whole batch,
-            whose positive-class probability exceeds 0.5, and the thresholded message.
-        """
-        result, message = self.forward(x, sample_rate=sample_rate)
-        positive_frames = torch.gt(result[:, 1, :], 0.5)
-        # Normalise by every frame of the batch so the score stays within [0, 1]
-        # for batches larger than one (same value as before for a single item)
-        detected = torch.count_nonzero(positive_frames) / positive_frames.numel()
-        detect_prob = detected.cpu().item()
-        message = torch.gt(message, message_threshold).int()
-        return detect_prob, message
-    def decode_message(self, result: torch.Tensor) -> torch.Tensor:
-        """Average the message channels over the last axis and apply a sigmoid."""
-        assert (result.dim() > 2 and result.shape[1] == self.nbits) or (
-            result.dim() == 2 and result.shape[0] == self.nbits
-        ), f"Expect message of size [,{self.nbits}, frames] (get {result.size()})"
-        decoded_message = result.mean(dim=-1)
-        return torch.sigmoid(decoded_message)
-    def forward(self, x: torch.Tensor, sample_rate: Optional[int] = None) -> Tuple[torch.Tensor, torch.Tensor]:
-        """Return the per-frame detection probabilities and the decoded message.
-        When ``sample_rate`` is omitted a warning is logged and 16 kHz is assumed;
-        any other rate is resampled to 16 kHz with librosa before detection.
-        """
-        if sample_rate is None:
-            logger.warning(COMPATIBLE_WARNING)
-            sample_rate = 16_000
-        if sample_rate != 16000:
-            x_np = x.detach().cpu().numpy()
-            resampled_x = librosa.resample(x_np, orig_sr=sample_rate, target_sr=16000)
-            x = torch.tensor(resampled_x, device=x.device)
-        result = self.detector(x)
-        # The first two channels become probabilities (softmax across them);
-        # the remaining channels carry the message
-        result[:, :2, :] = torch.softmax(result[:, :2, :], dim=1)
-        message = self.decode_message(result[:, 2:, :])
-        return result[:, :2, :], message

src/audioseal/py.typed DELETED Viewed

File without changes

src/scripts/checkpoints.py DELETED Viewed

@@ -1,51 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-from pathlib import Path
-import torch
-def convert(checkpoint: str, outdir: str, suffix: str = "base"):
-    """Split a training checkpoint into a generator and a detector checkpoint.
-    Args:
-        checkpoint: Path of the checkpoint to split. It must contain the keys
-            ``"xp.cfg"`` and ``"model"``.
-        outdir: Directory receiving the two files; created when missing.
-        suffix: Appended to the output names ``checkpoint_generator_<suffix>.pth``
-            and ``checkpoint_detector_<suffix>.pth``.
-    """
-    outdir_path = Path(outdir)
-    # torch.save does not create missing parent directories
-    outdir_path.mkdir(parents=True, exist_ok=True)
-    # NOTE(review): torch.load unpickles the file - only convert checkpoints you trust.
-    ckpt = torch.load(checkpoint)
-    # keep inference-related params only
-    xp_cfg = ckpt["xp.cfg"]
-    infer_cfg = {
-        key: xp_cfg[key] for key in ("seanet", "channels", "dtype", "sample_rate")
-    }
-    generator_ckpt = {"xp.cfg": infer_cfg, "model": {}}
-    detector_ckpt = {"xp.cfg": infer_cfg, "model": {}}
-    # Parameter names carry their sub-model as a dotted prefix, which is stripped here
-    detector_prefix = "detector."
-    generator_prefix = "generator."
-    for layer, weights in ckpt["model"].items():
-        if layer.startswith("detector"):
-            detector_ckpt["model"][layer[len(detector_prefix) :]] = weights  # type: ignore
-        elif layer == "msg_processor.msg_processor.0.weight":
-            # The message embedding is stored under a new name in the generator checkpoint
-            generator_ckpt["model"]["msg_processor.msg_processor.weight"] = weights  # type: ignore
-        else:
-            assert layer.startswith("generator"), f"Invalid layer: {layer}"
-            generator_ckpt["model"][layer[len(generator_prefix) :]] = weights  # type: ignore
-    torch.save(generator_ckpt, outdir_path / (f"checkpoint_generator_{suffix}.pth"))
-    torch.save(detector_ckpt, outdir_path / (f"checkpoint_detector_{suffix}.pth"))
-# Script entry point: hand ``convert`` to python-fire, which builds the command line
-# from the function's parameters. ``fire`` is imported here so that importing this
-# module does not require it.
-if __name__ == "__main__":
-    import fire
-    fire.Fire(convert)