Add new SentenceTransformer model

Browse files

Files changed (11) hide show

README.md +679 -0
config.json +90 -0
config_sentence_transformers.json +14 -0
merges.txt +0 -0
model.safetensors +3 -0
modules.json +8 -0
sentence_bert_config.json +14 -0
special_tokens_map.json +51 -0
tokenizer.json +0 -0
tokenizer_config.json +63 -0
vocab.json +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,679 @@

+---
+language:
+- en
+license: apache-2.0
+tags:
+- sentence-transformers
+- sentence-similarity
+- feature-extraction
+- dense
+- generated_from_trainer
+- dataset_size:132553
+- loss:MultipleNegativesSymmetricRankingLoss
+base_model: laion/clap-htsat-fused
+widget:
+- source_sentence: HE WAS OUT OF HIS MIND WITH SOMETHING HE OVERHEARD ABOUT EATING
+    PEOPLE'S FLESH AND DRINKING BLOOD WHAT'S THE GOOD OF TALKING LIKE THAT
+  sentences:
+  - NESTORIUS WHO DEPENDED ON THE NEAR APPROACH OF HIS EASTERN FRIENDS PERSISTED LIKE
+    HIS PREDECESSOR CHRYSOSTOM TO DISCLAIM THE JURISDICTION AND TO DISOBEY THE SUMMONS
+    OF HIS ENEMIES THEY HASTENED HIS TRIAL AND HIS ACCUSER PRESIDED IN THE SEAT OF
+    JUDGMENT
+  - THEN BACK I TURNED MY FACE TO THOSE HIGH THINGS WHICH MOVED THEMSELVES TOWARDS
+    US SO SEDATELY THEY HAD BEEN DISTANCED BY NEW WEDDED BRIDES
+  - THE PROGRESS OF PRESIDENT DAVIS TO THE NEW CAPITAL SET IN THE VERY FACE OF THE
+    FOE WAS TO BE ONE HUGE TRIUMPH OF FAITH AND LOYALTY
+- source_sentence: I BELIEVE THE SERIOUSNESS OF THE AMERICANS ARISES PARTLY FROM THEIR
+    PRIDE
+  sentences:
+  - YOU HAVE BEEN TO THE HOTEL HE BURST OUT YOU HAVE SEEN CATHERINE
+  - WHAT DO YOU MEAN SIR
+  - A HARSH LAUGH FROM COMRADE OSSIPON CUT THE TIRADE DEAD SHORT IN A SUDDEN FALTERING
+    OF THE TONGUE AND A BEWILDERED UNSTEADINESS OF THE APOSTLE'S MILDLY EXALTED EYES
+- source_sentence: BUT YOU OUGHT TO HAVE KNOWN THAT WE ARE ONLY HALF AN HOUR BEHIND
+    YOU AT SYDENHAM IN THE MATTER OF NEWS
+  sentences:
+  - DOWN BELOW IN THE QUIET NARROW STREET MEASURED FOOTSTEPS APPROACHED THE HOUSE
+    THEN DIED AWAY UNHURRIED AND FIRM AS IF THE PASSER BY HAD STARTED TO PACE OUT
+    ALL ETERNITY FROM GAS LAMP TO GAS LAMP IN A NIGHT WITHOUT END AND THE DROWSY TICKING
+    OF THE OLD CLOCK ON THE LANDING BECAME DISTINCTLY AUDIBLE IN THE BEDROOM
+  - IT WAS A SUMMER NIGHT AND THE GUESTS WERE WANDERING IN AND OUT AT WILL AND THROUGH
+    HOUSE AND GARDEN AMID LOVELY THINGS OF ALL COLORS AND ODORS
+  - IF A MAN WERE SLAIN IN BATTLE IT WAS AN OLD CUSTOM TO PLACE HIS BODY AGAINST A
+    TREE OR ROCK IN A SITTING POSITION ALWAYS FACING THE ENEMY TO INDICATE HIS UNDAUNTED
+    DEFIANCE AND BRAVERY EVEN IN DEATH
+- source_sentence: THE MERCHANT'S DAUGHTER AT FIRST DID NOT ANSWER BUT AS HE KEPT
+    ON CALLING TO HER SHE FINALLY ASKED HIM WHAT IT WAS THAT HE WANTED
+  sentences:
+  - LODGED IN THE BRANCHES OF A PINYON TREE I THINK IT IS BUT HE DOESN'T ANSWER ME
+  - HOW ASKED TAD
+  - THE SECOND WAS AS IF HER FLESH AND BONES HAD ALL BEEN FASHIONED OUT OF EMERALD
+    THE THIRD APPEARED AS SNOW BUT NEWLY FALLEN
+- source_sentence: THERE ARE NATURES TOO TO WHOSE SENSE OF JUSTICE THE PRICE EXACTED
+    LOOMS UP MONSTROUSLY ENORMOUS ODIOUS OPPRESSIVE WORRYING HUMILIATING EXTORTIONATE
+    INTOLERABLE THOSE ARE THE FANATICS
+  sentences:
+  - I SHALL LOCK UP ALL THE DOORS AND WINDOWS IN THE HOUSE AND THEN I SHALL GIVE YOU
+    MY LATCH KEY AND YOU CAN LET YOURSELF IN AND STAY THE NIGHT HERE THERE IS NO ONE
+    IN THE HOUSE
+  - HERE THE HOLY PRELATE OF FERNS MET HIM AND RELATED A VISION IN WHICH HE HAD BEEN
+    INSTRUCTED TO DEMAND THE ABOLITION OF THE IMPOST
+  - HE BEGAN TO WISH THAT HE HAD COMPROMISED IN SOME WAY OR OTHER THAT HE HAD SENT
+    THE MONEY PERHAPS HE COULD DO IT UP HERE
+datasets:
+- openslr/librispeech_asr
+pipeline_tag: sentence-similarity
+library_name: sentence-transformers
+metrics:
+- cosine_accuracy@1
+- cosine_accuracy@3
+- cosine_accuracy@5
+- cosine_accuracy@10
+- cosine_precision@1
+- cosine_precision@3
+- cosine_precision@5
+- cosine_precision@10
+- cosine_recall@1
+- cosine_recall@3
+- cosine_recall@5
+- cosine_recall@10
+- cosine_ndcg@10
+- cosine_mrr@10
+- cosine_map@100
+co2_eq_emissions:
+  emissions: 114.78151570511905
+  energy_consumed: 0.42889417052827883
+  source: codecarbon
+  training_type: fine-tuning
+  on_cloud: false
+  cpu_model: 13th Gen Intel(R) Core(TM) i7-13700K
+  ram_total_size: 31.777088165283203
+  hours_used: 2.094
+  hardware_used: 1 x NVIDIA GeForce RTX 3090
+model-index:
+- name: CLAP model trained on LibriSpeech
+  results:
+  - task:
+      type: information-retrieval
+      name: Information Retrieval
+    dataset:
+      name: librispeech eval
+      type: librispeech-eval
+    metrics:
+    - type: cosine_accuracy@1
+      value: 0.108
+      name: Cosine Accuracy@1
+    - type: cosine_accuracy@3
+      value: 0.196
+      name: Cosine Accuracy@3
+    - type: cosine_accuracy@5
+      value: 0.272
+      name: Cosine Accuracy@5
+    - type: cosine_accuracy@10
+      value: 0.438
+      name: Cosine Accuracy@10
+    - type: cosine_precision@1
+      value: 0.108
+      name: Cosine Precision@1
+    - type: cosine_precision@3
+      value: 0.06533333333333333
+      name: Cosine Precision@3
+    - type: cosine_precision@5
+      value: 0.054400000000000004
+      name: Cosine Precision@5
+    - type: cosine_precision@10
+      value: 0.0438
+      name: Cosine Precision@10
+    - type: cosine_recall@1
+      value: 0.108
+      name: Cosine Recall@1
+    - type: cosine_recall@3
+      value: 0.196
+      name: Cosine Recall@3
+    - type: cosine_recall@5
+      value: 0.272
+      name: Cosine Recall@5
+    - type: cosine_recall@10
+      value: 0.438
+      name: Cosine Recall@10
+    - type: cosine_ndcg@10
+      value: 0.24322279069515917
+      name: Cosine Ndcg@10
+    - type: cosine_mrr@10
+      value: 0.18493690476190464
+      name: Cosine Mrr@10
+    - type: cosine_map@100
+      value: 0.20597911270433167
+      name: Cosine Map@100
+  - task:
+      type: information-retrieval
+      name: Information Retrieval
+    dataset:
+      name: librispeech test
+      type: librispeech-test
+    metrics:
+    - type: cosine_accuracy@1
+      value: 0.151
+      name: Cosine Accuracy@1
+    - type: cosine_accuracy@3
+      value: 0.288
+      name: Cosine Accuracy@3
+    - type: cosine_accuracy@5
+      value: 0.371
+      name: Cosine Accuracy@5
+    - type: cosine_accuracy@10
+      value: 0.518
+      name: Cosine Accuracy@10
+    - type: cosine_precision@1
+      value: 0.151
+      name: Cosine Precision@1
+    - type: cosine_precision@3
+      value: 0.096
+      name: Cosine Precision@3
+    - type: cosine_precision@5
+      value: 0.0742
+      name: Cosine Precision@5
+    - type: cosine_precision@10
+      value: 0.0518
+      name: Cosine Precision@10
+    - type: cosine_recall@1
+      value: 0.151
+      name: Cosine Recall@1
+    - type: cosine_recall@3
+      value: 0.288
+      name: Cosine Recall@3
+    - type: cosine_recall@5
+      value: 0.371
+      name: Cosine Recall@5
+    - type: cosine_recall@10
+      value: 0.518
+      name: Cosine Recall@10
+    - type: cosine_ndcg@10
+      value: 0.31319206378414244
+      name: Cosine Ndcg@10
+    - type: cosine_mrr@10
+      value: 0.25047857142857116
+      name: Cosine Mrr@10
+    - type: cosine_map@100
+      value: 0.2693786295421681
+      name: Cosine Map@100
+---
+# CLAP model trained on LibriSpeech
+This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [laion/clap-htsat-fused](https://huggingface.co/laion/clap-htsat-fused) on the [librispeech_asr](https://huggingface.co/datasets/openslr/librispeech_asr) dataset. It maps sentences & paragraphs, as well as audio, to a dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+## Model Details
+### Model Description
+- **Model Type:** Sentence Transformer
+- **Base model:** [laion/clap-htsat-fused](https://huggingface.co/laion/clap-htsat-fused) <!-- at revision 1d58d5192f5e4f16b57c574c7daf3d941404bd06 -->
+- **Maximum Sequence Length:** Not specified
+- **Output Dimensionality:** Not specified
+- **Similarity Function:** Cosine Similarity
+- **Training Dataset:**
+    - [librispeech_asr](https://huggingface.co/datasets/openslr/librispeech_asr)
+- **Language:** en
+- **License:** apache-2.0
+### Model Sources
+- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
+- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
+- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
+### Full Model Architecture
+```
+SentenceTransformer(
+  (0): Transformer({'transformer_task': 'feature-extraction', 'modality_config': {'text': {'method': 'get_text_features', 'method_output_name': None}, 'audio': {'method': 'get_audio_features', 'method_output_name': None}}, 'module_output_name': 'sentence_embedding', 'architecture': 'ClapModel'})
+)
+```
+## Usage
+### Direct Usage (Sentence Transformers)
+First install the Sentence Transformers library:
+```bash
+pip install -U sentence-transformers
+```
+Then you can load this model and run inference.
+```python
+from sentence_transformers import SentenceTransformer
+# Download from the 🤗 Hub
+model = SentenceTransformer("tomaarsen/clap-htsat-fused-librispeech")
+# Run inference
+sentences = [
+    'THERE ARE NATURES TOO TO WHOSE SENSE OF JUSTICE THE PRICE EXACTED LOOMS UP MONSTROUSLY ENORMOUS ODIOUS OPPRESSIVE WORRYING HUMILIATING EXTORTIONATE INTOLERABLE THOSE ARE THE FANATICS',
+    'HE BEGAN TO WISH THAT HE HAD COMPROMISED IN SOME WAY OR OTHER THAT HE HAD SENT THE MONEY PERHAPS HE COULD DO IT UP HERE',
+    'HERE THE HOLY PRELATE OF FERNS MET HIM AND RELATED A VISION IN WHICH HE HAD BEEN INSTRUCTED TO DEMAND THE ABOLITION OF THE IMPOST',
+]
+embeddings = model.encode(sentences)
+print(embeddings.shape)
+# [3, 1024]
+# Get the similarity scores for the embeddings
+similarities = model.similarity(embeddings, embeddings)
+print(similarities)
+# tensor([[ 1.0000, -0.4742, -0.2719],
+#         [-0.4742,  1.0000,  0.8206],
+#         [-0.2719,  0.8206,  1.0000]])
+```
+<!--
+### Direct Usage (Transformers)
+<details><summary>Click to see the direct usage in Transformers</summary>
+</details>
+-->
+<!--
+### Downstream Usage (Sentence Transformers)
+You can finetune this model on your own dataset.
+<details><summary>Click to expand</summary>
+</details>
+-->
+<!--
+### Out-of-Scope Use
+*List how the model may foreseeably be misused and address what users ought not to do with the model.*
+-->
+## Evaluation
+### Metrics
+#### Information Retrieval
+* Datasets: `librispeech-eval` and `librispeech-test`
+* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
+| Metric              | librispeech-eval | librispeech-test |
+|:--------------------|:-----------------|:-----------------|
+| cosine_accuracy@1   | 0.108            | 0.151            |
+| cosine_accuracy@3   | 0.196            | 0.288            |
+| cosine_accuracy@5   | 0.272            | 0.371            |
+| cosine_accuracy@10  | 0.438            | 0.518            |
+| cosine_precision@1  | 0.108            | 0.151            |
+| cosine_precision@3  | 0.0653           | 0.096            |
+| cosine_precision@5  | 0.0544           | 0.0742           |
+| cosine_precision@10 | 0.0438           | 0.0518           |
+| cosine_recall@1     | 0.108            | 0.151            |
+| cosine_recall@3     | 0.196            | 0.288            |
+| cosine_recall@5     | 0.272            | 0.371            |
+| cosine_recall@10    | 0.438            | 0.518            |
+| **cosine_ndcg@10**  | **0.2432**       | **0.3132**       |
+| cosine_mrr@10       | 0.1849           | 0.2505           |
+| cosine_map@100      | 0.206            | 0.2694           |
+<!--
+## Bias, Risks and Limitations
+*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+-->
+<!--
+### Recommendations
+*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+-->
+## Training Details
+### Training Dataset
+#### librispeech_asr
+* Dataset: [librispeech_asr](https://huggingface.co/datasets/openslr/librispeech_asr) at [71cacbf](https://huggingface.co/datasets/openslr/librispeech_asr/tree/71cacbfb7e2354c4226d01e70d77d5fca3d04ba1)
+* Size: 132,553 training samples
+* Columns: <code>audio</code> and <code>text</code>
+* Approximate statistics based on the first 1000 samples:
+  |         | audio              | text                                                                                             |
+  |:--------|:-------------------|:-------------------------------------------------------------------------------------------------|
+  | type    | dict               | string                                                                                           |
+  | details | <ul><li></li></ul> | <ul><li>min: 20 characters</li><li>mean: 189.15 characters</li><li>max: 294 characters</li></ul> |
+* Samples:
+  | audio                                                                                                                                                                                                        | text                                                                                                                                                                                                                                |
+  |:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+  | <code>{'path': '374-180298-0000.flac', 'array': array([ 6.92203816e-04,  8.04404495e-04,  8.03834875e-04, ...,<br>       -3.02505396e-05, -6.59527450e-06,  1.11444592e-06]), 'sampling_rate': 48000}</code> | <code>CHAPTER SIXTEEN I MIGHT HAVE TOLD YOU OF THE BEGINNING OF THIS LIAISON IN A FEW LINES BUT I WANTED YOU TO SEE EVERY STEP BY WHICH WE CAME I TO AGREE TO WHATEVER MARGUERITE WISHED</code>                                     |
+  | <code>{'path': '374-180298-0001.flac', 'array': array([-9.33515839e-05, -1.25754057e-04, -1.44482241e-04, ...,<br>       -2.66165182e-04, -2.03228556e-04, -1.03404833e-04]), 'sampling_rate': 48000}</code> | <code>MARGUERITE TO BE UNABLE TO LIVE APART FROM ME IT WAS THE DAY AFTER THE EVENING WHEN SHE CAME TO SEE ME THAT I SENT HER MANON LESCAUT FROM THAT TIME SEEING THAT I COULD NOT CHANGE MY MISTRESS'S LIFE I CHANGED MY OWN</code> |
+  | <code>{'path': '374-180298-0002.flac', 'array': array([-2.47883319e-04, -2.91854434e-04, -2.82971043e-04, ...,<br>       -1.43931946e-04, -1.17829914e-04, -6.32331648e-05]), 'sampling_rate': 48000}</code> | <code>I WISHED ABOVE ALL NOT TO LEAVE MYSELF TIME TO THINK OVER THE POSITION I HAD ACCEPTED FOR IN SPITE OF MYSELF IT WAS A GREAT DISTRESS TO ME THUS MY LIFE GENERALLY SO CALM</code>                                              |
+* Loss: [<code>MultipleNegativesSymmetricRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativessymmetricrankingloss) with these parameters:
+  ```json
+  {
+      "scale": 20.0,
+      "similarity_fct": "cos_sim",
+      "gather_across_devices": false
+  }
+  ```
+### Evaluation Dataset
+#### librispeech_asr
+* Dataset: [librispeech_asr](https://huggingface.co/datasets/openslr/librispeech_asr) at [71cacbf](https://huggingface.co/datasets/openslr/librispeech_asr/tree/71cacbfb7e2354c4226d01e70d77d5fca3d04ba1)
+* Size: 1,000 evaluation samples
+* Columns: <code>audio</code> and <code>text</code>
+* Approximate statistics based on the first 1000 samples:
+  |         | audio              | text                                                                                            |
+  |:--------|:-------------------|:------------------------------------------------------------------------------------------------|
+  | type    | dict               | string                                                                                          |
+  | details | <ul><li></li></ul> | <ul><li>min: 8 characters</li><li>mean: 104.62 characters</li><li>max: 516 characters</li></ul> |
+* Samples:
+  | audio                                                                                                                                                                                 | text                                                                                                                                         |
+  |:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------|
+  | <code>{'path': '2277-149896-0000.flac', 'array': array([ 0.00179741,  0.00170625,  0.00120927, ..., -0.00144462,<br>       -0.00102732, -0.00048062]), 'sampling_rate': 48000}</code> | <code>HE WAS IN A FEVERED STATE OF MIND OWING TO THE BLIGHT HIS WIFE'S ACTION THREATENED TO CAST UPON HIS ENTIRE FUTURE</code>               |
+  | <code>{'path': '2277-149896-0001.flac', 'array': array([ 0.00111104,  0.00081758,  0.00021103, ..., -0.00138193,<br>       -0.0009173 , -0.00041702]), 'sampling_rate': 48000}</code> | <code>HE WOULD HAVE TO PAY HER THE MONEY WHICH SHE WOULD NOW REGULARLY DEMAND OR THERE WOULD BE TROUBLE IT DID NOT MATTER WHAT HE DID</code> |
+  | <code>{'path': '2277-149896-0002.flac', 'array': array([0.00080266, 0.00088462, 0.00083408, ..., 0.00105488, 0.00083673,<br>       0.00043296]), 'sampling_rate': 48000}</code>       | <code>HURSTWOOD WALKED THE FLOOR MENTALLY ARRANGING THE CHIEF POINTS OF HIS SITUATION</code>                                                 |
+* Loss: [<code>MultipleNegativesSymmetricRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativessymmetricrankingloss) with these parameters:
+  ```json
+  {
+      "scale": 20.0,
+      "similarity_fct": "cos_sim",
+      "gather_across_devices": false
+  }
+  ```
+### Training Hyperparameters
+#### Non-Default Hyperparameters
+- `eval_strategy`: steps
+- `per_device_train_batch_size`: 16
+- `per_device_eval_batch_size`: 16
+- `learning_rate`: 2e-05
+- `num_train_epochs`: 1
+- `warmup_ratio`: 0.1
+- `bf16`: True
+- `batch_sampler`: no_duplicates
+#### All Hyperparameters
+<details><summary>Click to expand</summary>
+- `overwrite_output_dir`: False
+- `do_predict`: False
+- `eval_strategy`: steps
+- `prediction_loss_only`: True
+- `per_device_train_batch_size`: 16
+- `per_device_eval_batch_size`: 16
+- `gradient_accumulation_steps`: 1
+- `eval_accumulation_steps`: None
+- `torch_empty_cache_steps`: None
+- `learning_rate`: 2e-05
+- `weight_decay`: 0.0
+- `adam_beta1`: 0.9
+- `adam_beta2`: 0.999
+- `adam_epsilon`: 1e-08
+- `max_grad_norm`: 1.0
+- `num_train_epochs`: 1
+- `max_steps`: -1
+- `lr_scheduler_type`: linear
+- `lr_scheduler_kwargs`: {}
+- `warmup_ratio`: 0.1
+- `warmup_steps`: 0
+- `log_level`: passive
+- `log_level_replica`: warning
+- `log_on_each_node`: True
+- `logging_nan_inf_filter`: True
+- `save_safetensors`: True
+- `save_on_each_node`: False
+- `save_only_model`: False
+- `restore_callback_states_from_checkpoint`: False
+- `use_cpu`: False
+- `seed`: 42
+- `data_seed`: None
+- `jit_mode_eval`: False
+- `bf16`: True
+- `fp16`: False
+- `half_precision_backend`: None
+- `bf16_full_eval`: False
+- `fp16_full_eval`: False
+- `tf32`: None
+- `local_rank`: 0
+- `ddp_backend`: None
+- `tpu_num_cores`: None
+- `debug`: []
+- `dataloader_drop_last`: False
+- `dataloader_num_workers`: 0
+- `dataloader_prefetch_factor`: None
+- `past_index`: -1
+- `disable_tqdm`: False
+- `remove_unused_columns`: True
+- `label_names`: None
+- `load_best_model_at_end`: False
+- `ignore_data_skip`: False
+- `fsdp`: []
+- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
+- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
+- `parallelism_config`: None
+- `deepspeed`: None
+- `label_smoothing_factor`: 0.0
+- `optim`: adamw_torch_fused
+- `optim_args`: None
+- `group_by_length`: False
+- `length_column_name`: length
+- `ddp_find_unused_parameters`: None
+- `ddp_bucket_cap_mb`: None
+- `ddp_broadcast_buffers`: False
+- `dataloader_pin_memory`: True
+- `dataloader_persistent_workers`: False
+- `skip_memory_metrics`: True
+- `use_legacy_prediction_loop`: False
+- `push_to_hub`: False
+- `resume_from_checkpoint`: None
+- `hub_model_id`: None
+- `hub_strategy`: every_save
+- `hub_private_repo`: None
+- `hub_always_push`: False
+- `hub_revision`: None
+- `gradient_checkpointing`: False
+- `gradient_checkpointing_kwargs`: None
+- `include_for_metrics`: []
+- `eval_do_concat_batches`: True
+- `mp_parameters`:
+- `auto_find_batch_size`: False
+- `full_determinism`: False
+- `ray_scope`: last
+- `ddp_timeout`: 1800
+- `torch_compile`: False
+- `torch_compile_backend`: None
+- `torch_compile_mode`: None
+- `include_tokens_per_second`: False
+- `include_num_input_tokens_seen`: no
+- `neftune_noise_alpha`: None
+- `optim_target_modules`: None
+- `batch_eval_metrics`: False
+- `eval_on_start`: False
+- `use_liger_kernel`: False
+- `liger_kernel_config`: None
+- `eval_use_gather_object`: False
+- `average_tokens_across_devices`: True
+- `prompts`: None
+- `batch_sampler`: no_duplicates
+- `multi_dataset_batch_sampler`: proportional
+- `router_mapping`: {}
+- `learning_rate_mapping`: {}
+</details>
+### Training Logs
+<details><summary>Click to expand</summary>
+| Epoch  | Step | Training Loss | Validation Loss | librispeech-eval_cosine_ndcg@10 | librispeech-test_cosine_ndcg@10 |
+|:------:|:----:|:-------------:|:---------------:|:-------------------------------:|:-------------------------------:|
+| -1     | -1   | -             | -               | 0.0114                          | -                               |
+| 0.0100 | 83   | 3.5908        | -               | -                               | -                               |
+| 0.0200 | 166  | 2.5371        | -               | -                               | -                               |
+| 0.0301 | 249  | 2.1799        | -               | -                               | -                               |
+| 0.0401 | 332  | 2.0415        | -               | -                               | -                               |
+| 0.0501 | 415  | 1.9394        | -               | -                               | -                               |
+| 0.0601 | 498  | 1.8167        | -               | -                               | -                               |
+| 0.0701 | 581  | 1.7589        | -               | -                               | -                               |
+| 0.0801 | 664  | 1.7262        | -               | -                               | -                               |
+| 0.0902 | 747  | 1.7585        | -               | -                               | -                               |
+| 0.1001 | 829  | -             | 1.5991          | 0.0335                          | -                               |
+| 0.1002 | 830  | 1.7521        | -               | -                               | -                               |
+| 0.1102 | 913  | 1.6822        | -               | -                               | -                               |
+| 0.1202 | 996  | 1.6176        | -               | -                               | -                               |
+| 0.1302 | 1079 | 1.6391        | -               | -                               | -                               |
+| 0.1403 | 1162 | 1.6931        | -               | -                               | -                               |
+| 0.1503 | 1245 | 1.4626        | -               | -                               | -                               |
+| 0.1603 | 1328 | 1.4305        | -               | -                               | -                               |
+| 0.1703 | 1411 | 1.4998        | -               | -                               | -                               |
+| 0.1803 | 1494 | 1.4073        | -               | -                               | -                               |
+| 0.1903 | 1577 | 1.3843        | -               | -                               | -                               |
+| 0.2001 | 1658 | -             | 1.2227          | 0.0925                          | -                               |
+| 0.2004 | 1660 | 1.3371        | -               | -                               | -                               |
+| 0.2104 | 1743 | 1.3908        | -               | -                               | -                               |
+| 0.2204 | 1826 | 1.2835        | -               | -                               | -                               |
+| 0.2304 | 1909 | 1.3203        | -               | -                               | -                               |
+| 0.2404 | 1992 | 1.2549        | -               | -                               | -                               |
+| 0.2505 | 2075 | 1.2384        | -               | -                               | -                               |
+| 0.2605 | 2158 | 1.2189        | -               | -                               | -                               |
+| 0.2705 | 2241 | 1.1658        | -               | -                               | -                               |
+| 0.2805 | 2324 | 1.1771        | -               | -                               | -                               |
+| 0.2905 | 2407 | 1.2068        | -               | -                               | -                               |
+| 0.3002 | 2487 | -             | 1.0471          | 0.1318                          | -                               |
+| 0.3005 | 2490 | 1.1708        | -               | -                               | -                               |
+| 0.3106 | 2573 | 1.1389        | -               | -                               | -                               |
+| 0.3206 | 2656 | 1.0786        | -               | -                               | -                               |
+| 0.3306 | 2739 | 1.0792        | -               | -                               | -                               |
+| 0.3406 | 2822 | 1.0562        | -               | -                               | -                               |
+| 0.3506 | 2905 | 0.98          | -               | -                               | -                               |
+| 0.3607 | 2988 | 1.1153        | -               | -                               | -                               |
+| 0.3707 | 3071 | 0.9987        | -               | -                               | -                               |
+| 0.3807 | 3154 | 1.0002        | -               | -                               | -                               |
+| 0.3907 | 3237 | 1.0017        | -               | -                               | -                               |
+| 0.4002 | 3316 | -             | 0.8901          | 0.1589                          | -                               |
+| 0.4007 | 3320 | 0.9364        | -               | -                               | -                               |
+| 0.4107 | 3403 | 0.9394        | -               | -                               | -                               |
+| 0.4208 | 3486 | 0.9459        | -               | -                               | -                               |
+| 0.4308 | 3569 | 0.9604        | -               | -                               | -                               |
+| 0.4408 | 3652 | 0.9491        | -               | -                               | -                               |
+| 0.4508 | 3735 | 0.9295        | -               | -                               | -                               |
+| 0.4608 | 3818 | 0.9508        | -               | -                               | -                               |
+| 0.4709 | 3901 | 0.9122        | -               | -                               | -                               |
+| 0.4809 | 3984 | 0.8483        | -               | -                               | -                               |
+| 0.4909 | 4067 | 0.8443        | -               | -                               | -                               |
+| 0.5003 | 4145 | -             | 0.7955          | 0.1908                          | -                               |
+| 0.5009 | 4150 | 0.8838        | -               | -                               | -                               |
+| 0.5109 | 4233 | 0.8367        | -               | -                               | -                               |
+| 0.5209 | 4316 | 0.8516        | -               | -                               | -                               |
+| 0.5310 | 4399 | 0.8112        | -               | -                               | -                               |
+| 0.5410 | 4482 | 0.8368        | -               | -                               | -                               |
+| 0.5510 | 4565 | 0.873         | -               | -                               | -                               |
+| 0.5610 | 4648 | 0.8156        | -               | -                               | -                               |
+| 0.5710 | 4731 | 0.8864        | -               | -                               | -                               |
+| 0.5811 | 4814 | 0.8278        | -               | -                               | -                               |
+| 0.5911 | 4897 | 0.8006        | -               | -                               | -                               |
+| 0.6004 | 4974 | -             | 0.7649          | 0.1874                          | -                               |
+| 0.6011 | 4980 | 0.8199        | -               | -                               | -                               |
+| 0.6111 | 5063 | 0.7475        | -               | -                               | -                               |
+| 0.6211 | 5146 | 0.7345        | -               | -                               | -                               |
+| 0.6311 | 5229 | 0.7301        | -               | -                               | -                               |
+| 0.6412 | 5312 | 0.774         | -               | -                               | -                               |
+| 0.6512 | 5395 | 0.7391        | -               | -                               | -                               |
+| 0.6612 | 5478 | 0.6929        | -               | -                               | -                               |
+| 0.6712 | 5561 | 0.7218        | -               | -                               | -                               |
+| 0.6812 | 5644 | 0.7071        | -               | -                               | -                               |
+| 0.6912 | 5727 | 0.7024        | -               | -                               | -                               |
+| 0.7004 | 5803 | -             | 0.6712          | 0.2419                          | -                               |
+| 0.7013 | 5810 | 0.6428        | -               | -                               | -                               |
+| 0.7113 | 5893 | 0.6719        | -               | -                               | -                               |
+| 0.7213 | 5976 | 0.6972        | -               | -                               | -                               |
+| 0.7313 | 6059 | 0.7043        | -               | -                               | -                               |
+| 0.7413 | 6142 | 0.663         | -               | -                               | -                               |
+| 0.7514 | 6225 | 0.6963        | -               | -                               | -                               |
+| 0.7614 | 6308 | 0.6591        | -               | -                               | -                               |
+| 0.7714 | 6391 | 0.6736        | -               | -                               | -                               |
+| 0.7814 | 6474 | 0.7033        | -               | -                               | -                               |
+| 0.7914 | 6557 | 0.6314        | -               | -                               | -                               |
+| 0.8005 | 6632 | -             | 0.6806          | 0.2319                          | -                               |
+| 0.8014 | 6640 | 0.6508        | -               | -                               | -                               |
+| 0.8115 | 6723 | 0.6532        | -               | -                               | -                               |
+| 0.8215 | 6806 | 0.6788        | -               | -                               | -                               |
+| 0.8315 | 6889 | 0.6038        | -               | -                               | -                               |
+| 0.8415 | 6972 | 0.658         | -               | -                               | -                               |
+| 0.8515 | 7055 | 0.656         | -               | -                               | -                               |
+| 0.8616 | 7138 | 0.6533        | -               | -                               | -                               |
+| 0.8716 | 7221 | 0.601         | -               | -                               | -                               |
+| 0.8816 | 7304 | 0.6243        | -               | -                               | -                               |
+| 0.8916 | 7387 | 0.6315        | -               | -                               | -                               |
+| 0.9005 | 7461 | -             | 0.6526          | 0.2432                          | -                               |
+| 0.9016 | 7470 | 0.5707        | -               | -                               | -                               |
+| 0.9116 | 7553 | 0.5778        | -               | -                               | -                               |
+| 0.9217 | 7636 | 0.5736        | -               | -                               | -                               |
+| 0.9317 | 7719 | 0.615         | -               | -                               | -                               |
+| 0.9417 | 7802 | 0.5756        | -               | -                               | -                               |
+| 0.9517 | 7885 | 0.5724        | -               | -                               | -                               |
+| 0.9617 | 7968 | 0.5678        | -               | -                               | -                               |
+| 0.9718 | 8051 | 0.5661        | -               | -                               | -                               |
+| 0.9818 | 8134 | 0.6162        | -               | -                               | -                               |
+| 0.9918 | 8217 | 0.5766        | -               | -                               | -                               |
+| -1     | -1   | -             | -               | -                               | 0.3132                          |
+</details>
+
+### Environmental Impact
+Carbon emissions were measured using [CodeCarbon](https://github.com/mlco2/codecarbon).
+- **Energy Consumed**: 0.429 kWh
+- **Carbon Emitted**: 0.115 kg of CO2
+- **Hours Used**: 2.094 hours
+### Training Hardware
+- **On Cloud**: No
+- **GPU Model**: 1 x NVIDIA GeForce RTX 3090
+- **CPU Model**: 13th Gen Intel(R) Core(TM) i7-13700K
+- **RAM Size**: 31.78 GB
+### Framework Versions
+- Python: 3.11.6
+- Sentence Transformers: 5.2.0.dev0
+- Transformers: 4.57.0.dev0
+- PyTorch: 2.8.0+cu128
+- Accelerate: 1.6.0
+- Datasets: 3.6.0
+- Tokenizers: 0.22.1
+## Citation
+### BibTeX
+#### Sentence Transformers
+```bibtex
+@inproceedings{reimers-2019-sentence-bert,
+    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+    author = "Reimers, Nils and Gurevych, Iryna",
+    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+    month = "11",
+    year = "2019",
+    publisher = "Association for Computational Linguistics",
+    url = "https://arxiv.org/abs/1908.10084",
+}
+```
+<!--
+## Glossary
+*Clearly define terms in order to be accessible across audiences.*
+-->
+<!--
+## Model Card Authors
+*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+-->
+<!--
+## Model Card Contact
+*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+-->

config.json ADDED Viewed

	@@ -0,0 +1,90 @@

+{
+  "architectures": [
+    "ClapModel"
+  ],
+  "audio_config": {
+    "aff_block_r": 4,
+    "attention_probs_dropout_prob": 0.0,
+    "depths": [
+      2,
+      2,
+      6,
+      2
+    ],
+    "drop_path_rate": 0.0,
+    "dtype": "float32",
+    "enable_fusion": true,
+    "enable_patch_fusion": true,
+    "enable_patch_layer_norm": true,
+    "flatten_patch_embeds": true,
+    "fusion_num_hidden_layers": 2,
+    "fusion_type": null,
+    "hidden_act": "gelu",
+    "hidden_dropout_prob": 0.1,
+    "hidden_size": 768,
+    "initializer_factor": 1.0,
+    "layer_norm_eps": 1e-05,
+    "mlp_ratio": 4.0,
+    "model_type": "clap_audio_model",
+    "num_attention_heads": [
+      4,
+      8,
+      16,
+      32
+    ],
+    "num_classes": 527,
+    "num_hidden_layers": 4,
+    "num_mel_bins": 64,
+    "patch_embed_input_channels": 1,
+    "patch_embeds_hidden_size": 96,
+    "patch_size": 4,
+    "patch_stride": [
+      4,
+      4
+    ],
+    "projection_dim": 512,
+    "projection_hidden_act": "relu",
+    "projection_hidden_size": 768,
+    "qkv_bias": true,
+    "spec_size": 256,
+    "tf_legacy_loss": false,
+    "use_bfloat16": false,
+    "window_size": 8
+  },
+  "dtype": "float32",
+  "hidden_size": 768,
+  "initializer_factor": 1.0,
+  "logit_scale_init_value": 14.285714285714285,
+  "model_type": "clap",
+  "num_hidden_layers": 16,
+  "projection_dim": 512,
+  "projection_hidden_act": "relu",
+  "text_config": {
+    "attention_probs_dropout_prob": 0.1,
+    "classifier_dropout": null,
+    "dtype": "float32",
+    "fusion_hidden_size": 768,
+    "fusion_num_hidden_layers": 2,
+    "hidden_act": "gelu",
+    "hidden_dropout_prob": 0.1,
+    "hidden_size": 768,
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 3072,
+    "layer_norm_eps": 1e-12,
+    "max_position_embeddings": 514,
+    "model_type": "clap_text_model",
+    "num_attention_heads": 12,
+    "num_hidden_layers": 12,
+    "position_embedding_type": "absolute",
+    "projection_dim": 512,
+    "projection_hidden_act": "relu",
+    "projection_hidden_size": 768,
+    "tf_legacy_loss": false,
+    "type_vocab_size": 1,
+    "use_bfloat16": false,
+    "use_cache": true,
+    "vocab_size": 50265
+  },
+  "transformers_version": "4.57.0.dev0"
+}

config_sentence_transformers.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "model_type": "SentenceTransformer",
+  "__version__": {
+    "sentence_transformers": "5.2.0.dev0",
+    "transformers": "4.57.0.dev0",
+    "pytorch": "2.8.0+cu128"
+  },
+  "prompts": {
+    "query": "",
+    "document": ""
+  },
+  "default_prompt_name": null,
+  "similarity_fn_name": "cosine"
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:27bd71bf31393a9b92f9789f03a41767ec315a8bb0f5769fb6dc4826f5f84081
+size 614496152

modules.json ADDED Viewed

	@@ -0,0 +1,8 @@

+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.models.Transformer"
+  }
+]

sentence_bert_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+    "transformer_task": "feature-extraction",
+    "modality_config": {
+        "text": {
+            "method": "get_text_features",
+            "method_output_name": null
+        },
+        "audio": {
+            "method": "get_audio_features",
+            "method_output_name": null
+        }
+    },
+    "module_output_name": "sentence_embedding"
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50264": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "mask_token": "<mask>",
+  "max_length": null,
+  "model_max_length": 512,
+  "pad_to_multiple_of": null,
+  "pad_token": "<pad>",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "processor_class": "ClapProcessor",
+  "sep_token": "</s>",
+  "tokenizer_class": "RobertaTokenizer",
+  "trim_offsets": true,
+  "unk_token": "<unk>"
+}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff