|
|
--- |
|
|
tags: |
|
|
- sentence-transformers |
|
|
- sentence-similarity |
|
|
- feature-extraction |
|
|
- generated_from_trainer |
|
|
- dataset_size:86648 |
|
|
- loss:MSELoss |
|
|
widget: |
|
|
- source_sentence: Familienberaterin |
|
|
sentences: |
|
|
- electric power station operator |
|
|
- venue booker & promoter |
|
|
- betrieblicher Aus- und Weiterbildner/betriebliche Aus- und Weiterbildnerin |
|
|
- source_sentence: high school RS teacher |
|
|
sentences: |
|
|
- infantryman |
|
|
- Schnellbedienungsrestaurantteamleiter |
|
|
- drill setup operator |
|
|
- source_sentence: lighting designer |
|
|
sentences: |
|
|
- software support manager |
|
|
- 直升机维护协调员 |
|
|
- bus maintenance supervisor |
|
|
- source_sentence: 机场消防员 |
|
|
sentences: |
|
|
- Flake操作员 |
|
|
- técnico en gestión de residuos peligrosos/técnica en gestión de residuos peligrosos |
|
|
- 专门学校老师 |
|
|
- source_sentence: Entwicklerin für mobile Anwendungen |
|
|
sentences: |
|
|
- fashion design expert |
|
|
- Mergers-and-Acquisitions-Analyst/Mergers-and-Acquisitions-Analystin |
|
|
- commercial bid manager |
|
|
pipeline_tag: sentence-similarity |
|
|
library_name: sentence-transformers |
|
|
metrics: |
|
|
- cosine_accuracy@1 |
|
|
- cosine_accuracy@20 |
|
|
- cosine_accuracy@50 |
|
|
- cosine_accuracy@100 |
|
|
- cosine_accuracy@150 |
|
|
- cosine_accuracy@200 |
|
|
- cosine_precision@1 |
|
|
- cosine_precision@20 |
|
|
- cosine_precision@50 |
|
|
- cosine_precision@100 |
|
|
- cosine_precision@150 |
|
|
- cosine_precision@200 |
|
|
- cosine_recall@1 |
|
|
- cosine_recall@20 |
|
|
- cosine_recall@50 |
|
|
- cosine_recall@100 |
|
|
- cosine_recall@150 |
|
|
- cosine_recall@200 |
|
|
- cosine_ndcg@1 |
|
|
- cosine_ndcg@20 |
|
|
- cosine_ndcg@50 |
|
|
- cosine_ndcg@100 |
|
|
- cosine_ndcg@150 |
|
|
- cosine_ndcg@200 |
|
|
- cosine_mrr@1 |
|
|
- cosine_mrr@20 |
|
|
- cosine_mrr@50 |
|
|
- cosine_mrr@100 |
|
|
- cosine_mrr@150 |
|
|
- cosine_mrr@200 |
|
|
- cosine_map@1 |
|
|
- cosine_map@20 |
|
|
- cosine_map@50 |
|
|
- cosine_map@100 |
|
|
- cosine_map@150 |
|
|
- cosine_map@200 |
|
|
- cosine_map@500 |
|
|
model-index: |
|
|
- name: SentenceTransformer |
|
|
results: |
|
|
- task: |
|
|
type: information-retrieval |
|
|
name: Information Retrieval |
|
|
dataset: |
|
|
name: full en |
|
|
type: full_en |
|
|
metrics: |
|
|
- type: cosine_accuracy@1 |
|
|
value: 0.6476190476190476 |
|
|
name: Cosine Accuracy@1 |
|
|
- type: cosine_accuracy@20 |
|
|
value: 0.9714285714285714 |
|
|
name: Cosine Accuracy@20 |
|
|
- type: cosine_accuracy@50 |
|
|
value: 0.9904761904761905 |
|
|
name: Cosine Accuracy@50 |
|
|
- type: cosine_accuracy@100 |
|
|
value: 0.9904761904761905 |
|
|
name: Cosine Accuracy@100 |
|
|
- type: cosine_accuracy@150 |
|
|
value: 0.9904761904761905 |
|
|
name: Cosine Accuracy@150 |
|
|
- type: cosine_accuracy@200 |
|
|
value: 0.9904761904761905 |
|
|
name: Cosine Accuracy@200 |
|
|
- type: cosine_precision@1 |
|
|
value: 0.6476190476190476 |
|
|
name: Cosine Precision@1 |
|
|
- type: cosine_precision@20 |
|
|
value: 0.47952380952380946 |
|
|
name: Cosine Precision@20 |
|
|
- type: cosine_precision@50 |
|
|
value: 0.28838095238095235 |
|
|
name: Cosine Precision@50 |
|
|
- type: cosine_precision@100 |
|
|
value: 0.17304761904761906 |
|
|
name: Cosine Precision@100 |
|
|
- type: cosine_precision@150 |
|
|
value: 0.12444444444444444 |
|
|
name: Cosine Precision@150 |
|
|
- type: cosine_precision@200 |
|
|
value: 0.09857142857142859 |
|
|
name: Cosine Precision@200 |
|
|
- type: cosine_recall@1 |
|
|
value: 0.06609801577496094 |
|
|
name: Cosine Recall@1 |
|
|
- type: cosine_recall@20 |
|
|
value: 0.5122224752770898 |
|
|
name: Cosine Recall@20 |
|
|
- type: cosine_recall@50 |
|
|
value: 0.6835205863376973 |
|
|
name: Cosine Recall@50 |
|
|
- type: cosine_recall@100 |
|
|
value: 0.7899550177449521 |
|
|
name: Cosine Recall@100 |
|
|
- type: cosine_recall@150 |
|
|
value: 0.8399901051245952 |
|
|
name: Cosine Recall@150 |
|
|
- type: cosine_recall@200 |
|
|
value: 0.875868212220809 |
|
|
name: Cosine Recall@200 |
|
|
- type: cosine_ndcg@1 |
|
|
value: 0.6476190476190476 |
|
|
name: Cosine Ndcg@1 |
|
|
- type: cosine_ndcg@20 |
|
|
value: 0.6467537144833913 |
|
|
name: Cosine Ndcg@20 |
|
|
- type: cosine_ndcg@50 |
|
|
value: 0.6579566361404572 |
|
|
name: Cosine Ndcg@50 |
|
|
- type: cosine_ndcg@100 |
|
|
value: 0.7095129047395976 |
|
|
name: Cosine Ndcg@100 |
|
|
- type: cosine_ndcg@150 |
|
|
value: 0.7310060454392588 |
|
|
name: Cosine Ndcg@150 |
|
|
- type: cosine_ndcg@200 |
|
|
value: 0.746053293561821 |
|
|
name: Cosine Ndcg@200 |
|
|
- type: cosine_mrr@1 |
|
|
value: 0.6476190476190476 |
|
|
name: Cosine Mrr@1 |
|
|
- type: cosine_mrr@20 |
|
|
value: 0.7901817137111254 |
|
|
name: Cosine Mrr@20 |
|
|
- type: cosine_mrr@50 |
|
|
value: 0.7909547501984476 |
|
|
name: Cosine Mrr@50 |
|
|
- type: cosine_mrr@100 |
|
|
value: 0.7909547501984476 |
|
|
name: Cosine Mrr@100 |
|
|
- type: cosine_mrr@150 |
|
|
value: 0.7909547501984476 |
|
|
name: Cosine Mrr@150 |
|
|
- type: cosine_mrr@200 |
|
|
value: 0.7909547501984476 |
|
|
name: Cosine Mrr@200 |
|
|
- type: cosine_map@1 |
|
|
value: 0.6476190476190476 |
|
|
name: Cosine Map@1 |
|
|
- type: cosine_map@20 |
|
|
value: 0.5025649155749793 |
|
|
name: Cosine Map@20 |
|
|
- type: cosine_map@50 |
|
|
value: 0.48398477448194993 |
|
|
name: Cosine Map@50 |
|
|
- type: cosine_map@100 |
|
|
value: 0.5117703759309522 |
|
|
name: Cosine Map@100 |
|
|
- type: cosine_map@150 |
|
|
value: 0.520199435224254 |
|
|
name: Cosine Map@150 |
|
|
- type: cosine_map@200 |
|
|
value: 0.5249113393002316 |
|
|
name: Cosine Map@200 |
|
|
- type: cosine_map@500 |
|
|
value: 0.5304170344184883 |
|
|
name: Cosine Map@500 |
|
|
- task: |
|
|
type: information-retrieval |
|
|
name: Information Retrieval |
|
|
dataset: |
|
|
name: full es |
|
|
type: full_es |
|
|
metrics: |
|
|
- type: cosine_accuracy@1 |
|
|
value: 0.11891891891891893 |
|
|
name: Cosine Accuracy@1 |
|
|
- type: cosine_accuracy@20 |
|
|
value: 1.0 |
|
|
name: Cosine Accuracy@20 |
|
|
- type: cosine_accuracy@50 |
|
|
value: 1.0 |
|
|
name: Cosine Accuracy@50 |
|
|
- type: cosine_accuracy@100 |
|
|
value: 1.0 |
|
|
name: Cosine Accuracy@100 |
|
|
- type: cosine_accuracy@150 |
|
|
value: 1.0 |
|
|
name: Cosine Accuracy@150 |
|
|
- type: cosine_accuracy@200 |
|
|
value: 1.0 |
|
|
name: Cosine Accuracy@200 |
|
|
- type: cosine_precision@1 |
|
|
value: 0.11891891891891893 |
|
|
name: Cosine Precision@1 |
|
|
- type: cosine_precision@20 |
|
|
value: 0.5267567567567567 |
|
|
name: Cosine Precision@20 |
|
|
- type: cosine_precision@50 |
|
|
value: 0.3437837837837838 |
|
|
name: Cosine Precision@50 |
|
|
- type: cosine_precision@100 |
|
|
value: 0.21897297297297297 |
|
|
name: Cosine Precision@100 |
|
|
- type: cosine_precision@150 |
|
|
value: 0.1658018018018018 |
|
|
name: Cosine Precision@150 |
|
|
- type: cosine_precision@200 |
|
|
value: 0.1332972972972973 |
|
|
name: Cosine Precision@200 |
|
|
- type: cosine_recall@1 |
|
|
value: 0.0035840147528632613 |
|
|
name: Cosine Recall@1 |
|
|
- type: cosine_recall@20 |
|
|
value: 0.35407760203362965 |
|
|
name: Cosine Recall@20 |
|
|
- type: cosine_recall@50 |
|
|
value: 0.5097999383006715 |
|
|
name: Cosine Recall@50 |
|
|
- type: cosine_recall@100 |
|
|
value: 0.6076073817878247 |
|
|
name: Cosine Recall@100 |
|
|
- type: cosine_recall@150 |
|
|
value: 0.6705429838138021 |
|
|
name: Cosine Recall@150 |
|
|
- type: cosine_recall@200 |
|
|
value: 0.7125464731776301 |
|
|
name: Cosine Recall@200 |
|
|
- type: cosine_ndcg@1 |
|
|
value: 0.11891891891891893 |
|
|
name: Cosine Ndcg@1 |
|
|
- type: cosine_ndcg@20 |
|
|
value: 0.5708144272431339 |
|
|
name: Cosine Ndcg@20 |
|
|
- type: cosine_ndcg@50 |
|
|
value: 0.535516963498245 |
|
|
name: Cosine Ndcg@50 |
|
|
- type: cosine_ndcg@100 |
|
|
value: 0.558980163264909 |
|
|
name: Cosine Ndcg@100 |
|
|
- type: cosine_ndcg@150 |
|
|
value: 0.5900024611410689 |
|
|
name: Cosine Ndcg@150 |
|
|
- type: cosine_ndcg@200 |
|
|
value: 0.609478782549869 |
|
|
name: Cosine Ndcg@200 |
|
|
- type: cosine_mrr@1 |
|
|
value: 0.11891891891891893 |
|
|
name: Cosine Mrr@1 |
|
|
- type: cosine_mrr@20 |
|
|
value: 0.5531531531531532 |
|
|
name: Cosine Mrr@20 |
|
|
- type: cosine_mrr@50 |
|
|
value: 0.5531531531531532 |
|
|
name: Cosine Mrr@50 |
|
|
- type: cosine_mrr@100 |
|
|
value: 0.5531531531531532 |
|
|
name: Cosine Mrr@100 |
|
|
- type: cosine_mrr@150 |
|
|
value: 0.5531531531531532 |
|
|
name: Cosine Mrr@150 |
|
|
- type: cosine_mrr@200 |
|
|
value: 0.5531531531531532 |
|
|
name: Cosine Mrr@200 |
|
|
- type: cosine_map@1 |
|
|
value: 0.11891891891891893 |
|
|
name: Cosine Map@1 |
|
|
- type: cosine_map@20 |
|
|
value: 0.4379349002801489 |
|
|
name: Cosine Map@20 |
|
|
- type: cosine_map@50 |
|
|
value: 0.3739269627118989 |
|
|
name: Cosine Map@50 |
|
|
- type: cosine_map@100 |
|
|
value: 0.37629843599877466 |
|
|
name: Cosine Map@100 |
|
|
- type: cosine_map@150 |
|
|
value: 0.3891828650842837 |
|
|
name: Cosine Map@150 |
|
|
- type: cosine_map@200 |
|
|
value: 0.39584338663408436 |
|
|
name: Cosine Map@200 |
|
|
- type: cosine_map@500 |
|
|
value: 0.4062909401616274 |
|
|
name: Cosine Map@500 |
|
|
- task: |
|
|
type: information-retrieval |
|
|
name: Information Retrieval |
|
|
dataset: |
|
|
name: full de |
|
|
type: full_de |
|
|
metrics: |
|
|
- type: cosine_accuracy@1 |
|
|
value: 0.2955665024630542 |
|
|
name: Cosine Accuracy@1 |
|
|
- type: cosine_accuracy@20 |
|
|
value: 0.9704433497536946 |
|
|
name: Cosine Accuracy@20 |
|
|
- type: cosine_accuracy@50 |
|
|
value: 0.9753694581280788 |
|
|
name: Cosine Accuracy@50 |
|
|
- type: cosine_accuracy@100 |
|
|
value: 0.9901477832512315 |
|
|
name: Cosine Accuracy@100 |
|
|
- type: cosine_accuracy@150 |
|
|
value: 0.9901477832512315 |
|
|
name: Cosine Accuracy@150 |
|
|
- type: cosine_accuracy@200 |
|
|
value: 0.9901477832512315 |
|
|
name: Cosine Accuracy@200 |
|
|
- type: cosine_precision@1 |
|
|
value: 0.2955665024630542 |
|
|
name: Cosine Precision@1 |
|
|
- type: cosine_precision@20 |
|
|
value: 0.42906403940886706 |
|
|
name: Cosine Precision@20 |
|
|
- type: cosine_precision@50 |
|
|
value: 0.29802955665024633 |
|
|
name: Cosine Precision@50 |
|
|
- type: cosine_precision@100 |
|
|
value: 0.19433497536945815 |
|
|
name: Cosine Precision@100 |
|
|
- type: cosine_precision@150 |
|
|
value: 0.14824302134646963 |
|
|
name: Cosine Precision@150 |
|
|
- type: cosine_precision@200 |
|
|
value: 0.1197783251231527 |
|
|
name: Cosine Precision@200 |
|
|
- type: cosine_recall@1 |
|
|
value: 0.01108543831680986 |
|
|
name: Cosine Recall@1 |
|
|
- type: cosine_recall@20 |
|
|
value: 0.26675038089672504 |
|
|
name: Cosine Recall@20 |
|
|
- type: cosine_recall@50 |
|
|
value: 0.40921566733257536 |
|
|
name: Cosine Recall@50 |
|
|
- type: cosine_recall@100 |
|
|
value: 0.5097664540706716 |
|
|
name: Cosine Recall@100 |
|
|
- type: cosine_recall@150 |
|
|
value: 0.5728593162394238 |
|
|
name: Cosine Recall@150 |
|
|
- type: cosine_recall@200 |
|
|
value: 0.6120176690658915 |
|
|
name: Cosine Recall@200 |
|
|
- type: cosine_ndcg@1 |
|
|
value: 0.2955665024630542 |
|
|
name: Cosine Ndcg@1 |
|
|
- type: cosine_ndcg@20 |
|
|
value: 0.46962753993631184 |
|
|
name: Cosine Ndcg@20 |
|
|
- type: cosine_ndcg@50 |
|
|
value: 0.444898497416845 |
|
|
name: Cosine Ndcg@50 |
|
|
- type: cosine_ndcg@100 |
|
|
value: 0.466960324034805 |
|
|
name: Cosine Ndcg@100 |
|
|
- type: cosine_ndcg@150 |
|
|
value: 0.49816218513136795 |
|
|
name: Cosine Ndcg@150 |
|
|
- type: cosine_ndcg@200 |
|
|
value: 0.5165485300965951 |
|
|
name: Cosine Ndcg@200 |
|
|
- type: cosine_mrr@1 |
|
|
value: 0.2955665024630542 |
|
|
name: Cosine Mrr@1 |
|
|
- type: cosine_mrr@20 |
|
|
value: 0.5046767633988724 |
|
|
name: Cosine Mrr@20 |
|
|
- type: cosine_mrr@50 |
|
|
value: 0.50477528556636 |
|
|
name: Cosine Mrr@50 |
|
|
- type: cosine_mrr@100 |
|
|
value: 0.5049589761635289 |
|
|
name: Cosine Mrr@100 |
|
|
- type: cosine_mrr@150 |
|
|
value: 0.5049589761635289 |
|
|
name: Cosine Mrr@150 |
|
|
- type: cosine_mrr@200 |
|
|
value: 0.5049589761635289 |
|
|
name: Cosine Mrr@200 |
|
|
- type: cosine_map@1 |
|
|
value: 0.2955665024630542 |
|
|
name: Cosine Map@1 |
|
|
- type: cosine_map@20 |
|
|
value: 0.33658821160388247 |
|
|
name: Cosine Map@20 |
|
|
- type: cosine_map@50 |
|
|
value: 0.2853400586620685 |
|
|
name: Cosine Map@50 |
|
|
- type: cosine_map@100 |
|
|
value: 0.2817732307206079 |
|
|
name: Cosine Map@100 |
|
|
- type: cosine_map@150 |
|
|
value: 0.2931317333364438 |
|
|
name: Cosine Map@150 |
|
|
- type: cosine_map@200 |
|
|
value: 0.2988160532231927 |
|
|
name: Cosine Map@200 |
|
|
- type: cosine_map@500 |
|
|
value: 0.31093362375086947 |
|
|
name: Cosine Map@500 |
|
|
- task: |
|
|
type: information-retrieval |
|
|
name: Information Retrieval |
|
|
dataset: |
|
|
name: full zh |
|
|
type: full_zh |
|
|
metrics: |
|
|
- type: cosine_accuracy@1 |
|
|
value: 0.6601941747572816 |
|
|
name: Cosine Accuracy@1 |
|
|
- type: cosine_accuracy@20 |
|
|
value: 0.970873786407767 |
|
|
name: Cosine Accuracy@20 |
|
|
- type: cosine_accuracy@50 |
|
|
value: 0.9902912621359223 |
|
|
name: Cosine Accuracy@50 |
|
|
- type: cosine_accuracy@100 |
|
|
value: 0.9902912621359223 |
|
|
name: Cosine Accuracy@100 |
|
|
- type: cosine_accuracy@150 |
|
|
value: 0.9902912621359223 |
|
|
name: Cosine Accuracy@150 |
|
|
- type: cosine_accuracy@200 |
|
|
value: 0.9902912621359223 |
|
|
name: Cosine Accuracy@200 |
|
|
- type: cosine_precision@1 |
|
|
value: 0.6601941747572816 |
|
|
name: Cosine Precision@1 |
|
|
- type: cosine_precision@20 |
|
|
value: 0.44805825242718444 |
|
|
name: Cosine Precision@20 |
|
|
- type: cosine_precision@50 |
|
|
value: 0.27126213592233006 |
|
|
name: Cosine Precision@50 |
|
|
- type: cosine_precision@100 |
|
|
value: 0.16650485436893206 |
|
|
name: Cosine Precision@100 |
|
|
- type: cosine_precision@150 |
|
|
value: 0.1211003236245955 |
|
|
name: Cosine Precision@150 |
|
|
- type: cosine_precision@200 |
|
|
value: 0.09529126213592234 |
|
|
name: Cosine Precision@200 |
|
|
- type: cosine_recall@1 |
|
|
value: 0.06611246215014785 |
|
|
name: Cosine Recall@1 |
|
|
- type: cosine_recall@20 |
|
|
value: 0.48409390608352504 |
|
|
name: Cosine Recall@20 |
|
|
- type: cosine_recall@50 |
|
|
value: 0.6568473638827299 |
|
|
name: Cosine Recall@50 |
|
|
- type: cosine_recall@100 |
|
|
value: 0.7685416895166794 |
|
|
name: Cosine Recall@100 |
|
|
- type: cosine_recall@150 |
|
|
value: 0.8277686060133904 |
|
|
name: Cosine Recall@150 |
|
|
- type: cosine_recall@200 |
|
|
value: 0.8616979590623105 |
|
|
name: Cosine Recall@200 |
|
|
- type: cosine_ndcg@1 |
|
|
value: 0.6601941747572816 |
|
|
name: Cosine Ndcg@1 |
|
|
- type: cosine_ndcg@20 |
|
|
value: 0.6231250904534316 |
|
|
name: Cosine Ndcg@20 |
|
|
- type: cosine_ndcg@50 |
|
|
value: 0.6383496204608501 |
|
|
name: Cosine Ndcg@50 |
|
|
- type: cosine_ndcg@100 |
|
|
value: 0.6917257705456975 |
|
|
name: Cosine Ndcg@100 |
|
|
- type: cosine_ndcg@150 |
|
|
value: 0.7167434657424917 |
|
|
name: Cosine Ndcg@150 |
|
|
- type: cosine_ndcg@200 |
|
|
value: 0.7303448958665071 |
|
|
name: Cosine Ndcg@200 |
|
|
- type: cosine_mrr@1 |
|
|
value: 0.6601941747572816 |
|
|
name: Cosine Mrr@1 |
|
|
- type: cosine_mrr@20 |
|
|
value: 0.8015776699029126 |
|
|
name: Cosine Mrr@20 |
|
|
- type: cosine_mrr@50 |
|
|
value: 0.8020876238109248 |
|
|
name: Cosine Mrr@50 |
|
|
- type: cosine_mrr@100 |
|
|
value: 0.8020876238109248 |
|
|
name: Cosine Mrr@100 |
|
|
- type: cosine_mrr@150 |
|
|
value: 0.8020876238109248 |
|
|
name: Cosine Mrr@150 |
|
|
- type: cosine_mrr@200 |
|
|
value: 0.8020876238109248 |
|
|
name: Cosine Mrr@200 |
|
|
- type: cosine_map@1 |
|
|
value: 0.6601941747572816 |
|
|
name: Cosine Map@1 |
|
|
- type: cosine_map@20 |
|
|
value: 0.4750205237443607 |
|
|
name: Cosine Map@20 |
|
|
- type: cosine_map@50 |
|
|
value: 0.45785161483741715 |
|
|
name: Cosine Map@50 |
|
|
- type: cosine_map@100 |
|
|
value: 0.4848085275553208 |
|
|
name: Cosine Map@100 |
|
|
- type: cosine_map@150 |
|
|
value: 0.4937216396074153 |
|
|
name: Cosine Map@150 |
|
|
- type: cosine_map@200 |
|
|
value: 0.49777622471594557 |
|
|
name: Cosine Map@200 |
|
|
- type: cosine_map@500 |
|
|
value: 0.5039795405740248 |
|
|
name: Cosine Map@500 |
|
|
- task: |
|
|
type: information-retrieval |
|
|
name: Information Retrieval |
|
|
dataset: |
|
|
name: mix es |
|
|
type: mix_es |
|
|
metrics: |
|
|
- type: cosine_accuracy@1 |
|
|
value: 0.6297451898075923 |
|
|
name: Cosine Accuracy@1 |
|
|
- type: cosine_accuracy@20 |
|
|
value: 0.9105564222568903 |
|
|
name: Cosine Accuracy@20 |
|
|
- type: cosine_accuracy@50 |
|
|
value: 0.9495579823192928 |
|
|
name: Cosine Accuracy@50 |
|
|
- type: cosine_accuracy@100 |
|
|
value: 0.9729589183567343 |
|
|
name: Cosine Accuracy@100 |
|
|
- type: cosine_accuracy@150 |
|
|
value: 0.983359334373375 |
|
|
name: Cosine Accuracy@150 |
|
|
- type: cosine_accuracy@200 |
|
|
value: 0.9901196047841914 |
|
|
name: Cosine Accuracy@200 |
|
|
- type: cosine_precision@1 |
|
|
value: 0.6297451898075923 |
|
|
name: Cosine Precision@1 |
|
|
- type: cosine_precision@20 |
|
|
value: 0.11167446697867915 |
|
|
name: Cosine Precision@20 |
|
|
- type: cosine_precision@50 |
|
|
value: 0.04850754030161208 |
|
|
name: Cosine Precision@50 |
|
|
- type: cosine_precision@100 |
|
|
value: 0.02535101404056163 |
|
|
name: Cosine Precision@100 |
|
|
- type: cosine_precision@150 |
|
|
value: 0.0172300225342347 |
|
|
name: Cosine Precision@150 |
|
|
- type: cosine_precision@200 |
|
|
value: 0.0130811232449298 |
|
|
name: Cosine Precision@200 |
|
|
- type: cosine_recall@1 |
|
|
value: 0.24340068840848872 |
|
|
name: Cosine Recall@1 |
|
|
- type: cosine_recall@20 |
|
|
value: 0.8288215338137336 |
|
|
name: Cosine Recall@20 |
|
|
- type: cosine_recall@50 |
|
|
value: 0.8986566129311838 |
|
|
name: Cosine Recall@50 |
|
|
- type: cosine_recall@100 |
|
|
value: 0.9398509273704282 |
|
|
name: Cosine Recall@100 |
|
|
- type: cosine_recall@150 |
|
|
value: 0.9576876408389668 |
|
|
name: Cosine Recall@150 |
|
|
- type: cosine_recall@200 |
|
|
value: 0.9695267810712429 |
|
|
name: Cosine Recall@200 |
|
|
- type: cosine_ndcg@1 |
|
|
value: 0.6297451898075923 |
|
|
name: Cosine Ndcg@1 |
|
|
- type: cosine_ndcg@20 |
|
|
value: 0.7010427232190379 |
|
|
name: Cosine Ndcg@20 |
|
|
- type: cosine_ndcg@50 |
|
|
value: 0.7200844211181043 |
|
|
name: Cosine Ndcg@50 |
|
|
- type: cosine_ndcg@100 |
|
|
value: 0.7290848607488584 |
|
|
name: Cosine Ndcg@100 |
|
|
- type: cosine_ndcg@150 |
|
|
value: 0.7325985285606116 |
|
|
name: Cosine Ndcg@150 |
|
|
- type: cosine_ndcg@200 |
|
|
value: 0.7347463892077523 |
|
|
name: Cosine Ndcg@200 |
|
|
- type: cosine_mrr@1 |
|
|
value: 0.6297451898075923 |
|
|
name: Cosine Mrr@1 |
|
|
- type: cosine_mrr@20 |
|
|
value: 0.7036709577939534 |
|
|
name: Cosine Mrr@20 |
|
|
- type: cosine_mrr@50 |
|
|
value: 0.7049808414398148 |
|
|
name: Cosine Mrr@50 |
|
|
- type: cosine_mrr@100 |
|
|
value: 0.7053260954286938 |
|
|
name: Cosine Mrr@100 |
|
|
- type: cosine_mrr@150 |
|
|
value: 0.7054145837924506 |
|
|
name: Cosine Mrr@150 |
|
|
- type: cosine_mrr@200 |
|
|
value: 0.7054541569954363 |
|
|
name: Cosine Mrr@200 |
|
|
- type: cosine_map@1 |
|
|
value: 0.6297451898075923 |
|
|
name: Cosine Map@1 |
|
|
- type: cosine_map@20 |
|
|
value: 0.6194189058349782 |
|
|
name: Cosine Map@20 |
|
|
- type: cosine_map@50 |
|
|
value: 0.6244340507841626 |
|
|
name: Cosine Map@50 |
|
|
- type: cosine_map@100 |
|
|
value: 0.6256943736433496 |
|
|
name: Cosine Map@100 |
|
|
- type: cosine_map@150 |
|
|
value: 0.6260195205413376 |
|
|
name: Cosine Map@150 |
|
|
- type: cosine_map@200 |
|
|
value: 0.6261650797332174 |
|
|
name: Cosine Map@200 |
|
|
- type: cosine_map@500 |
|
|
value: 0.6263452093477304 |
|
|
name: Cosine Map@500 |
|
|
- task: |
|
|
type: information-retrieval |
|
|
name: Information Retrieval |
|
|
dataset: |
|
|
name: mix de |
|
|
type: mix_de |
|
|
metrics: |
|
|
- type: cosine_accuracy@1 |
|
|
value: 0.5564222568902756 |
|
|
name: Cosine Accuracy@1 |
|
|
- type: cosine_accuracy@20 |
|
|
value: 0.8866354654186167 |
|
|
name: Cosine Accuracy@20 |
|
|
- type: cosine_accuracy@50 |
|
|
value: 0.9381175247009881 |
|
|
name: Cosine Accuracy@50 |
|
|
- type: cosine_accuracy@100 |
|
|
value: 0.9594383775351014 |
|
|
name: Cosine Accuracy@100 |
|
|
- type: cosine_accuracy@150 |
|
|
value: 0.9708788351534061 |
|
|
name: Cosine Accuracy@150 |
|
|
- type: cosine_accuracy@200 |
|
|
value: 0.9776391055642226 |
|
|
name: Cosine Accuracy@200 |
|
|
- type: cosine_precision@1 |
|
|
value: 0.5564222568902756 |
|
|
name: Cosine Precision@1 |
|
|
- type: cosine_precision@20 |
|
|
value: 0.109464378575143 |
|
|
name: Cosine Precision@20 |
|
|
- type: cosine_precision@50 |
|
|
value: 0.048060322412896525 |
|
|
name: Cosine Precision@50 |
|
|
- type: cosine_precision@100 |
|
|
value: 0.025273010920436823 |
|
|
name: Cosine Precision@100 |
|
|
- type: cosine_precision@150 |
|
|
value: 0.017313225862367825 |
|
|
name: Cosine Precision@150 |
|
|
- type: cosine_precision@200 |
|
|
value: 0.013143525741029644 |
|
|
name: Cosine Precision@200 |
|
|
- type: cosine_recall@1 |
|
|
value: 0.20931703934824059 |
|
|
name: Cosine Recall@1 |
|
|
- type: cosine_recall@20 |
|
|
value: 0.7988992893049055 |
|
|
name: Cosine Recall@20 |
|
|
- type: cosine_recall@50 |
|
|
value: 0.8741029641185647 |
|
|
name: Cosine Recall@50 |
|
|
- type: cosine_recall@100 |
|
|
value: 0.9173426937077482 |
|
|
name: Cosine Recall@100 |
|
|
- type: cosine_recall@150 |
|
|
value: 0.9424076963078523 |
|
|
name: Cosine Recall@150 |
|
|
- type: cosine_recall@200 |
|
|
value: 0.953631478592477 |
|
|
name: Cosine Recall@200 |
|
|
- type: cosine_ndcg@1 |
|
|
value: 0.5564222568902756 |
|
|
name: Cosine Ndcg@1 |
|
|
- type: cosine_ndcg@20 |
|
|
value: 0.6541310877479573 |
|
|
name: Cosine Ndcg@20 |
|
|
- type: cosine_ndcg@50 |
|
|
value: 0.674790854916742 |
|
|
name: Cosine Ndcg@50 |
|
|
- type: cosine_ndcg@100 |
|
|
value: 0.6844997445798996 |
|
|
name: Cosine Ndcg@100 |
|
|
- type: cosine_ndcg@150 |
|
|
value: 0.6894214573457343 |
|
|
name: Cosine Ndcg@150 |
|
|
- type: cosine_ndcg@200 |
|
|
value: 0.6914881284159038 |
|
|
name: Cosine Ndcg@200 |
|
|
- type: cosine_mrr@1 |
|
|
value: 0.5564222568902756 |
|
|
name: Cosine Mrr@1 |
|
|
- type: cosine_mrr@20 |
|
|
value: 0.6476945170199107 |
|
|
name: Cosine Mrr@20 |
|
|
- type: cosine_mrr@50 |
|
|
value: 0.6493649946597936 |
|
|
name: Cosine Mrr@50 |
|
|
- type: cosine_mrr@100 |
|
|
value: 0.6496801333421218 |
|
|
name: Cosine Mrr@100 |
|
|
- type: cosine_mrr@150 |
|
|
value: 0.6497778366579644 |
|
|
name: Cosine Mrr@150 |
|
|
- type: cosine_mrr@200 |
|
|
value: 0.6498156890114056 |
|
|
name: Cosine Mrr@200 |
|
|
- type: cosine_map@1 |
|
|
value: 0.5564222568902756 |
|
|
name: Cosine Map@1 |
|
|
- type: cosine_map@20 |
|
|
value: 0.5648326970643027 |
|
|
name: Cosine Map@20 |
|
|
- type: cosine_map@50 |
|
|
value: 0.57003456255067 |
|
|
name: Cosine Map@50 |
|
|
- type: cosine_map@100 |
|
|
value: 0.5714370828517599 |
|
|
name: Cosine Map@100 |
|
|
- type: cosine_map@150 |
|
|
value: 0.5719002990233493 |
|
|
name: Cosine Map@150 |
|
|
- type: cosine_map@200 |
|
|
value: 0.5720497397197026 |
|
|
name: Cosine Map@200 |
|
|
- type: cosine_map@500 |
|
|
value: 0.5723109788233504 |
|
|
name: Cosine Map@500 |
|
|
- task: |
|
|
type: information-retrieval |
|
|
name: Information Retrieval |
|
|
dataset: |
|
|
name: mix zh |
|
|
type: mix_zh |
|
|
metrics: |
|
|
- type: cosine_accuracy@1 |
|
|
value: 0.6085594989561587 |
|
|
name: Cosine Accuracy@1 |
|
|
- type: cosine_accuracy@20 |
|
|
value: 0.9592901878914405 |
|
|
name: Cosine Accuracy@20 |
|
|
- type: cosine_accuracy@50 |
|
|
value: 0.9791231732776617 |
|
|
name: Cosine Accuracy@50 |
|
|
- type: cosine_accuracy@100 |
|
|
value: 0.9874739039665971 |
|
|
name: Cosine Accuracy@100 |
|
|
- type: cosine_accuracy@150 |
|
|
value: 0.9911273486430062 |
|
|
name: Cosine Accuracy@150 |
|
|
- type: cosine_accuracy@200 |
|
|
value: 0.9937369519832986 |
|
|
name: Cosine Accuracy@200 |
|
|
- type: cosine_precision@1 |
|
|
value: 0.6085594989561587 |
|
|
name: Cosine Precision@1 |
|
|
- type: cosine_precision@20 |
|
|
value: 0.12656576200417535 |
|
|
name: Cosine Precision@20 |
|
|
- type: cosine_precision@50 |
|
|
value: 0.05518789144050106 |
|
|
name: Cosine Precision@50 |
|
|
- type: cosine_precision@100 |
|
|
value: 0.028747390396659713 |
|
|
name: Cosine Precision@100 |
|
|
- type: cosine_precision@150 |
|
|
value: 0.019425887265135697 |
|
|
name: Cosine Precision@150 |
|
|
- type: cosine_precision@200 |
|
|
value: 0.014705114822546978 |
|
|
name: Cosine Precision@200 |
|
|
- type: cosine_recall@1 |
|
|
value: 0.2043804056069192 |
|
|
name: Cosine Recall@1 |
|
|
- type: cosine_recall@20 |
|
|
value: 0.8346468336812805 |
|
|
name: Cosine Recall@20 |
|
|
- type: cosine_recall@50 |
|
|
value: 0.9095772442588727 |
|
|
name: Cosine Recall@50 |
|
|
- type: cosine_recall@100 |
|
|
value: 0.9475643702157271 |
|
|
name: Cosine Recall@100 |
|
|
- type: cosine_recall@150 |
|
|
value: 0.9609168406402228 |
|
|
name: Cosine Recall@150 |
|
|
- type: cosine_recall@200 |
|
|
value: 0.9697807933194154 |
|
|
name: Cosine Recall@200 |
|
|
- type: cosine_ndcg@1 |
|
|
value: 0.6085594989561587 |
|
|
name: Cosine Ndcg@1 |
|
|
- type: cosine_ndcg@20 |
|
|
value: 0.6853247290079303 |
|
|
name: Cosine Ndcg@20 |
|
|
- type: cosine_ndcg@50 |
|
|
value: 0.7066940880968873 |
|
|
name: Cosine Ndcg@50 |
|
|
- type: cosine_ndcg@100 |
|
|
value: 0.715400790265437 |
|
|
name: Cosine Ndcg@100 |
|
|
- type: cosine_ndcg@150 |
|
|
value: 0.7180808450243259 |
|
|
name: Cosine Ndcg@150 |
|
|
- type: cosine_ndcg@200 |
|
|
value: 0.7197629642909036 |
|
|
name: Cosine Ndcg@200 |
|
|
- type: cosine_mrr@1 |
|
|
value: 0.6085594989561587 |
|
|
name: Cosine Mrr@1 |
|
|
- type: cosine_mrr@20 |
|
|
value: 0.7236528792595264 |
|
|
name: Cosine Mrr@20 |
|
|
- type: cosine_mrr@50 |
|
|
value: 0.7243308740364213 |
|
|
name: Cosine Mrr@50 |
|
|
- type: cosine_mrr@100 |
|
|
value: 0.7244524590415827 |
|
|
name: Cosine Mrr@100 |
|
|
- type: cosine_mrr@150 |
|
|
value: 0.7244814620971008 |
|
|
name: Cosine Mrr@150 |
|
|
- type: cosine_mrr@200 |
|
|
value: 0.7244960285685315 |
|
|
name: Cosine Mrr@200 |
|
|
- type: cosine_map@1 |
|
|
value: 0.6085594989561587 |
|
|
name: Cosine Map@1 |
|
|
- type: cosine_map@20 |
|
|
value: 0.5652211952239553 |
|
|
name: Cosine Map@20 |
|
|
- type: cosine_map@50 |
|
|
value: 0.5716374350069462 |
|
|
name: Cosine Map@50 |
|
|
- type: cosine_map@100 |
|
|
value: 0.5730756815932735 |
|
|
name: Cosine Map@100 |
|
|
- type: cosine_map@150 |
|
|
value: 0.5733543252173214 |
|
|
name: Cosine Map@150 |
|
|
- type: cosine_map@200 |
|
|
value: 0.5734860037813889 |
|
|
name: Cosine Map@200 |
|
|
- type: cosine_map@500 |
|
|
value: 0.5736416699680624 |
|
|
name: Cosine Map@500 |
|
|
--- |
|
|
|
|
|
# Job - Job matching Alibaba-NLP/gte-multilingual-base pruned |
|
|
|
|
|
Top-performing model on [TalentCLEF 2025](https://talentclef.github.io/talentclef/) Task A. Use it for multilingual job title matching. |
|
|
|
|
|
## Model Details |
|
|
|
|
|
### Model Description |
|
|
- **Model Type:** Sentence Transformer |
|
|
<!-- - **Base model:** [Unknown](https://huggingface.co/unknown) --> |
|
|
- **Maximum Sequence Length:** 512 tokens |
|
|
- **Output Dimensionality:** 768 dimensions |
|
|
- **Similarity Function:** Cosine Similarity |
|
|
<!-- - **Training Dataset:** Unknown --> |
|
|
<!-- - **Language:** Unknown --> |
|
|
<!-- - **License:** Unknown --> |
|
|
|
|
|
### Model Sources |
|
|
|
|
|
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net) |
|
|
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers) |
|
|
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers) |
|
|
|
|
|
### Full Model Architecture |
|
|
|
|
|
``` |
|
|
SentenceTransformer( |
|
|
(0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: NewModel |
|
|
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True}) |
|
|
(2): Normalize() |
|
|
) |
|
|
``` |
|
|
|
|
|
## Usage |
|
|
|
|
|
### Direct Usage (Sentence Transformers) |
|
|
|
|
|
First install the Sentence Transformers library: |
|
|
|
|
|
```bash |
|
|
pip install -U sentence-transformers |
|
|
``` |
|
|
|
|
|
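Then you can load this model and run inference. The backbone listed under "Full Model Architecture" is the custom `NewModel` class, so loading may require passing `trust_remote_code=True` to `SentenceTransformer(...)` (confirm against the model repository). |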
|
|
```python |
|
|
from sentence_transformers import SentenceTransformer |
|
|
|
|
|
# Download from the 🤗 Hub |
|
|
model = SentenceTransformer("pj-mathematician/JobGTE-multilingual-base-pruned") |
|
|
# Run inference |
|
|
sentences = [ |
|
|
'Entwicklerin für mobile Anwendungen', |
|
|
'Mergers-and-Acquisitions-Analyst/Mergers-and-Acquisitions-Analystin', |
|
|
'fashion design expert', |
|
|
] |
|
|
embeddings = model.encode(sentences) |
|
|
print(embeddings.shape) |
|
|
# (3, 768) |
|
|
|
|
|
# Get the similarity scores for the embeddings |
|
|
similarities = model.similarity(embeddings, embeddings) |
|
|
print(similarities.shape) |
|
|
# torch.Size([3, 3]) |
|
|
``` |
|
|
|
|
|
<!-- |
|
|
### Direct Usage (Transformers) |
|
|
|
|
|
<details><summary>Click to see the direct usage in Transformers</summary> |
|
|
|
|
|
</details> |
|
|
--> |
|
|
|
|
|
<!-- |
|
|
### Downstream Usage (Sentence Transformers) |
|
|
|
|
|
You can finetune this model on your own dataset. |
|
|
|
|
|
<details><summary>Click to expand</summary> |
|
|
|
|
|
</details> |
|
|
--> |
|
|
|
|
|
<!-- |
|
|
### Out-of-Scope Use |
|
|
|
|
|
*List how the model may foreseeably be misused and address what users ought not to do with the model.* |
|
|
--> |
|
|
|
|
|
## Evaluation |
|
|
|
|
|
### Metrics |
|
|
|
|
|
#### Information Retrieval |
|
|
|
|
|
* Datasets: `full_en`, `full_es`, `full_de`, `full_zh`, `mix_es`, `mix_de` and `mix_zh` |
|
|
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) |
|
|
|
|
|
| Metric | full_en | full_es | full_de | full_zh | mix_es | mix_de | mix_zh | |
|
|
|:---------------------|:-----------|:-----------|:-----------|:-----------|:-----------|:-----------|:-----------| |
|
|
| cosine_accuracy@1 | 0.6476 | 0.1189 | 0.2956 | 0.6602 | 0.6297 | 0.5564 | 0.6086 | |
|
|
| cosine_accuracy@20 | 0.9714 | 1.0 | 0.9704 | 0.9709 | 0.9106 | 0.8866 | 0.9593 | |
|
|
| cosine_accuracy@50 | 0.9905 | 1.0 | 0.9754 | 0.9903 | 0.9496 | 0.9381 | 0.9791 | |
|
|
| cosine_accuracy@100 | 0.9905 | 1.0 | 0.9901 | 0.9903 | 0.973 | 0.9594 | 0.9875 | |
|
|
| cosine_accuracy@150 | 0.9905 | 1.0 | 0.9901 | 0.9903 | 0.9834 | 0.9709 | 0.9911 | |
|
|
| cosine_accuracy@200 | 0.9905 | 1.0 | 0.9901 | 0.9903 | 0.9901 | 0.9776 | 0.9937 | |
|
|
| cosine_precision@1 | 0.6476 | 0.1189 | 0.2956 | 0.6602 | 0.6297 | 0.5564 | 0.6086 | |
|
|
| cosine_precision@20 | 0.4795 | 0.5268 | 0.4291 | 0.4481 | 0.1117 | 0.1095 | 0.1266 | |
|
|
| cosine_precision@50 | 0.2884 | 0.3438 | 0.298 | 0.2713 | 0.0485 | 0.0481 | 0.0552 | |
|
|
| cosine_precision@100 | 0.173 | 0.219 | 0.1943 | 0.1665 | 0.0254 | 0.0253 | 0.0287 | |
|
|
| cosine_precision@150 | 0.1244 | 0.1658 | 0.1482 | 0.1211 | 0.0172 | 0.0173 | 0.0194 | |
|
|
| cosine_precision@200 | 0.0986 | 0.1333 | 0.1198 | 0.0953 | 0.0131 | 0.0131 | 0.0147 | |
|
|
| cosine_recall@1 | 0.0661 | 0.0036 | 0.0111 | 0.0661 | 0.2434 | 0.2093 | 0.2044 | |
|
|
| cosine_recall@20 | 0.5122 | 0.3541 | 0.2668 | 0.4841 | 0.8288 | 0.7989 | 0.8346 | |
|
|
| cosine_recall@50 | 0.6835 | 0.5098 | 0.4092 | 0.6568 | 0.8987 | 0.8741 | 0.9096 | |
|
|
| cosine_recall@100 | 0.79 | 0.6076 | 0.5098 | 0.7685 | 0.9399 | 0.9173 | 0.9476 | |
|
|
| cosine_recall@150 | 0.84 | 0.6705 | 0.5729 | 0.8278 | 0.9577 | 0.9424 | 0.9609 | |
|
|
| cosine_recall@200 | 0.8759 | 0.7125 | 0.612 | 0.8617 | 0.9695 | 0.9536 | 0.9698 | |
|
|
| cosine_ndcg@1 | 0.6476 | 0.1189 | 0.2956 | 0.6602 | 0.6297 | 0.5564 | 0.6086 | |
|
|
| cosine_ndcg@20 | 0.6468 | 0.5708 | 0.4696 | 0.6231 | 0.701 | 0.6541 | 0.6853 | |
|
|
| cosine_ndcg@50 | 0.658 | 0.5355 | 0.4449 | 0.6383 | 0.7201 | 0.6748 | 0.7067 | |
|
|
| cosine_ndcg@100 | 0.7095 | 0.559 | 0.467 | 0.6917 | 0.7291 | 0.6845 | 0.7154 | |
|
|
| cosine_ndcg@150 | 0.731 | 0.59 | 0.4982 | 0.7167 | 0.7326 | 0.6894 | 0.7181 | |
|
|
| **cosine_ndcg@200** | **0.7461** | **0.6095** | **0.5165** | **0.7303** | **0.7347** | **0.6915** | **0.7198** | |
|
|
| cosine_mrr@1 | 0.6476 | 0.1189 | 0.2956 | 0.6602 | 0.6297 | 0.5564 | 0.6086 | |
|
|
| cosine_mrr@20 | 0.7902 | 0.5532 | 0.5047 | 0.8016 | 0.7037 | 0.6477 | 0.7237 | |
|
|
| cosine_mrr@50 | 0.791 | 0.5532 | 0.5048 | 0.8021 | 0.705 | 0.6494 | 0.7243 | |
|
|
| cosine_mrr@100 | 0.791 | 0.5532 | 0.505 | 0.8021 | 0.7053 | 0.6497 | 0.7245 | |
|
|
| cosine_mrr@150 | 0.791 | 0.5532 | 0.505 | 0.8021 | 0.7054 | 0.6498 | 0.7245 | |
|
|
| cosine_mrr@200 | 0.791 | 0.5532 | 0.505 | 0.8021 | 0.7055 | 0.6498 | 0.7245 | |
|
|
| cosine_map@1 | 0.6476 | 0.1189 | 0.2956 | 0.6602 | 0.6297 | 0.5564 | 0.6086 | |
|
|
| cosine_map@20 | 0.5026 | 0.4379 | 0.3366 | 0.475 | 0.6194 | 0.5648 | 0.5652 | |
|
|
| cosine_map@50 | 0.484 | 0.3739 | 0.2853 | 0.4579 | 0.6244 | 0.57 | 0.5716 | |
|
|
| cosine_map@100 | 0.5118 | 0.3763 | 0.2818 | 0.4848 | 0.6257 | 0.5714 | 0.5731 | |
|
|
| cosine_map@150 | 0.5202 | 0.3892 | 0.2931 | 0.4937 | 0.626 | 0.5719 | 0.5734 | |
|
|
| cosine_map@200 | 0.5249 | 0.3958 | 0.2988 | 0.4978 | 0.6262 | 0.572 | 0.5735 | |
|
|
| cosine_map@500 | 0.5304 | 0.4063 | 0.3109 | 0.504 | 0.6263 | 0.5723 | 0.5736 | |
|
|
|
|
|
<!-- |
|
|
## Bias, Risks and Limitations |
|
|
|
|
|
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.* |
|
|
--> |
|
|
|
|
|
<!-- |
|
|
### Recommendations |
|
|
|
|
|
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.* |
|
|
--> |
|
|
|
|
|
## Training Details |
|
|
|
|
|
### Training Dataset |
|
|
|
|
|
#### Unnamed Dataset |
|
|
|
|
|
* Size: 86,648 training samples |
|
|
* Columns: <code>sentence</code> and <code>label</code> |
|
|
* Approximate statistics based on the first 1000 samples: |
|
|
| | sentence | label | |
|
|
|:--------|:---------------------------------------------------------------------------------|:-------------------------------------| |
|
|
| type | string | list | |
|
|
| details | <ul><li>min: 2 tokens</li><li>mean: 8.25 tokens</li><li>max: 54 tokens</li></ul> | <ul><li>size: 768 elements</li></ul> | |
|
|
* Samples: |
|
|
| sentence | label | |
|
|
|:-----------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------| |
|
|
| <code></code> | <code>[-0.07171934843063354, 0.03595816716551781, -0.029780959710478783, 0.006593302357941866, 0.040611181408166885, ...]</code> | |
|
|
| <code>airport environment officer</code> | <code>[-0.022075481712818146, 0.02999737113714218, -0.02189866080880165, 0.016531817615032196, 0.012234307825565338, ...]</code> | |
|
|
| <code>Flake操作员</code> | <code>[-0.04815564677119255, 0.023524893447756767, -0.01583661139011383, 0.042527906596660614, 0.03815540298819542, ...]</code> | |
|
|
* Loss: [<code>MSELoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#mseloss) |
|
|
|
|
|
### Training Hyperparameters |
|
|
#### Non-Default Hyperparameters |
|
|
|
|
|
- `eval_strategy`: steps |
|
|
- `per_device_train_batch_size`: 128 |
|
|
- `per_device_eval_batch_size`: 128 |
|
|
- `gradient_accumulation_steps`: 2 |
|
|
- `learning_rate`: 0.0001 |
|
|
- `num_train_epochs`: 5 |
|
|
- `warmup_ratio`: 0.05 |
|
|
- `log_on_each_node`: False |
|
|
- `fp16`: True |
|
|
- `dataloader_num_workers`: 4 |
|
|
- `ddp_find_unused_parameters`: True |
|
|
- `batch_sampler`: no_duplicates |
|
|
|
|
|
#### All Hyperparameters |
|
|
<details><summary>Click to expand</summary> |
|
|
|
|
|
- `overwrite_output_dir`: False |
|
|
- `do_predict`: False |
|
|
- `eval_strategy`: steps |
|
|
- `prediction_loss_only`: True |
|
|
- `per_device_train_batch_size`: 128 |
|
|
- `per_device_eval_batch_size`: 128 |
|
|
- `per_gpu_train_batch_size`: None |
|
|
- `per_gpu_eval_batch_size`: None |
|
|
- `gradient_accumulation_steps`: 2 |
|
|
- `eval_accumulation_steps`: None |
|
|
- `torch_empty_cache_steps`: None |
|
|
- `learning_rate`: 0.0001 |
|
|
- `weight_decay`: 0.0 |
|
|
- `adam_beta1`: 0.9 |
|
|
- `adam_beta2`: 0.999 |
|
|
- `adam_epsilon`: 1e-08 |
|
|
- `max_grad_norm`: 1.0 |
|
|
- `num_train_epochs`: 5 |
|
|
- `max_steps`: -1 |
|
|
- `lr_scheduler_type`: linear |
|
|
- `lr_scheduler_kwargs`: {} |
|
|
- `warmup_ratio`: 0.05 |
|
|
- `warmup_steps`: 0 |
|
|
- `log_level`: passive |
|
|
- `log_level_replica`: warning |
|
|
- `log_on_each_node`: False |
|
|
- `logging_nan_inf_filter`: True |
|
|
- `save_safetensors`: True |
|
|
- `save_on_each_node`: False |
|
|
- `save_only_model`: False |
|
|
- `restore_callback_states_from_checkpoint`: False |
|
|
- `no_cuda`: False |
|
|
- `use_cpu`: False |
|
|
- `use_mps_device`: False |
|
|
- `seed`: 42 |
|
|
- `data_seed`: None |
|
|
- `jit_mode_eval`: False |
|
|
- `use_ipex`: False |
|
|
- `bf16`: False |
|
|
- `fp16`: True |
|
|
- `fp16_opt_level`: O1 |
|
|
- `half_precision_backend`: auto |
|
|
- `bf16_full_eval`: False |
|
|
- `fp16_full_eval`: False |
|
|
- `tf32`: None |
|
|
- `local_rank`: 0 |
|
|
- `ddp_backend`: None |
|
|
- `tpu_num_cores`: None |
|
|
- `tpu_metrics_debug`: False |
|
|
- `debug`: [] |
|
|
- `dataloader_drop_last`: True |
|
|
- `dataloader_num_workers`: 4 |
|
|
- `dataloader_prefetch_factor`: None |
|
|
- `past_index`: -1 |
|
|
- `disable_tqdm`: False |
|
|
- `remove_unused_columns`: True |
|
|
- `label_names`: None |
|
|
- `load_best_model_at_end`: False |
|
|
- `ignore_data_skip`: False |
|
|
- `fsdp`: [] |
|
|
- `fsdp_min_num_params`: 0 |
|
|
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False} |
|
|
- `tp_size`: 0 |
|
|
- `fsdp_transformer_layer_cls_to_wrap`: None |
|
|
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None} |
|
|
- `deepspeed`: None |
|
|
- `label_smoothing_factor`: 0.0 |
|
|
- `optim`: adamw_torch |
|
|
- `optim_args`: None |
|
|
- `adafactor`: False |
|
|
- `group_by_length`: False |
|
|
- `length_column_name`: length |
|
|
- `ddp_find_unused_parameters`: True |
|
|
- `ddp_bucket_cap_mb`: None |
|
|
- `ddp_broadcast_buffers`: False |
|
|
- `dataloader_pin_memory`: True |
|
|
- `dataloader_persistent_workers`: False |
|
|
- `skip_memory_metrics`: True |
|
|
- `use_legacy_prediction_loop`: False |
|
|
- `push_to_hub`: False |
|
|
- `resume_from_checkpoint`: None |
|
|
- `hub_model_id`: None |
|
|
- `hub_strategy`: every_save |
|
|
- `hub_private_repo`: None |
|
|
- `hub_always_push`: False |
|
|
- `gradient_checkpointing`: False |
|
|
- `gradient_checkpointing_kwargs`: None |
|
|
- `include_inputs_for_metrics`: False |
|
|
- `include_for_metrics`: [] |
|
|
- `eval_do_concat_batches`: True |
|
|
- `fp16_backend`: auto |
|
|
- `push_to_hub_model_id`: None |
|
|
- `push_to_hub_organization`: None |
|
|
- `mp_parameters`: |
|
|
- `auto_find_batch_size`: False |
|
|
- `full_determinism`: False |
|
|
- `torchdynamo`: None |
|
|
- `ray_scope`: last |
|
|
- `ddp_timeout`: 1800 |
|
|
- `torch_compile`: False |
|
|
- `torch_compile_backend`: None |
|
|
- `torch_compile_mode`: None |
|
|
- `include_tokens_per_second`: False |
|
|
- `include_num_input_tokens_seen`: False |
|
|
- `neftune_noise_alpha`: None |
|
|
- `optim_target_modules`: None |
|
|
- `batch_eval_metrics`: False |
|
|
- `eval_on_start`: False |
|
|
- `use_liger_kernel`: False |
|
|
- `eval_use_gather_object`: False |
|
|
- `average_tokens_across_devices`: False |
|
|
- `prompts`: None |
|
|
- `batch_sampler`: no_duplicates |
|
|
- `multi_dataset_batch_sampler`: proportional |
|
|
|
|
|
</details> |
|
|
|
|
|
### Training Logs |
|
|
| Epoch | Step | Training Loss | full_en_cosine_ndcg@200 | full_es_cosine_ndcg@200 | full_de_cosine_ndcg@200 | full_zh_cosine_ndcg@200 | mix_es_cosine_ndcg@200 | mix_de_cosine_ndcg@200 | mix_zh_cosine_ndcg@200 | |
|
|
|:------:|:----:|:-------------:|:-----------------------:|:-----------------------:|:-----------------------:|:-----------------------:|:----------------------:|:----------------------:|:----------------------:| |
|
|
| -1 | -1 | - | 0.5348 | 0.4311 | 0.3678 | 0.5333 | 0.2580 | 0.1924 | 0.2871 | |
|
|
| 0.0030 | 1 | 0.0017 | - | - | - | - | - | - | - | |
|
|
| 0.2959 | 100 | 0.001 | - | - | - | - | - | - | - | |
|
|
| 0.5917 | 200 | 0.0005 | 0.6702 | 0.5287 | 0.4566 | 0.6809 | 0.5864 | 0.5302 | 0.4739 | |
|
|
| 0.8876 | 300 | 0.0004 | - | - | - | - | - | - | - | |
|
|
| 1.1834 | 400 | 0.0004 | 0.7057 | 0.5643 | 0.4790 | 0.7033 | 0.6604 | 0.6055 | 0.6003 | |
|
|
| 1.4793 | 500 | 0.0004 | - | - | - | - | - | - | - | |
|
|
| 1.7751 | 600 | 0.0003 | 0.7184 | 0.5783 | 0.4910 | 0.7127 | 0.6927 | 0.6416 | 0.6485 | |
|
|
| 2.0710 | 700 | 0.0003 | - | - | - | - | - | - | - | |
|
|
| 2.3669 | 800 | 0.0003 | 0.7307 | 0.5938 | 0.5023 | 0.7233 | 0.7125 | 0.6639 | 0.6847 | |
|
|
| 2.6627 | 900 | 0.0003 | - | - | - | - | - | - | - | |
|
|
| 2.9586 | 1000 | 0.0003 | 0.7371 | 0.6002 | 0.5085 | 0.7228 | 0.7222 | 0.6761 | 0.6998 | |
|
|
| 3.2544 | 1100 | 0.0003 | - | - | - | - | - | - | - | |
|
|
| 3.5503 | 1200 | 0.0003 | 0.7402 | 0.6059 | 0.5109 | 0.7279 | 0.7285 | 0.6841 | 0.7120 | |
|
|
| 3.8462 | 1300 | 0.0003 | - | - | - | - | - | - | - | |
|
|
| 4.1420 | 1400 | 0.0003 | 0.7449 | 0.6083 | 0.5154 | 0.7294 | 0.7333 | 0.6894 | 0.7176 | |
|
|
| 4.4379 | 1500 | 0.0003 | - | - | - | - | - | - | - | |
|
|
| 4.7337 | 1600 | 0.0003 | 0.7461 | 0.6095 | 0.5165 | 0.7303 | 0.7347 | 0.6915 | 0.7198 | |
|
|
|
|
|
|
|
|
### Framework Versions |
|
|
- Python: 3.11.11 |
|
|
- Sentence Transformers: 4.1.0 |
|
|
- Transformers: 4.51.3 |
|
|
- PyTorch: 2.6.0+cu124 |
|
|
- Accelerate: 1.6.0 |
|
|
- Datasets: 3.5.0 |
|
|
- Tokenizers: 0.21.1 |
|
|
|
|
|
## Citation |
|
|
|
|
|
### BibTeX |
|
|
|
|
|
#### Sentence Transformers |
|
|
```bibtex |
|
|
@inproceedings{reimers-2019-sentence-bert, |
|
|
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks", |
|
|
author = "Reimers, Nils and Gurevych, Iryna", |
|
|
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing", |
|
|
month = "11", |
|
|
year = "2019", |
|
|
publisher = "Association for Computational Linguistics", |
|
|
url = "https://arxiv.org/abs/1908.10084", |
|
|
} |
|
|
``` |
|
|
|
|
|
#### MSELoss |
|
|
```bibtex |
|
|
@inproceedings{reimers-2020-multilingual-sentence-bert, |
|
|
title = "Making Monolingual Sentence Embeddings Multilingual using Knowledge Distillation", |
|
|
author = "Reimers, Nils and Gurevych, Iryna", |
|
|
booktitle = "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing", |
|
|
month = "11", |
|
|
year = "2020", |
|
|
publisher = "Association for Computational Linguistics", |
|
|
url = "https://arxiv.org/abs/2004.09813", |
|
|
} |
|
|
``` |
|
|
|
|
|
<!-- |
|
|
## Glossary |
|
|
|
|
|
*Clearly define terms in order to be accessible across audiences.* |
|
|
--> |
|
|
|
|
|
<!-- |
|
|
## Model Card Authors |
|
|
|
|
|
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.* |
|
|
--> |
|
|
|
|
|
<!-- |
|
|
## Model Card Contact |
|
|
|
|
|
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.* |
|
|
--> |