diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..6fbf3b054ce35e3e17f04aa41a0986e4973dd980
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,59 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+NVIDIA-Nemotron-3-Nano-30B-A3B-BF16-new-chat-template/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+NVIDIA-Nemotron-Nano-12B-v2/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+NVIDIA-Nemotron-Nano-9B-v2/acc-vs-budget.png filter=lfs diff=lfs merge=lfs -text
+NVIDIA-Nemotron-Nano-9B-v2/accuracy_chart.png filter=lfs diff=lfs merge=lfs -text
+NVIDIA-Nemotron-Nano-9B-v2/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+NVIDIA-Nemotron-Nano-9B-v2-BF16/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+NVIDIA-Nemotron-Nano-9B-v2-EA/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+NVIDIA-Nemotron-Nano-9B-v2-NGC/acc-vs-budget.png filter=lfs diff=lfs merge=lfs -text
+NVIDIA-Nemotron-Nano-9B-v2-NGC/accuracy_chart.png filter=lfs diff=lfs merge=lfs -text
+NVIDIA-Nemotron-Nano-9B-v2-NGC/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+NVIDIA-Nemotron-Nano-9B-v2-rc0807/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+NVIDIA-Nemotron-Nano-9B-v2-rc0807-safetensors/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+NVIDIA-Nemotron-Nano-9B-v2.bak/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+Nemotron-H-4B-Base-8K/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+Nemotron-H-4B-Instruct-128K/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+Nemotron-H-8B-Base-8K/nemo/context/nemo_tokenizer/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+Nemotron-H-8B-Base-8K/nemo/weights/.metadata filter=lfs diff=lfs merge=lfs -text
+Nemotron-H-8B-Base-8K/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+Nemotron-Nano-3-30B-A3.5B-dev-1016/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+Nvidia-Nemotron-Nano-v2-9B-0805/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+Qwen3-14B/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+Qwen3-8B/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+llama-3.1-nemoguard-8b-content-safety/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+nano-12b-v2-finalcheckpoint/tokenizer.json filter=lfs diff=lfs merge=lfs -text
diff --git a/EuroLLM-9B/.gitattributes b/EuroLLM-9B/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b
--- /dev/null
+++ b/EuroLLM-9B/.gitattributes
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
diff --git a/EuroLLM-9B/README.md b/EuroLLM-9B/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..bd3eb95a83df0a6cea3a379f761d8b77ac74ddb2
--- /dev/null
+++ b/EuroLLM-9B/README.md
@@ -0,0 +1,126 @@
+---
+license: apache-2.0
+language:
+- en
+- de
+- es
+- fr
+- it
+- pt
+- pl
+- nl
+- tr
+- sv
+- cs
+- el
+- hu
+- ro
+- fi
+- uk
+- sl
+- sk
+- da
+- lt
+- lv
+- et
+- bg
+- 'no'
+- ca
+- hr
+- ga
+- mt
+- gl
+- zh
+- ru
+- ko
+- ja
+- ar
+- hi
+library_name: transformers
+---
+
+# Model Card for EuroLLM-9B
+
+
+This is the model card for EuroLLM-9B. You can also check the instruction-tuned version: [EuroLLM-9B-Instruct](https://huggingface.co/utter-project/EuroLLM-9B-Instruct).
+
+- **Developed by:** Unbabel, Instituto Superior Técnico, Instituto de Telecomunicações, University of Edinburgh, Aveni, University of Paris-Saclay, University of Amsterdam, Naver Labs, Sorbonne Université.
+- **Funded by:** European Union.
+- **Model type:** A 9B-parameter multilingual transformer LLM.
+- **Language(s) (NLP):** Bulgarian, Croatian, Czech, Danish, Dutch, English, Estonian, Finnish, French, German, Greek, Hungarian, Irish, Italian, Latvian, Lithuanian, Maltese, Polish, Portuguese, Romanian, Slovak, Slovenian, Spanish, Swedish, Arabic, Catalan, Chinese, Galician, Hindi, Japanese, Korean, Norwegian, Russian, Turkish, and Ukrainian.
+- **License:** Apache License 2.0.
+
+## Model Details
+
+The EuroLLM project aims to create a suite of LLMs capable of understanding and generating text in all European Union languages, as well as some additional relevant languages.
+EuroLLM-9B is a 9B parameter model trained on 4 trillion tokens divided across the considered languages and several data sources: Web data, parallel data (en-xx and xx-en), and high-quality datasets.
+EuroLLM-9B-Instruct was further instruction-tuned on EuroBlocks, an instruction-tuning dataset with a focus on general instruction-following and machine translation.
+
+
+### Model Description
+
+EuroLLM uses a standard, dense Transformer architecture:
+- We use grouped query attention (GQA) with 8 key-value heads, since it has been shown to increase speed at inference time while maintaining downstream performance.
+- We perform pre-layer normalization, since it improves training stability, and use RMSNorm, which is faster.
+- We use the SwiGLU activation function, since it has been shown to lead to good results on downstream tasks.
+- We use rotary positional embeddings (RoPE) in every layer, since these have been shown to lead to good performance while allowing the context length to be extended.
+
+For pre-training, we use 400 NVIDIA H100 GPUs of the MareNostrum 5 supercomputer, training the model with a constant batch size of 2,800 sequences (approximately 12 million tokens), the Adam optimizer, and BF16 precision.
+Here is a summary of the model hyper-parameters:
+| Hyper-parameter | Value |
+|--------------------------------------|----------------------|
+| Sequence Length | 4,096 |
+| Number of Layers | 42 |
+| Embedding Size | 4,096 |
+| FFN Hidden Size | 12,288 |
+| Number of Heads | 32 |
+| Number of KV Heads (GQA) | 8 |
+| Activation Function | SwiGLU |
+| Position Encodings | RoPE (Θ = 10,000) |
+| Layer Norm | RMSNorm |
+| Tied Embeddings | No |
+| Embedding Parameters | 0.524B |
+| LM Head Parameters | 0.524B |
+| Non-embedding Parameters | 8.105B |
+| Total Parameters | 9.154B |
+
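+The parameter counts in the table can be reproduced from the hyper-parameters above. The sketch below assumes the standard Llama-style layout given in `config.json` (untied embeddings, GQA attention projections, a three-matrix SwiGLU FFN, and per-layer RMSNorm weights); the results land within ~0.002B of the reported figures, with the remaining gap down to rounding or accounting differences.
+
+    # Sanity check of the parameter counts in the table above (a sketch,
+    # not an official accounting).
+    vocab, d_model, d_ffn, n_layers = 128_000, 4096, 12_288, 42
+    n_heads, n_kv_heads = 32, 8
+    head_dim = d_model // n_heads  # 128
+
+    # Attention: Q and O projections are d*d; under GQA, K and V are
+    # d * (n_kv_heads * head_dim).
+    attn = 2 * d_model * d_model + 2 * d_model * n_kv_heads * head_dim
+    # SwiGLU FFN: gate, up, and down projections.
+    ffn = 3 * d_model * d_ffn
+    # Two RMSNorm weight vectors per layer.
+    norms = 2 * d_model
+
+    non_embedding = n_layers * (attn + ffn + norms) + d_model  # + final norm
+    embedding = vocab * d_model  # input embeddings
+    lm_head = vocab * d_model    # untied output head
+
+    print(f"embedding:     {embedding / 1e9:.3f}B")      # ~0.524B
+    print(f"non-embedding: {non_embedding / 1e9:.3f}B")  # ~8.104B
+    print(f"total:         {(embedding + lm_head + non_embedding) / 1e9:.3f}B")  # ~9.152B
+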
+## Run the model
+
+    from transformers import AutoModelForCausalLM, AutoTokenizer
+
+    model_id = "utter-project/EuroLLM-9B"
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    model = AutoModelForCausalLM.from_pretrained(model_id)
+
+    # EuroLLM-9B is a base (not instruction-tuned) model, so prompt it
+    # with a completion-style pattern rather than a chat template.
+    text = "English: My name is EuroLLM. Portuguese:"
+
+    inputs = tokenizer(text, return_tensors="pt")
+    outputs = model.generate(**inputs, max_new_tokens=20)
+    print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+
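+Since `config.json` stores the weights as `torch_dtype: bfloat16`, the model can also be loaded in half precision to roughly halve memory use. A minimal sketch (the `device_map="auto"` option additionally requires the `accelerate` package):
+
+    import torch
+    from transformers import AutoModelForCausalLM, AutoTokenizer
+
+    model_id = "utter-project/EuroLLM-9B"
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    # Load in bfloat16 (the dtype stored in the checkpoint) and let
+    # accelerate place layers across the available devices.
+    model = AutoModelForCausalLM.from_pretrained(
+        model_id,
+        torch_dtype=torch.bfloat16,
+        device_map="auto",
+    )
+
+    text = "English: My name is EuroLLM. Portuguese:"
+    inputs = tokenizer(text, return_tensors="pt").to(model.device)
+    outputs = model.generate(**inputs, max_new_tokens=20)
+    print(tokenizer.decode(outputs[0], skip_special_tokens=True))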
+
+## Results
+
+### EU Languages
+
+
+
+**Table 1:** Comparison of open-weight LLMs on multilingual benchmarks. The Borda count corresponds to the average ranking of the models (see [Colombo et al., 2022](https://arxiv.org/abs/2202.03799)). For ARC-Challenge, HellaSwag, and MMLU we use the Okapi datasets ([Lai et al., 2023](https://aclanthology.org/2023.emnlp-demo.28/)), which include 11 languages. For MMLU-Pro and MUSR we translate the English version with Tower ([Alves et al., 2024](https://arxiv.org/abs/2402.17733)) into 6 EU languages.
+\* As there are no public versions of the pre-trained models, we evaluated them using the post-trained versions.
+
+The results in Table 1 highlight EuroLLM-9B's superior performance on multilingual tasks compared to other European-developed models (as shown by the Borda count of 1.0), as well as its strong competitiveness with non-European models, achieving results comparable to Gemma-2-9B and outperforming the rest on most benchmarks.
+
+### English
+
+
+
+
+**Table 2:** Comparison of open-weight LLMs on English general benchmarks.
+\* As there are no public versions of the pre-trained models, we evaluated them using the post-trained versions.
+
+The results in Table 2 demonstrate EuroLLM's strong performance on English tasks, surpassing most European-developed models and matching the performance of Mistral-7B (obtaining the same Borda count).
+
+
+## Bias, Risks, and Limitations
+
+EuroLLM-9B has not been aligned to human preferences, so the model may generate problematic outputs (e.g., hallucinations, harmful content, or false statements).
diff --git a/EuroLLM-9B/config.json b/EuroLLM-9B/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..357ea58a5553f6f5089abfff0d0285cb913ee4cc
--- /dev/null
+++ b/EuroLLM-9B/config.json
@@ -0,0 +1,27 @@
+{
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 12288,
+ "max_position_embeddings": 4096,
+ "model_type": "llama",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 42,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 10000.0,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.40.1",
+ "use_cache": true,
+ "vocab_size": 128000
+}
diff --git a/EuroLLM-9B/generation_config.json b/EuroLLM-9B/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..8a0505d8393c7cf690949e7d46272ba7f6f1b491
--- /dev/null
+++ b/EuroLLM-9B/generation_config.json
@@ -0,0 +1,6 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "transformers_version": "4.40.1"
+}
diff --git a/EuroLLM-9B/special_tokens_map.json b/EuroLLM-9B/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..451134b2ddc2e78555d1e857518c54b4bdc2e87d
--- /dev/null
+++ b/EuroLLM-9B/special_tokens_map.json
@@ -0,0 +1,23 @@
+{
+ "bos_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/EuroLLM-9B/tokenizer.json b/EuroLLM-9B/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..7ee13b31906cdd2b96c8e6c5000326a3776019af
--- /dev/null
+++ b/EuroLLM-9B/tokenizer.json
@@ -0,0 +1,388805 @@
+{
+ "version": "1.0",
+ "truncation": null,
+ "padding": null,
+ "added_tokens": [
+ {
+ "id": 0,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 1,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 2,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ }
+ ],
+ "normalizer": {
+ "type": "Sequence",
+ "normalizers": [
+ {
+ "type": "Prepend",
+ "prepend": "▁"
+ },
+ {
+ "type": "Replace",
+ "pattern": {
+ "String": " "
+ },
+ "content": "▁"
+ }
+ ]
+ },
+ "pre_tokenizer": null,
+ "post_processor": {
+ "type": "TemplateProcessing",
+ "single": [
+ {
+ "SpecialToken": {
+ "id": "",
+ "type_id": 0
+ }
+ },
+ {
+ "Sequence": {
+ "id": "A",
+ "type_id": 0
+ }
+ }
+ ],
+ "pair": [
+ {
+ "SpecialToken": {
+ "id": "",
+ "type_id": 0
+ }
+ },
+ {
+ "Sequence": {
+ "id": "A",
+ "type_id": 0
+ }
+ },
+ {
+ "SpecialToken": {
+ "id": "",
+ "type_id": 1
+ }
+ },
+ {
+ "Sequence": {
+ "id": "B",
+ "type_id": 1
+ }
+ }
+ ],
+ "special_tokens": {
+ "": {
+ "id": "",
+ "ids": [
+ 1
+ ],
+ "tokens": [
+ ""
+ ]
+ }
+ }
+ },
+ "decoder": {
+ "type": "Sequence",
+ "decoders": [
+ {
+ "type": "Replace",
+ "pattern": {
+ "String": "▁"
+ },
+ "content": " "
+ },
+ {
+ "type": "ByteFallback"
+ },
+ {
+ "type": "Fuse"
+ },
+ {
+ "type": "Strip",
+ "content": " ",
+ "start": 1,
+ "stop": 0
+ }
+ ]
+ },
+ "model": {
+ "type": "BPE",
+ "dropout": null,
+ "unk_token": "",
+ "continuing_subword_prefix": null,
+ "end_of_word_suffix": null,
+ "fuse_unk": true,
+ "byte_fallback": true,
+ "ignore_merges": false,
+ "vocab": {
+ "": 0,
+ "": 1,
+ "": 2,
+ "<|im_start|>": 3,
+ "<|im_end|>": 4,
+ "": 5,
+ "": 6,
+ "": 7,
+ "": 8,
+ "": 9,
+ "": 10,
+ "": 11,
+ "": 12,
+ "": 13,
+ "": 14,
+ "": 15,
+ "": 16,
+ "": 17,
+ "": 18,
+ "": 19,
+ "": 20,
+ "": 21,
+ "": 22,
+ "": 23,
+ "": 24,
+ "": 25,
+ "": 26,
+ "": 27,
+ "": 28,
+ "": 29,
+ "": 30,
+ "": 31,
+ "": 32,
+ "": 33,
+ "": 34,
+ "": 35,
+ "": 36,
+ "": 37,
+ "": 38,
+ "": 39,
+ "": 40,
+ "": 41,
+ "": 42,
+ "": 43,
+ "": 44,
+ "": 45,
+ "": 46,
+ "": 47,
+ "": 48,
+ "": 49,
+ "": 50,
+ "": 51,
+ "": 52,
+ "": 53,
+ "": 54,
+ "": 55,
+ "": 56,
+ "": 57,
+ "": 58,
+ "": 59,
+ "": 60,
+ "": 61,
+ "": 62,
+ "": 63,
+ "": 64,
+ "": 65,
+ "": 66,
+ "": 67,
+ "": 68,
+ "": 69,
+ "": 70,
+ "": 71,
+ "": 72,
+ "": 73,
+ "": 74,
+ "": 75,
+ "": 76,
+ "": 77,
+ "": 78,
+ "": 79,
+ "": 80,
+ "": 81,
+ "": 82,
+ "": 83,
+ "": 84,
+ "": 85,
+ "": 86,
+ "": 87,
+ "": 88,
+ "": 89,
+ "": 90,
+ "": 91,
+ "": 92,
+ "": 93,
+ "": 94,
+ "": 95,
+ "": 96,
+ "": 97,
+ "": 98,
+ "": 99,
+ "": 100,
+ "": 101,
+ "": 102,
+ "": 103,
+ "": 104,
+ "": 105,
+ "": 106,
+ "": 107,
+ "": 108,
+ "": 109,
+ "": 110,
+ "": 111,
+ "": 112,
+ "": 113,
+ "": 114,
+ "": 115,
+ "": 116,
+ "": 117,
+ "": 118,
+ "": 119,
+ "": 120,
+ "": 121,
+ "": 122,
+ "": 123,
+ "": 124,
+ "": 125,
+ "": 126,
+ "": 127,
+ "": 128,
+ "": 129,
+ "": 130,
+ "": 131,
+ "": 132,
+ "": 133,
+ "": 134,
+ "": 135,
+ "": 136,
+ "": 137,
+ "": 138,
+ "": 139,
+ "": 140,
+ "": 141,
+ "": 142,
+ "": 143,
+ "": 144,
+ "": 145,
+ "": 146,
+ "": 147,
+ "": 148,
+ "": 149,
+ "": 150,
+ "": 151,
+ "": 152,
+ "": 153,
+ "": 154,
+ "": 155,
+ "": 156,
+ "": 157,
+ "": 158,
+ "": 159,
+ "": 160,
+ "": 161,
+ "": 162,
+ "": 163,
+ "": 164,
+ "": 165,
+ "": 166,
+ "": 167,
+ "": 168,
+ "": 169,
+ "": 170,
+ "": 171,
+ "": 172,
+ "": 173,
+ "": 174,
+ "": 175,
+ "": 176,
+ "": 177,
+ "": 178,
+ "": 179,
+ "": 180,
+ "": 181,
+ "": 182,
+ "": 183,
+ "": 184,
+ "": 185,
+ "": 186,
+ "": 187,
+ "": 188,
+ "": 189,
+ "": 190,
+ "": 191,
+ "": 192,
+ "": 193,
+ "": 194,
+ "": 195,
+ "": 196,
+ "": 197,
+ "": 198,
+ "": 199,
+ "": 200,
+ "": 201,
+ "": 202,
+ "": 203,
+ "": 204,
+ "": 205,
+ "": 206,
+ "": 207,
+ "": 208,
+ "": 209,
+ "": 210,
+ "": 211,
+ "": 212,
+ "": 213,
+ "": 214,
+ "": 215,
+ "": 216,
+ "": 217,
+ "": 218,
+ "": 219,
+ "": 220,
+ "": 221,
+ "": 222,
+ "": 223,
+ "": 224,
+ "": 225,
+ "": 226,
+ "": 227,
+ "": 228,
+ "": 229,
+ "": 230,
+ "": 231,
+ "": 232,
+ "": 233,
+ "": 234,
+ "": 235,
+ "": 236,
+ "": 237,
+ "": 238,
+ "": 239,
+ "": 240,
+ "": 241,
+ "": 242,
+ "": 243,
+ "": 244,
+ "": 245,
+ "": 246,
+ "": 247,
+ "": 248,
+ "