mlx-community/embeddinggemma-300m-4bit

Sentence Similarity

sentence-transformers

feature-extraction

Model card · Files and versions

prince-canuma committed on Sep 4

Commit

5d9ef07

·

verified ·

1 Parent(s): ee30c32

Update README.md

Files changed (1) hide show

README.md +32 -2

README.md CHANGED Viewed

@@ -30,8 +30,20 @@ import mlx.core as mx
 model, tokenizer = load("mlx-community/embeddinggemma-300m-4bit")
-# For text embeddings
-output = generate(model, processor, texts=["I like grapes", "I like fruits"])
 embeddings = output.text_embeds  # Normalized embeddings
 # Compute dot product between normalized embeddings
@@ -41,4 +53,22 @@ print("Similarity matrix between texts:")
 print(similarity_matrix)
 ```

 model, tokenizer = load("mlx-community/embeddinggemma-300m-4bit")
+# For text embedding
+sentences = [
+    "task: sentence similarity | query: Nothing really matters.",
+    "task: sentence similarity | query: The dog is barking.",
+    "task: sentence similarity | query: The dog is barking.",
+]
+encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='mlx')
+# Compute token embeddings
+input_ids = encoded_input['input_ids']
+attention_mask = encoded_input['attention_mask']
+output = model(input_ids, attention_mask)
 embeddings = output.text_embeds  # Normalized embeddings
 # Compute dot product between normalized embeddings
 print(similarity_matrix)
+# You can use these task-specific prefixes for different tasks
+task_prefixes = {
+    "BitextMining": "task: search result | query: ",
+    "Clustering": "task: clustering | query: ",
+    "Classification": "task: classification | query: ",
+    "MultilabelClassification": "task: classification | query: ",
+    "PairClassification": "task: sentence similarity | query: ",
+    "InstructionRetrieval": "task: code retrieval | query: ",
+    "Reranking": "task: search result | query: ",
+    "Retrieval": "task: search result | query: ",
+    "Retrieval-query": "task: search result | query: ",
+    "Retrieval-document": "title: none | text: ",
+    "STS": "task: sentence similarity | query: ",
+    "Summarization": "task: summarization | query: ",
+    "document": "title: none | text: "
+}
 ```