---
license: apache-2.0
datasets:
- Norod78/hebrew_lyrics_prompting
- Norod78/hebrew_lyrics_prompting_finetune
language:
- he
base_model:
- google/gemma-2-2b-it
---

# Silly Song Generator :)
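
A fine-tune of [google/gemma-2-2b-it](https://huggingface.co/google/gemma-2-2b-it) that writes silly Hebrew song lyrics, trained on the Norod78/hebrew_lyrics_prompting datasets. The example below loads the model, asks it (in Hebrew) for a song about a potato with social anxiety, prints the decoded result, and then repeats the generation with a `TextStreamer`.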

```python
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
import torch

# Local checkout of the model; point this at the model's Hub repo id to load it remotely.
model_id = "./hebrew_lyrics-gemma2_2b"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)

print(f"model.device = {model.device}")
# "Write me a song about a potato with social anxiety"
input_text = "讻转讜讘 诇讬 砖讬专 注诇 转驻讜讞 讗讚诪讛 注诐 讞专讚讛 讞讘专转讬转"

# Build the prompt with the chat template, generate, and decode the full output at once.
input_template = tokenizer.apply_chat_template([{"role": "user", "content": input_text}], tokenize=False, add_generation_prompt=True)
input_ids = tokenizer(input_template, return_tensors="pt").to(model.device)
outputs = model.generate(**input_ids, max_new_tokens=256, repetition_penalty=1.05, temperature=0.5, no_repeat_ngram_size=4, do_sample=True)
decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
# Swap the chat-template role markers for Hebrew labels ("User:" / "Model:").
result = decoded_output.replace("user\n", "诪砖转诪砖:\n").replace("model\n", "\n诪讜讚诇:\n")
print("result = ", result)

# Same prompt again, this time streaming tokens to stdout as they are generated.
chat = [
    {"role": "user", "content": input_text},
]
chat_with_template = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
inputs = tokenizer([chat_with_template], return_tensors="pt").to(model.device)

text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=256, repetition_penalty=1.1, temperature=0.6, top_p=0.4, top_k=40, do_sample=True)
```
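
The two `generate` calls differ only in how output is consumed: the first decodes the full sequence after generation finishes, while the second streams tokens through `TextStreamer` as they are produced. Both sample with a mild repetition penalty; the streaming variant additionally narrows the candidate pool with `top_p` and `top_k`.

For completeness, here is a minimal sketch of the same generation through the high-level `pipeline` API. The Hub repo id below is an assumption (the snippet above loads from a local directory); substitute the real repo id or a local path.

```python
# Minimal pipeline sketch.
# ASSUMPTION: "Norod78/hebrew_lyrics-gemma2_2b" is a guessed Hub repo id;
# replace it with the actual repo id or a local path such as "./hebrew_lyrics-gemma2_2b".
from transformers import pipeline
import torch

generator = pipeline(
    "text-generation",
    model="Norod78/hebrew_lyrics-gemma2_2b",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# "Write me a song about a potato with social anxiety"
chat = [{"role": "user", "content": "讻转讜讘 诇讬 砖讬专 注诇 转驻讜讞 讗讚诪讛 注诐 讞专讚讛 讞讘专转讬转"}]
result = generator(chat, max_new_tokens=256, do_sample=True, temperature=0.5, repetition_penalty=1.05)
print(result[0]["generated_text"][-1]["content"])  # the assistant turn appended by the pipeline
```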