Add files using upload-large-folder tool
Note: this view is limited to 50 files because the commit contains too many changes; see the raw diff for the full change set.
- tests/general/test_dataset.py +90 -0
- tests/general/test_model.py +30 -0
- tests/general/test_stream.py +20 -0
- tests/general/test_template.py +74 -0
- tests/hub/__init__.py +0 -0
- tests/hub/test_check_model.py +24 -0
- tests/infer/test_agent.py +30 -0
- tests/infer/test_infer.py +73 -0
- tests/infer/test_logprobs.py +71 -0
- tests/infer/test_main.py +73 -0
- tests/infer/test_max_memory.py +10 -0
- tests/infer/test_mllm.py +79 -0
- tests/llm/__init__.py +0 -0
- tests/llm/config/infer.json +5 -0
- tests/llm/config/sft.json +7 -0
- tests/llm/data/alpaca.csv +4 -0
- tests/llm/data/alpaca.jsonl +3 -0
- tests/llm/data/alpaca2.csv +4 -0
- tests/llm/data/chatml.jsonl +3 -0
- tests/llm/data/conversations.jsonl +3 -0
- tests/llm/data/multi_modal_1.jsonl +3 -0
- tests/llm/data/multi_modal_2.jsonl +3 -0
- tests/llm/data/multi_modal_3.jsonl +3 -0
- tests/llm/data/sharegpt.jsonl +3 -0
- tests/llm/data/swift_multi.json +3 -0
- tests/llm/data/swift_multi.jsonl +3 -0
- tests/llm/data/swift_pre.csv +4 -0
- tests/llm/data/swift_pre.jsonl +3 -0
- tests/llm/data/swift_single.csv +4 -0
- tests/llm/data/swift_single.jsonl +3 -0
- tests/llm/load_model.py +45 -0
- tests/llm/load_template.py +138 -0
- tests/llm/test_custom.py +74 -0
- tests/llm/test_dataset.py +19 -0
- tests/llm/test_ollama_export.py +80 -0
- tests/llm/test_run.py +458 -0
- tests/llm/test_run3.py +172 -0
- tests/llm/test_template.py +104 -0
- tests/llm/test_utils.py +28 -0
- tests/megatron/test_align/test_llm.py +94 -0
- tests/megatron/test_export.py +64 -0
- tests/megatron/test_model.py +65 -0
- tests/megatron/test_save.py +61 -0
- tests/megatron/test_train.py +37 -0
- tests/models/test_flash_attn.py +8 -0
- tests/models/test_llm.py +16 -0
- tests/models/test_mllm.py +16 -0
- tests/sample/test_client.py +35 -0
- tests/test_align/test_cls.py +60 -0
- tests/test_align/test_lmdeploy_vlm.py +80 -0
tests/general/test_dataset.py
ADDED
@@ -0,0 +1,90 @@
+from typing import List
+
+from swift.llm import load_dataset
+
+
+def _test_dataset(datasets: List[str], num_proc: int = 1, strict: bool = False, **kwargs):
+    dataset = load_dataset(datasets, num_proc=num_proc, strict=strict, **kwargs)
+    print(f'dataset[0]: {dataset[0]}')
+    print(f'dataset[1]: {dataset[1]}')
+
+
+def test_sft():
+    # swift/SlimOrca swift/cosmopedia-100k
+    # _test_dataset(['lvjianjin/AdvertiseGen'])
+    # _test_dataset(['AI-ModelScope/Duet-v0.5'])
+    # _test_dataset(['swift/SlimOrca', 'swift/cosmopedia-100k'])
+    # _test_dataset(['OmniData/Zhihu-KOL-More-Than-100-Upvotes'])
+    # _test_dataset(['OmniData/Zhihu-KOL'])
+    _test_dataset([
+        'AI-ModelScope/alpaca-gpt4-data-zh#1000', 'AI-ModelScope/alpaca-gpt4-data-en#1000',
+        'AI-ModelScope/LongAlpaca-12k#1000'
+    ])
+    # _test_dataset(['swift/Infinity-Instruct:all'])
+    # _test_dataset(['swift/sharegpt:all'])
+    # _test_dataset(['AI-ModelScope/sharegpt_gpt4:all'])
+    # _test_dataset(['iic/ms_bench'])
+    # _test_dataset(['swift/tagengo-gpt4'])
+
+
+def test_mllm():
+    # _test_dataset(['AI-ModelScope/ShareGPT4V:all'])
+    # _test_dataset(['AI-ModelScope/LLaVA-Pretrain'])
+    # _test_dataset(['swift/TextCaps'])
+    # _test_dataset(['swift/RLAIF-V-Dataset:all'])
+    # _test_dataset(['swift/OK-VQA_train'])
+    # _test_dataset(['swift/OCR-VQA'])
+    # _test_dataset(['swift/A-OKVQA'])
+    # _test_dataset(['AI-ModelScope/MovieChat-1K-test'])
+    _test_dataset([
+        'AI-ModelScope/LaTeX_OCR:all', 'modelscope/coco_2014_caption:validation',
+        'speech_asr/speech_asr_aishell1_trainsets:validation'
+    ],
+                  strict=False)
+    # _test_dataset(['swift/VideoChatGPT:all'])
+    # _test_dataset(['speech_asr/speech_asr_aishell1_trainsets:validation'])
+    # _test_dataset(['AI-ModelScope/captcha-images'])
+    # _test_dataset(['swift/gpt4v-dataset:all'])
+    # _test_dataset(['modelscope/coco_2014_caption:validation'])
+    # _test_dataset(['AI-ModelScope/LLaVA-Instruct-150K'], num_proc=16)
+
+
+def test_agent():
+    _test_dataset(['swift/ToolBench'])
+    # _test_dataset(['AI-ModelScope/ms_agent_for_agentfabric:all'])
+
+
+def test_dpo():
+    _test_dataset(['AI-ModelScope/orpo-dpo-mix-40k'])
+    _test_dataset(['AI-ModelScope/hh-rlhf:all'])
+    _test_dataset(['AI-ModelScope/hh_rlhf_cn:all'])
+    _test_dataset(['hjh0119/shareAI-Llama3-DPO-zh-en-emoji:all'])
+
+
+def test_kto():
+    _test_dataset(['AI-ModelScope/ultrafeedback-binarized-preferences-cleaned-kto'])
+
+
+def test_pretrain():
+    _test_dataset(['AI-ModelScope/ruozhiba:all'])
+
+
+def test_dataset_info():
+    _test_dataset(['swift/self-cognition#500'], model_name='xiao huang', model_author='swift')
+    # _test_dataset(['codefuse-ai/CodeExercise-Python-27k'])
+
+
+def test_cls():
+    _test_dataset(['simpleai/HC3-Chinese:baike'])
+    _test_dataset(['simpleai/HC3-Chinese:baike_cls'])
+
+
+if __name__ == '__main__':
+    # test_sft()
+    # test_agent()
+    # test_dpo()
+    # test_kto()
+    test_mllm()
+    # test_pretrain()
+    # test_dataset_info()
+    # test_cls()
tests/general/test_model.py
ADDED
@@ -0,0 +1,30 @@
+import os
+
+import torch
+
+from swift.utils import get_device
+
+os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
+
+
+def test_qwen2():
+    import os
+    from swift.llm import get_model_tokenizer
+    model, tokenizer = get_model_tokenizer('Qwen/Qwen2-7B-Instruct', load_model=False)
+    print(f'model: {model}, tokenizer: {tokenizer}')
+    # test hf
+    model, tokenizer = get_model_tokenizer('Qwen/Qwen2-7B-Instruct', load_model=False, use_hf=True)
+
+    model, tokenizer = get_model_tokenizer(
+        'Qwen/Qwen2-7B-Instruct', torch.float32, device_map=get_device(), attn_impl='flash_attn')
+    print(f'model: {model}, tokenizer: {tokenizer}')
+
+
+def test_modelscope_hub():
+    from swift.llm import get_model_tokenizer
+    model, tokenizer = get_model_tokenizer('Qwen/Qwen2___5-Math-1___5B-Instruct/', load_model=False)
+
+
+if __name__ == '__main__':
+    test_qwen2()
+    # test_modelscope_hub()
tests/general/test_stream.py
ADDED
@@ -0,0 +1,20 @@
+from swift.llm import load_dataset
+
+
+def test_local_dataset():
+    # please use git clone
+    from swift.llm import git_clone_github
+    model_dir = git_clone_github('https://www.modelscope.cn/datasets/swift/swift-sft-mixture.git')
+    dataset = load_dataset(datasets=[f'{model_dir}:firefly'], streaming=True)[0]
+    print(next(iter(dataset)))
+
+
+def test_hub_dataset():
+    local_dataset = 'swift/swift-sft-mixture:firefly'
+    dataset = load_dataset(datasets=[local_dataset], streaming=True)[0]
+    print(next(iter(dataset)))
+
+
+if __name__ == '__main__':
+    test_local_dataset()
+    # test_hub_dataset()
tests/general/test_template.py
ADDED
@@ -0,0 +1,74 @@
+from datasets import Dataset
+
+from swift.llm import EncodePreprocessor, TemplateInputs, get_model_tokenizer, get_template, load_dataset
+
+
+def test_template():
+    _, tokenizer = get_model_tokenizer('Qwen/Qwen2-7B-Instruct', load_model=False)
+    template = get_template(tokenizer.model_meta.template, tokenizer)
+    template_inputs = TemplateInputs([{
+        'role': 'system',
+        'content': 'AAA'
+    }, {
+        'role': 'user',
+        'content': 'BBB'
+    }, {
+        'role': 'assistant',
+        'content': 'CCC'
+    }, {
+        'role': 'user',
+        'content': 'DDD'
+    }])
+    inputs = template.encode(template_inputs)
+    print(f'inputs.keys(): {inputs.keys()}')
+    print(tokenizer.decode(inputs['input_ids']))
+
+
+def test_mllm():
+    _, tokenizer = get_model_tokenizer('Qwen/Qwen2-VL-7B-Instruct', load_model=False)
+    template = get_template(tokenizer.model_meta.template, tokenizer)
+    template_inputs = TemplateInputs([{
+        'role': 'system',
+        'content': 'AAA'
+    }, {
+        'role': 'user',
+        'content': '<image>BBB'
+    }, {
+        'role': 'assistant',
+        'content': 'CCC'
+    }, {
+        'role': 'user',
+        'content': 'DDD'
+    }],
+                                     images=['http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/cat.png'])
+    inputs = template.encode(template_inputs)
+    print(f'inputs.keys(): {inputs.keys()}')
+    print(template.safe_decode(inputs['input_ids']))
+
+
+def _test_dataset_map(model_id: str, dataset_id: str):
+    _, tokenizer = get_model_tokenizer(model_id, load_model=False)
+    template = get_template(tokenizer.model_meta.template, tokenizer)
+    dataset = load_dataset([dataset_id], num_proc=2)[0]
+
+    # 1: 1500
+    # 16: 10766.36 examples/s
+    new_dataset = EncodePreprocessor(template)(dataset, num_proc=4)
+    print(f'new_dataset: {new_dataset}')
+    print(template.safe_decode(new_dataset[0]['input_ids']))
+    print(template.safe_decode(new_dataset[1]['input_ids']))
+
+
+def test_llm_dataset_map():
+    _test_dataset_map('Qwen/Qwen2-7B-Instruct', 'AI-ModelScope/alpaca-gpt4-data-zh')
+
+
+def test_mllm_dataset_map():
+    _test_dataset_map('Qwen/Qwen2-VL-7B-Instruct', 'modelscope/coco_2014_caption:validation#100')
+
+
+if __name__ == '__main__':
+    # test_template()
+    # test_mllm()
+    # test_llm_dataset_map()
+    test_mllm_dataset_map()
tests/hub/__init__.py
ADDED
File without changes
tests/hub/test_check_model.py
ADDED
@@ -0,0 +1,24 @@
+import os
+import shutil
+import tempfile
+import unittest
+
+from modelscope import Model, check_local_model_is_latest
+
+
+class TestCheckModel(unittest.TestCase):
+
+    def setUp(self):
+        print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
+        self.tmp_dir = tempfile.TemporaryDirectory().name
+        if not os.path.exists(self.tmp_dir):
+            os.makedirs(self.tmp_dir)
+
+    def tearDown(self):
+        import peft
+        shutil.rmtree(self.tmp_dir)
+        super().tearDown()
+
+    def test_check_model(self):
+        model = Model.from_pretrained('damo/nlp_corom_sentence-embedding_chinese-base', revision='v1.0.0')
+        self.assertFalse(check_local_model_is_latest(model.model_dir))
tests/infer/test_agent.py
ADDED
@@ -0,0 +1,30 @@
+import os
+
+import torch
+
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+kwargs = {
+    'per_device_train_batch_size': 2,
+    'save_steps': 50,
+    'gradient_accumulation_steps': 4,
+    'num_train_epochs': 1,
+}
+
+
+def test_sft():
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
+    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
+    sft_main(
+        TrainArguments(model='Qwen/Qwen2-7B-Instruct', dataset=['iic/ms_agent#2000'], loss_scale='react', **kwargs))
+
+
+def test_infer():
+    from swift.llm import infer_main, InferArguments
+    ckpt_dir = 'output/Qwen2-7B-Instruct/v229-20241126-133152/checkpoint-100'
+    infer_main(InferArguments(ckpt_dir=ckpt_dir))
+
+
+if __name__ == '__main__':
+    test_sft()
+    # test_infer()
tests/infer/test_infer.py
ADDED
@@ -0,0 +1,73 @@
+import os
+from typing import Literal
+
+import torch
+
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+
+def _prepare(infer_backend: Literal['vllm', 'pt', 'lmdeploy']):
+    from swift.llm import InferRequest, get_template
+    if infer_backend == 'lmdeploy':
+        from swift.llm import LmdeployEngine
+        engine = LmdeployEngine('OpenGVLab/InternVL2_5-2B', torch.float32)
+    elif infer_backend == 'pt':
+        from swift.llm import PtEngine
+        engine = PtEngine('Qwen/Qwen2-7B-Instruct', max_batch_size=16)
+    elif infer_backend == 'vllm':
+        from swift.llm import VllmEngine
+        engine = VllmEngine('Qwen/Qwen2-7B-Instruct')
+    template = get_template(engine.model_meta.template, engine.tokenizer)
+    infer_requests = [
+        # InferRequest([{'role': 'user', 'content': '晚上睡不着觉怎么办'}]) for i in range(100)
+        InferRequest([{
+            'role': 'user',
+            'content': 'hello! who are you'
+        }]) for i in range(100)
+    ]
+    return engine, template, infer_requests
+
+
+def test_infer(infer_backend):
+    from swift.llm import RequestConfig
+    from swift.plugin import InferStats
+    engine, template, infer_requests = _prepare(infer_backend=infer_backend)
+    request_config = RequestConfig(temperature=0)
+    infer_stats = InferStats()
+
+    response_list = engine.infer(
+        infer_requests, template=template, request_config=request_config, metrics=[infer_stats])
+
+    for response in response_list[:2]:
+        print(response.choices[0].message.content)
+    print(infer_stats.compute())
+
+
+def test_stream(infer_backend):
+    from swift.llm import RequestConfig
+    from swift.plugin import InferStats
+    engine, template, infer_requests = _prepare(infer_backend=infer_backend)
+    infer_stats = InferStats()
+    request_config = RequestConfig(temperature=0, stream=True, logprobs=True)
+
+    gen_list = engine.infer(infer_requests, template=template, request_config=request_config, metrics=[infer_stats])
+
+    for response in gen_list[0]:
+        if response is None:
+            continue
+        print(response.choices[0].delta.content, end='', flush=True)
+    print()
+    print(infer_stats.compute())
+
+    gen_list = engine.infer(
+        infer_requests, template=template, request_config=request_config, use_tqdm=True, metrics=[infer_stats])
+
+    for response in gen_list[0]:
+        pass
+
+    print(infer_stats.compute())
+
+
+if __name__ == '__main__':
+    test_infer('pt')
+    # test_stream('pt')
tests/infer/test_logprobs.py
ADDED
@@ -0,0 +1,71 @@
+import os
+from typing import Literal
+
+import torch
+
+if __name__ == '__main__':
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+
+def _prepare(infer_backend: Literal['vllm', 'pt', 'lmdeploy']):
+    from swift.llm import InferRequest, get_template
+
+    if infer_backend == 'lmdeploy':
+        from swift.llm import LmdeployEngine
+        engine = LmdeployEngine('Qwen/Qwen2-7B-Instruct', torch.float32)
+    elif infer_backend == 'pt':
+        from swift.llm import PtEngine
+        engine = PtEngine('Qwen/Qwen2-7B-Instruct')
+    elif infer_backend == 'vllm':
+        from swift.llm import VllmEngine
+        engine = VllmEngine('Qwen/Qwen2-7B-Instruct')
+    template = get_template(engine.model_meta.template, engine.tokenizer)
+    infer_requests = [
+        InferRequest([{
+            'role': 'user',
+            'content': '晚上睡不着觉怎么办'
+        }]),
+        InferRequest([{
+            'role': 'user',
+            'content': 'hello! who are you'
+        }])
+    ]
+    return engine, template, infer_requests
+
+
+def test_infer(engine, template, infer_requests):
+    from swift.llm import RequestConfig
+    from swift.plugin import InferStats
+
+    request_config = RequestConfig(temperature=0, logprobs=True, top_logprobs=2)
+    infer_stats = InferStats()
+
+    response_list = engine.infer(
+        infer_requests, template=template, request_config=request_config, metrics=[infer_stats])
+
+    for response in response_list[:2]:
+        print(response.choices[0].message.content)
+    print(infer_stats.compute())
+
+
+def test_stream(engine, template, infer_requests):
+    from swift.llm import RequestConfig
+    from swift.plugin import InferStats
+
+    infer_stats = InferStats()
+    request_config = RequestConfig(temperature=0, stream=True, logprobs=True, top_logprobs=2)
+
+    gen_list = engine.infer(infer_requests, template=template, request_config=request_config, metrics=[infer_stats])
+
+    for response in gen_list[0]:
+        if response is None:
+            continue
+        print(response.choices[0].delta.content, end='', flush=True)
+
+    print(infer_stats.compute())
+
+
+if __name__ == '__main__':
+    engine, template, infer_requests = _prepare(infer_backend='pt')
+    test_infer(engine, template, infer_requests)
+    test_stream(engine, template, infer_requests)
tests/infer/test_main.py
ADDED
@@ -0,0 +1,73 @@
+import os
+
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+
+def test_cli(infer_backend):
+    from swift.llm import infer_main, InferArguments
+    args = InferArguments(model='Qwen/Qwen2-VL-7B-Instruct', infer_backend=infer_backend)
+    infer_main(args)
+
+
+def test_cli_jinja(infer_backend):
+    from swift.llm import infer_main, InferArguments
+    args = InferArguments(model='Qwen/Qwen2-VL-7B-Instruct', infer_backend=infer_backend, template_backend='jinja')
+    infer_main(args)
+
+
+def test_dataset(infer_backend):
+    from swift.llm import infer_main, InferArguments
+    args = InferArguments(
+        model='Qwen/Qwen2-7B-Instruct',
+        infer_backend=infer_backend,
+        val_dataset=['AI-ModelScope/alpaca-gpt4-data-zh#10'],
+        stream=True)
+    infer_main(args)
+
+
+def test_mllm_dataset(infer_backend):
+    from swift.llm import infer_main, InferArguments
+    args = InferArguments(
+        model='Qwen/Qwen2-VL-7B-Instruct',
+        infer_backend=infer_backend,
+        val_dataset=['modelscope/coco_2014_caption:validation#1000'],
+        stream=True)
+    infer_main(args)
+
+
+def test_dataset_ddp():
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
+    from swift.llm import infer_main, InferArguments
+    args = InferArguments(
+        model='Qwen/Qwen2-7B-Instruct', max_batch_size=64, val_dataset=['AI-ModelScope/alpaca-gpt4-data-zh#1000'])
+    infer_main(args)
+
+
+def test_dataset_mp_ddp():
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
+    from swift.llm import infer_main, InferArguments
+    args = InferArguments(
+        model='Qwen/Qwen2-7B-Instruct', max_batch_size=64, val_dataset=['AI-ModelScope/alpaca-gpt4-data-zh#1000'])
+    infer_main(args)
+
+
+def test_emu3_gen(infer_backend):
+    from swift.llm import infer_main, InferArguments
+    args = InferArguments(
+        model='BAAI/Emu3-Gen',
+        infer_backend=infer_backend,
+        stream=False,
+        use_chat_template=False,
+        top_k=2048,
+        max_new_tokens=40960)
+    infer_main(args)
+
+
+if __name__ == '__main__':
+    # test_cli('pt')
+    # test_cli_jinja('pt')
+    # test_dataset('pt')
+    # test_mllm_dataset('pt')
+    # test_dataset_ddp()
+    # test_dataset_mp_ddp()
+    test_emu3_gen('pt')
tests/infer/test_max_memory.py
ADDED
@@ -0,0 +1,10 @@
+from swift.llm import InferArguments, infer_main
+
+
+def test_max_memory():
+    infer_main(
+        InferArguments(model='Qwen/Qwen2.5-7B-Instruct', max_memory='{0: "50GB", 1: "5GB"}', device_map='sequential'))
+
+
+if __name__ == '__main__':
+    test_max_memory()
tests/infer/test_mllm.py
ADDED
@@ -0,0 +1,79 @@
+import os
+from typing import Literal
+
+import torch
+
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+
+def _prepare(infer_backend: Literal['vllm', 'pt', 'lmdeploy']):
+    from swift.llm import InferRequest, get_template
+    if infer_backend == 'lmdeploy':
+        from swift.llm import LmdeployEngine
+        engine = LmdeployEngine('Qwen/Qwen-VL-Chat', torch.float32)
+    elif infer_backend == 'pt':
+        from swift.llm import PtEngine
+        engine = PtEngine('Qwen/Qwen2-VL-7B-Instruct')
+    elif infer_backend == 'vllm':
+        from swift.llm import VllmEngine
+        engine = VllmEngine('Qwen/Qwen2-VL-7B-Instruct')
+    template = get_template(engine.model_meta.template, engine.processor)
+    infer_requests = [
+        InferRequest([{
+            'role': 'user',
+            'content': '晚上睡不着觉怎么办'
+        }]),
+        InferRequest([{
+            'role':
+            'user',
+            'content': [{
+                'type': 'image_url',
+                'image_url': 'http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/cat.png'
+            }]
+        }])
+    ]
+    return engine, template, infer_requests
+
+
+def test_infer(engine, template, infer_requests):
+    from swift.llm import RequestConfig
+    from swift.plugin import InferStats
+    request_config = RequestConfig(temperature=0)
+    infer_stats = InferStats()
+
+    response_list = engine.infer(
+        infer_requests, template=template, request_config=request_config, metrics=[infer_stats])
+
+    for response in response_list[:2]:
+        print(response.choices[0].message.content)
+    print(infer_stats.compute())
+
+
+def test_stream(engine, template, infer_requests):
+    from swift.llm import RequestConfig
+    from swift.plugin import InferStats
+    infer_stats = InferStats()
+    request_config = RequestConfig(temperature=0, stream=True, logprobs=True)
+
+    gen_list = engine.infer(infer_requests, template=template, request_config=request_config, metrics=[infer_stats])
+
+    for response in gen_list[0]:
+        if response is None:
+            continue
+        print(response.choices[0].delta.content, end='', flush=True)
+    print()
+    print(infer_stats.compute())
+
+    gen_list = engine.infer(
+        infer_requests, template=template, request_config=request_config, use_tqdm=True, metrics=[infer_stats])
+
+    for response in gen_list[0]:
+        pass
+
+    print(infer_stats.compute())
+
+
+if __name__ == '__main__':
+    engine, template, infer_requests = _prepare(infer_backend='pt')
+    test_infer(engine, template, infer_requests)
+    test_stream(engine, template, infer_requests)
tests/llm/__init__.py
ADDED
File without changes
tests/llm/config/infer.json
ADDED
@@ -0,0 +1,5 @@
+{
+  "ckpt_dir": "/mnt/workspace/yzhao/modelscope/swift/output/pai_test/checkpoint-6",
+  "val_dataset_sample": 2,
+  "load_dataset_config": true
+}
tests/llm/config/sft.json
ADDED
@@ -0,0 +1,7 @@
+{
+  "model_type": "qwen-1_8b-chat",
+  "dataset": "jd-sentiment-zh",
+  "output_dir": "output/pai_test",
+  "train_dataset_sample": 100,
+  "eval_steps": 5
+}
tests/llm/data/alpaca.csv
ADDED
@@ -0,0 +1,4 @@
+system,instruction,input,output
+00000,11111,22222,3.3
+,aaaaa,,ccccc
+,AAAAA,BBBBB,CCCCC
tests/llm/data/alpaca.jsonl
ADDED
@@ -0,0 +1,3 @@
+{"instruction": "11111", "input": "22222", "output": "33333", "history": [["aaaaa", "bbbbb"]], "system": "system123"}
+{"instruction": "aaaaa", "output": "ccccc"}
+{"instruction": "AAAAA", "input": "BBBBB", "output": "CCCCC"}
tests/llm/data/alpaca2.csv
ADDED
@@ -0,0 +1,4 @@
+instruction,output
+11111,33333
+aaaaa,ccccc
+AAAAA,CCCCC
tests/llm/data/chatml.jsonl
ADDED
@@ -0,0 +1,3 @@
+{"messages": [{"role": "system", "content": "00000"}, {"role": "user", "content": "11111"}, {"role": "assistant", "content": "22222"}]}
+{"messages": [{"role": "user", "content": "aaaaa"}, {"role": "assistant", "content": "bbbbb"}, {"role": "user", "content": "ccccc"}, {"role": "assistant", "content": "ddddd"}]}
+{"messages": [{"role": "user", "content": "AAAAA"}, {"role": "assistant", "content": "BBBBB"}, {"role": "user", "content": "CCCCC"}, {"role": "assistant", "content": "DDDDD"}]}
tests/llm/data/conversations.jsonl
ADDED
@@ -0,0 +1,3 @@
+{"conversations": [{"from": "system", "value": "00000"}, {"from": "user", "value": "11111"}, {"from": "assistant", "value": "22222"}]}
+{"conversations": [{"from": "user", "value": "aaaaa"}, {"from": "assistant", "value": "bbbbb"}, {"from": "user", "value": "ccccc"}, {"from": "assistant", "value": "ddddd"}]}
+{"conversations": [{"from": "user", "value": "AAAAA"}, {"from": "assistant", "value": "BBBBB"}, {"from": "user", "value": "CCCCC"}, {"from": "assistant", "value": "DDDDD"}]}
tests/llm/data/multi_modal_1.jsonl
ADDED
@@ -0,0 +1,3 @@
+{"query": "<img>https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg</img>55555", "response": "66666"}
+{"query": "<img>https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg</img><img>https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg</img>eeeee", "response": "fffff", "history": [["hello", "123"]]}
+{"query": "EEEEE", "response": "FFFFF", "history": [["AAAAA", "BBBBB"], ["CCCCC", "DDDDD"]]}
tests/llm/data/multi_modal_2.jsonl
ADDED
@@ -0,0 +1,3 @@
+{"query": "55555", "response": "66666", "images": ["https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"]}
+{"query": "eeeee", "response": "fffff", "history": [], "images": ["https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"]}
+{"query": "EEEEE", "response": "FFFFF", "history": [["AAAAA", "BBBBB"], ["CCCCC", "DDDDD"]], "images": ["https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg", "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg", "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"]}
tests/llm/data/multi_modal_3.jsonl
ADDED
@@ -0,0 +1,3 @@
+{"query": "55555", "response": "66666", "images": ["https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"]}
+{"query": "eeeee", "response": "fffff", "history": [], "images": ["https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"]}
+{"query": "EEEEE", "response": "FFFFF", "history": [["AAAAA", "BBBBB"], ["CCCCC", "DDDDD"]], "images": ["https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"]}
tests/llm/data/sharegpt.jsonl
ADDED
@@ -0,0 +1,3 @@
+{"system": "00000", "conversation": [{"human": "11111", "assistant": "22222"}]}
+{"conversation": [{"human": "aaaaa", "assistant": "bbbbb"}]}
+{"conversation": [{"human": "AAAAA", "assistant": "BBBBB"}, {"human": "CCCCC", "assistant": "DDDDD"}, {"human": "EEEEE", "assistant": "FFFFF"}]}
tests/llm/data/swift_multi.json
ADDED
@@ -0,0 +1,3 @@
+[{"system": "00000", "query": "55555", "response": "66666"},
+{"query": "eeeee", "response": "fffff", "history": []},
+{"query": "EEEEE", "response": "FFFFF", "history": [["AAAAA", "BBBBB"], ["CCCCC", "DDDDD"]]}]
tests/llm/data/swift_multi.jsonl
ADDED
@@ -0,0 +1,3 @@
+{"system": "00000", "query": "55555", "response": "66666"}
+{"query": "eeeee", "response": "fffff", "history": []}
+{"query": "EEEEE", "response": "FFFFF", "history": [["AAAAA", "BBBBB"], ["CCCCC", "DDDDD"]]}
tests/llm/data/swift_pre.csv
ADDED
@@ -0,0 +1,4 @@
+response
+11111
+aaaaa
+AAAAA
tests/llm/data/swift_pre.jsonl
ADDED
@@ -0,0 +1,3 @@
+{"response": "11111"}
+{"response": "aaaaa"}
+{"response": "AAAAA"}
tests/llm/data/swift_single.csv
ADDED
@@ -0,0 +1,4 @@
+system,query,response
+00000,11111,22222
+,aaaaa,bbbbb
+,AAAAA,BBBBB
tests/llm/data/swift_single.jsonl
ADDED
@@ -0,0 +1,3 @@
+{"system": "00000", "query": "11111", "response": "22222"}
+{"query": "aaaaa", "response": "bbbbb"}
+{"query": "AAAAA", "response": "BBBBB"}
tests/llm/load_model.py
ADDED
@@ -0,0 +1,45 @@
+import argparse
+from dataclasses import fields
+
+import torch
+
+from swift.llm import MODEL_ARCH_MAPPING, ModelKeys, get_model_tokenizer
+
+
+def get_model_and_tokenizer(ms_model_id, model_arch=None):
+    try:
+        import transformers
+        print(f'Test model: {ms_model_id} with transformers version: {transformers.__version__}')
+        model_ins, tokenizer = get_model_tokenizer(ms_model_id)
+        model_ins: torch.nn.Module
+        if model_arch:
+            model_arch: ModelKeys = MODEL_ARCH_MAPPING[model_arch]
+            for f in fields(model_arch):
+                value = getattr(model_arch, f.name)
+                if value is not None and f.name != 'arch_name':
+                    if isinstance(value, str):
+                        value = [value]
+                    for v in value:
+                        v = v.replace('{}', '0')
+                        model_ins.get_submodule(v)
+    except Exception:
+        import traceback
+        print(traceback.format_exc())
+        raise
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '--ms_model_id',
+        type=str,
+        required=True,
+    )
+    parser.add_argument(
+        '--model_arch',
+        type=str,
+        required=True,
+    )
+    args = parser.parse_args()
+
+    get_model_and_tokenizer(args.ms_model_id, args.model_arch)
tests/llm/load_template.py
ADDED
@@ -0,0 +1,138 @@
+import argparse
+from collections.abc import Mapping
+
+import json
+import torch
+from transformers import PreTrainedTokenizerBase
+
+
+def to_list(input_ids):
+    if isinstance(input_ids, torch.Tensor):
+        input_ids = input_ids.cpu().numpy().tolist()
+    if isinstance(input_ids, list) and isinstance(input_ids[0], list):
+        input_ids = input_ids[0]
+    return input_ids
+
+
+def load_ds(ds):
+    from swift.llm import load_dataset
+    train_dataset, val_dataset = load_dataset(
+        ds,
+        split_dataset_ratio=0.0,
+        strict=False,
+        num_proc=1,
+        model_name=['小黄', 'Xiao Huang'],
+        model_author=['魔搭', 'ModelScope'])
+    return train_dataset.select(range(1))
+
+
+def load_and_tokenize(ms_model_id, template):
+    from swift.llm import EncodePreprocessor, get_model_tokenizer, get_template
+    try:
+        vl_fields = ['vl', 'video', 'minicpmv', 'llava', 'vision', 'emu', 'florence']
+        model_ins, tokenizer = get_model_tokenizer(ms_model_id, load_model='mplug' in ms_model_id.lower())
+        template_ins = get_template(template, tokenizer)
+        if template_ins.use_model:
+            model_ins, _ = get_model_tokenizer(ms_model_id, load_model=True)
+            template_ins.model = model_ins
+        template_ins.set_mode('train')
+        if 'audio' in template_ins.__class__.__name__.lower():
+            output = EncodePreprocessor(template_ins)(
+                load_ds('speech_asr/speech_asr_aishell1_trainsets:validation/test'))
+            input_ids = output[0].get('input_ids')
+        elif any([vl in template for vl in vl_fields]):
+            for row in load_ds('modelscope/coco_2014_caption:validation'):
+                output = template_ins.encode(row)
+                input_ids = output.get('input_ids')
+            # output = EncodePreprocessor(template_ins)(load_ds('swift/OK-VQA_train'))
+            if model_ins is not None and model_ins.model_meta.is_multimodal:
+                inputs = template_ins.pre_data_collator([output], model=model_ins)
+                _, output = template_ins.pre_forward_hook(model_ins, None, inputs)
+        else:
+            output = EncodePreprocessor(template_ins)(load_ds('modelscope/DuReader_robust-QG'))
+            input_ids = output[0].get('input_ids')
+        if isinstance(output, Mapping):
+            assert output.get('input_ids') is not None or output.get('inputs_embeds') is not None
+        else:
+            assert output[0].get('input_ids') is not None or output[0].get('inputs_embeds') is not None
+        input_ids = to_list(input_ids)
+        sent = ''
+        try:
+            if not isinstance(tokenizer, PreTrainedTokenizerBase) and hasattr(tokenizer, 'tokenizer'):
+                tokenizer = tokenizer.tokenizer
+            sent = tokenizer.decode(input_ids)
+        except Exception:
+            pass
+        return input_ids, sent
+    except Exception:
+        import traceback
+        print(traceback.format_exc())
+        raise
+
+
+def load_ds_old(ds):
+    from swift.llm import load_dataset
+    train_dataset, val_dataset = load_dataset(ds, split_dataset_ratio=0.0)
+    return train_dataset.select(range(1))
+
+
+def load_and_tokenize_old(ms_model_id, template):
+    model_type = None
+    model_info = None
+    from swift.llm import get_model_tokenizer
+    from swift.llm import get_template, MODEL_MAPPING
+    found = False
+    for model_type, model_info in MODEL_MAPPING.items():
+        if model_info['model_id_or_path'].lower() == ms_model_id.lower():
+            found = True
+            break
+
+    if not found:
+        raise ValueError(f'No model_type found: {ms_model_id}')
+
+    vl_fields = ['vl', 'video', 'minicpm-v', 'llava', 'vision', 'emu', 'florence']
+    model_ins, tokenizer = get_model_tokenizer(model_type, load_model=True)
+
+    if model_info['template'] == 'default-generation':
+        model_info['template'] = template.replace('_', '-')
+    template_ins = get_template(model_info['template'], tokenizer)
+    template_ins.model = model_ins
+    if 'audio' in model_info['template']:
+        output = template_ins.encode(load_ds_old('aishell1-zh-mini')[0])
+    elif any([vl in model_info['template'] for vl in vl_fields]):
+        output = template_ins.encode(load_ds_old('coco-en-mini')[0])
+    else:
+        output = template_ins.encode(load_ds_old('dureader-robust-zh')[0])
+    input_ids = to_list(output[0]['input_ids'])
+    sent = ''
+    try:
+        sent = tokenizer.decode(input_ids)
+    except Exception:
+        pass
+    return input_ids, sent
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '--ms_model_id',
+        type=str,
+        required=True,
+    )
+    parser.add_argument(
+        '--template',
+        type=str,
+        required=True,
+    )
+    parser.add_argument('--new', type=str, required=False, default='1')
+    args = parser.parse_args()
+
+    is_new = args.new == '1'
+    if is_new:
+        input_ids, sent = load_and_tokenize(args.ms_model_id, args.template)
+    else:
+        input_ids, sent = load_and_tokenize_old(args.ms_model_id, args.template)
+    file = 'new_input_ids.txt' if is_new else 'old_input_ids.txt'
+    if input_ids is not None:
+        with open(file, 'w') as f:
+            json.dump({'input_ids': input_ids, 'sent': sent}, f)
tests/llm/test_custom.py
ADDED
@@ -0,0 +1,74 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import unittest
+from typing import Any, Dict, Optional
+
+import torch
+
+from swift.llm import (DatasetMeta, InferRequest, Model, ModelGroup, ModelMeta, PtEngine, RequestConfig,
+                       ResponsePreprocessor, TemplateMeta, get_model_tokenizer_with_flash_attn, load_dataset,
+                       register_dataset, register_model, register_template)
+
+
+class CustomPreprocessor(ResponsePreprocessor):
+    prompt = """Task: Based on the given two sentences, provide a similarity score between 0.0 and 5.0.
+Sentence 1: {text1}
+Sentence 2: {text2}
+Similarity score: """
+
+    def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+        return super().preprocess({
+            'query': self.prompt.format(text1=row['text1'], text2=row['text2']),
+            'response': f"{row['label']:.1f}"
+        })
+
+
+register_dataset(
+    DatasetMeta(
+        ms_dataset_id='swift/stsb',
+        hf_dataset_id='SetFit/stsb',
+        preprocess_func=CustomPreprocessor(),
+    ))
+
+register_template(
+    TemplateMeta(
+        template_type='custom',
+        prefix=['<extra_id_0>System\n{{SYSTEM}}\n'],
+        prompt=['<extra_id_1>User\n{{QUERY}}\n<extra_id_1>Assistant\n'],
+        chat_sep=['\n']))
+
+register_model(
+    ModelMeta(
+        model_type='custom',
+        model_groups=[
+            ModelGroup([Model('AI-ModelScope/Nemotron-Mini-4B-Instruct', 'nvidia/Nemotron-Mini-4B-Instruct')])
+        ],
+        template='custom',
+        get_function=get_model_tokenizer_with_flash_attn,
+        ignore_patterns=['nemo']))
+
+
+class TestCustom(unittest.TestCase):
+
+    def test_custom_model(self):
+        infer_request = InferRequest(messages=[{'role': 'user', 'content': 'who are you?'}])
+        request_config = RequestConfig(max_tokens=512, temperature=0)
+        engine = PtEngine('AI-ModelScope/Nemotron-Mini-4B-Instruct', torch.float16)
+        response = engine.infer([infer_request], request_config)
+        swift_response = response[0].choices[0].message.content
+
+        engine.default_template.template_backend = 'jinja'
+        response = engine.infer([infer_request], request_config)
+        jinja_response = response[0].choices[0].message.content
+        assert swift_response == jinja_response, (f'swift_response: {swift_response}\njinja_response: {jinja_response}')
+        print(f'response: {swift_response}')
+
+    def test_custom_dataset(self):
+        dataset = load_dataset(['swift/stsb'])[0]
+        assert len(dataset) == 5749
+        assert list(dataset[0].keys()) == ['messages']
+        print(f'dataset: {dataset}')
+        print(f'dataset[0]: {dataset[0]}')
+
+
+if __name__ == '__main__':
+    unittest.main()
tests/llm/test_dataset.py
ADDED
@@ -0,0 +1,19 @@
+import unittest
+
+from swift.llm import load_dataset
+
+
+class TestDataset(unittest.TestCase):
+
+    def test_load_v_dataset(self):
+        if not __name__ == '__main__':
+            # ignore citest error in github
+            return
+
+        for ds in ['m3it#1000', 'mantis-instruct#1000', 'llava-med-zh-instruct#1000']:
+            ds = load_dataset(ds)
+            assert len(ds[0]) > 800
+
+
+if __name__ == '__main__':
+    unittest.main()
tests/llm/test_ollama_export.py
ADDED
@@ -0,0 +1,80 @@
+import os
+import shutil
+import tempfile
+import unittest
+
+import transformers
+from packaging import version
+
+from swift.llm import ExportArguments, export_main
+
+if __name__ == '__main__':
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+
+class TestTemplate(unittest.TestCase):
+
+    def setUp(self):
+        print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
+        self.tmp_dir = tempfile.TemporaryDirectory().name
+
+    def tearDown(self):
+        if os.path.exists(self.tmp_dir):
+            shutil.rmtree(self.tmp_dir)
+        super().tearDown()
+
+    @unittest.skip('swift2.0')
+    def test_llama3(self):
+        args = ExportArguments(model_type='llama3-8b-instruct', to_ollama=True, ollama_output_dir=self.tmp_dir)
+        export_main(args)
+
+        template = ('TEMPLATE """{{ if .System }}<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n'
+                    '{{ .System }}<|eot_id|>{{ else }}<|begin_of_text|>{{ end }}{{ if .Prompt }}<|start_header_id|>user'
+                    '<|end_header_id|>\n\n{{ .Prompt }}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n'
+                    '{{ end }}{{ .Response }}<|eot_id|>"""')
+
+        stop = 'PARAMETER stop "<|eot_id|>"'
+
+        with open(os.path.join(self.tmp_dir, 'Modelfile'), 'r') as f:
+            content = f.read()
+            self.assertTrue(template in content)
+            self.assertTrue(stop in content)
+
+    @unittest.skip('swift2.0')
+    def test_glm4(self):
+        if version.parse(transformers.__version__) >= version.parse('4.45'):
+            return
+
+        args = ExportArguments(model_type='glm4-9b-chat', to_ollama=True, ollama_output_dir=self.tmp_dir)
+        export_main(args)
+
+        template = ('TEMPLATE """{{ if .System }}[gMASK] <sop><|system|>\n{{ .System }}{{ else }}'
+                    '[gMASK] <sop>{{ end }}{{ if .Prompt }}<|user|>\n{{ .Prompt }}<|assistant|>\n'
+                    '{{ end }}{{ .Response }}<|user|>"""')
+
+        stop = 'PARAMETER stop "<|user|>"'
+
+        with open(os.path.join(self.tmp_dir, 'Modelfile'), 'r') as f:
+            content = f.read()
+            self.assertTrue(template in content)
+            self.assertTrue(stop in content)
+
+    @unittest.skip('swift2.0')
+    def test_qwen2(self):
+        args = ExportArguments(model_type='qwen2-7b-instruct', to_ollama=True, ollama_output_dir=self.tmp_dir)
+        export_main(args)
+
+        template = ('TEMPLATE """{{ if .System }}<|im_start|>system\n{{ .System }}<|im_end|>\n{{ else }}{{ end }}'
+                    '{{ if .Prompt }}<|im_start|>user\n{{ .Prompt }}<|im_end|>\n<|im_start|>assistant\n'
+                    '{{ end }}{{ .Response }}<|im_end|>"""')
+
+        stop = 'PARAMETER stop "<|im_end|>"'
+
+        with open(os.path.join(self.tmp_dir, 'Modelfile'), 'r') as f:
+            content = f.read()
+            self.assertTrue(template in content)
+            self.assertTrue(stop in content)
+
+
+if __name__ == '__main__':
+    unittest.main()
tests/llm/test_run.py
ADDED
@@ -0,0 +1,458 @@
+if __name__ == '__main__':
+    import os
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+    os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
+
+import os
+import shutil
+import tempfile
+import unittest
+from functools import partial
+from typing import Any, Dict, List
+
+import torch
+from datasets import Dataset as HfDataset
+from modelscope import Model, MsDataset, snapshot_download
+from torch.nn.utils.rnn import pad_sequence
+from transformers import AutoTokenizer
+
+from swift import Trainer, TrainingArguments, get_logger
+from swift.llm import (InferArguments, ModelType, RLHFArguments, TrainArguments, infer_main, merge_lora, rlhf_main,
+                       sft_main)
+
+NO_EVAL_HUMAN = True
+
+logger = get_logger()
+
+kwargs = {
+    'per_device_train_batch_size': 2,
+    'per_device_eval_batch_size': 2,
+    'save_steps': 5,
+    'gradient_accumulation_steps': 4,
+    'num_train_epochs': 1,
+}
+
+
+class TestRun(unittest.TestCase):
+
+    def setUp(self):
+        print(f'Testing {type(self).__name__}.{self._testMethodName}')
+        self._tmp_dir = tempfile.TemporaryDirectory()
+        self.tmp_dir = self._tmp_dir.name
+
+    def tearDown(self):
+        shutil.rmtree(self.tmp_dir)
+
+    def test_template(self):
+        if not __name__ == '__main__':
+            # ignore citest error in github
+            return
+        torch.cuda.empty_cache()
+        output = sft_main(
+            TrainArguments(
+                model='Qwen/Qwen1.5-0.5B',
+                train_type='full',
+                dataset='DAMO_NLP/jd',
+                val_dataset='DAMO_NLP/jd#20',
+                streaming=True,
+                max_steps=12,
+                **kwargs))
+        last_model_checkpoint = output['last_model_checkpoint']
+        torch.cuda.empty_cache()
+        result = infer_main(InferArguments(model=last_model_checkpoint, load_data_args=True, val_dataset_sample=2))
+        assert len(result[0]['response']) < 20
+
+    def test_hf_hub(self):
+        if not __name__ == '__main__':
+            # ignore citest error in github
+            return
+        torch.cuda.empty_cache()
+        train_dataset_fnames = [
+            'alpaca.csv', 'chatml.jsonl', 'swift_pre.jsonl', 'swift_single.csv', 'swift_multi.jsonl',
+            'swift_multi.json#2'
+        ]
+        folder = os.path.join(os.path.dirname(__file__), 'data')
+        dataset = [
+            'llm-wizard/alpaca-gpt4-data-zh#20',
+            'shibing624/alpaca-zh#20',
+        ] + [os.path.join(folder, fname) for fname in train_dataset_fnames]
+        output = sft_main(
+            TrainArguments(
+                model='Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4', train_type='lora', dataset=dataset, use_hf=True, **kwargs))
+        last_model_checkpoint = output['last_model_checkpoint']
+        torch.cuda.empty_cache()
+        infer_main(InferArguments(adapters=last_model_checkpoint, load_data_args=True, val_dataset_sample=2))
+
+    @unittest.skip('avoid ci error')
+    def test_basic(self):
+        output_dir = 'output'
+        quant_bits_list = [0, 4]
+        train_dataset_fnames = [
+            'alpaca.csv', 'chatml.jsonl', 'swift_pre.jsonl', 'swift_single.csv', 'swift_multi.jsonl',
+            'swift_multi.json#2'
+        ]
+        folder = os.path.join(os.path.dirname(__file__), 'data')
+        dataset = [
+            'AI-ModelScope/alpaca-gpt4-data-zh#20',
+            'hurner/alpaca-gpt4-data-zh#20',
+        ] + [os.path.join(folder, fname) for fname in train_dataset_fnames]
+        if not __name__ == '__main__':
+            output_dir = self.tmp_dir
+            quant_bits_list = [4]
+            dataset = dataset[:2]
+        for quant_bits in quant_bits_list:
+            if quant_bits == 0:
+                predict_with_generate = False
+                quant_method = None
+            else:
+                predict_with_generate = True
+                quant_method = 'bnb'
+            sft_args = TrainArguments(
+                model='Qwen/Qwen2-0.5B-Instruct',
+                quant_bits=quant_bits,
+                eval_steps=5,
+                adam_beta2=0.95,
+                quant_method=quant_method,
+                predict_with_generate=predict_with_generate,
+                dataset=dataset,
+                val_dataset='DAMO_NLP/jd#20',
+                output_dir=output_dir,
+                download_mode='force_redownload',
+                include_num_input_tokens_seen=True,
+                gradient_checkpointing=True,
+                **kwargs)
+            torch.cuda.empty_cache()
+            output = sft_main(sft_args)
+            print(output)
+            best_model_checkpoint = output['best_model_checkpoint']
+            print(f'best_model_checkpoint: {best_model_checkpoint}')
+            if __name__ == '__main__':
+                infer_args = InferArguments(
+                    adapters=best_model_checkpoint,
+                    merge_lora={
+                        0: True,
+                        4: False
+                    }[quant_bits],
+                    load_data_args=NO_EVAL_HUMAN,
+                    val_dataset_sample=5)
+                torch.cuda.empty_cache()
+                result = infer_main(infer_args)
+                print(result)
+                # if __name__ == '__main__':
+                #     app_ui_main(infer_args)
+
+    def test_vl_audio(self):
+        output_dir = 'output'
+        if not __name__ == '__main__':
+            # ignore citest error in github
+            return
+        model_type_list = ['Qwen/Qwen-VL-Chat', 'Qwen/Qwen-Audio-Chat']
+        dataset_list = [
+            'modelscope/coco_2014_caption:validation#100', 'speech_asr/speech_asr_aishell1_trainsets:validation#100'
+        ]
+        for model, dataset in zip(model_type_list, dataset_list):
+            sft_args = TrainArguments(
+                model=model,
+                eval_steps=5,
+                dataset=[dataset],
|
| 158 |
+
output_dir=output_dir,
|
| 159 |
+
gradient_checkpointing=True,
|
| 160 |
+
lazy_tokenize=True,
|
| 161 |
+
disable_tqdm=True,
|
| 162 |
+
**kwargs)
|
| 163 |
+
torch.cuda.empty_cache()
|
| 164 |
+
output = sft_main(sft_args)
|
| 165 |
+
print(output)
|
| 166 |
+
best_model_checkpoint = output['best_model_checkpoint']
|
| 167 |
+
print(f'best_model_checkpoint: {best_model_checkpoint}')
|
| 168 |
+
infer_args = InferArguments(
|
| 169 |
+
adapters=best_model_checkpoint,
|
| 170 |
+
load_data_args=True,
|
| 171 |
+
stream={
|
| 172 |
+
'Qwen/Qwen-VL-Chat': True,
|
| 173 |
+
'Qwen/Qwen-Audio-Chat': False
|
| 174 |
+
}[model],
|
| 175 |
+
val_dataset_sample=5)
|
| 176 |
+
torch.cuda.empty_cache()
|
| 177 |
+
result = infer_main(infer_args)
|
| 178 |
+
print(result)
|
| 179 |
+
|
| 180 |
+
def test_custom_dataset(self):
|
| 181 |
+
if not __name__ == '__main__':
|
| 182 |
+
# ignore citest error in github
|
| 183 |
+
return
|
| 184 |
+
train_dataset_fnames = [
|
| 185 |
+
'alpaca.csv', 'chatml.jsonl', 'swift_pre.jsonl', 'swift_single.csv', 'swift_multi.jsonl',
|
| 186 |
+
'swift_multi.json', 'sharegpt.jsonl'
|
| 187 |
+
]
|
| 188 |
+
val_dataset_fnames = [
|
| 189 |
+
'alpaca.jsonl',
|
| 190 |
+
'alpaca2.csv',
|
| 191 |
+
'conversations.jsonl',
|
| 192 |
+
'swift_pre.csv',
|
| 193 |
+
'swift_single.jsonl',
|
| 194 |
+
# 'swift_#:#.jsonl#3'
|
| 195 |
+
]
|
| 196 |
+
folder = os.path.join(os.path.dirname(__file__), 'data')
|
| 197 |
+
resume_from_checkpoint = None
|
| 198 |
+
train_kwargs = kwargs.copy()
|
| 199 |
+
train_kwargs.pop('num_train_epochs')
|
| 200 |
+
for num_train_epochs in [1, 2]:
|
| 201 |
+
sft_args = TrainArguments(
|
| 202 |
+
model='Qwen/Qwen-7B-Chat',
|
| 203 |
+
dataset=['swift/self-cognition#20'] + [os.path.join(folder, fname) for fname in train_dataset_fnames],
|
| 204 |
+
val_dataset=[os.path.join(folder, fname) for fname in val_dataset_fnames],
|
| 205 |
+
resume_from_checkpoint=resume_from_checkpoint,
|
| 206 |
+
num_train_epochs=num_train_epochs,
|
| 207 |
+
model_name='小黄',
|
| 208 |
+
model_author='魔搭',
|
| 209 |
+
**train_kwargs)
|
| 210 |
+
|
| 211 |
+
torch.cuda.empty_cache()
|
| 212 |
+
result = sft_main(sft_args)
|
| 213 |
+
best_model_checkpoint = result['best_model_checkpoint']
|
| 214 |
+
resume_from_checkpoint = result['last_model_checkpoint']
|
| 215 |
+
|
| 216 |
+
for load_args in [True, False]:
|
| 217 |
+
infer_kwargs = {}
|
| 218 |
+
if load_args is False:
|
| 219 |
+
args_json = os.path.join(best_model_checkpoint, 'args.json')
|
| 220 |
+
assert os.path.exists(args_json)
|
| 221 |
+
os.remove(args_json)
|
| 222 |
+
infer_kwargs = {'model': 'Qwen/Qwen-7B-Chat'}
|
| 223 |
+
infer_args = InferArguments(
|
| 224 |
+
adapters=best_model_checkpoint,
|
| 225 |
+
load_data_args=load_args and NO_EVAL_HUMAN,
|
| 226 |
+
merge_lora=load_args,
|
| 227 |
+
val_dataset=[os.path.join(folder, fname) for fname in val_dataset_fnames],
|
| 228 |
+
**infer_kwargs)
|
| 229 |
+
torch.cuda.empty_cache()
|
| 230 |
+
infer_main(infer_args)
|
| 231 |
+
|
| 232 |
+
def test_rlhf(self):
|
| 233 |
+
if not __name__ == '__main__':
|
| 234 |
+
# ignore citest error in github
|
| 235 |
+
return
|
| 236 |
+
torch.cuda.empty_cache()
|
| 237 |
+
# llm rlhf
|
| 238 |
+
#
|
| 239 |
+
rlhf_types = ['dpo', 'orpo', 'simpo', 'kto', 'cpo', 'rm', 'ppo']
|
| 240 |
+
for rlhf_type in rlhf_types:
|
| 241 |
+
dataset = ('AI-ModelScope/hh_rlhf_cn:harmless_base_cn#100'
|
| 242 |
+
if rlhf_type != 'kto' else 'AI-ModelScope/ultrafeedback-binarized-preferences-cleaned-kto#100')
|
| 243 |
+
train_kwargs = {}
|
| 244 |
+
if rlhf_type == 'ppo':
|
| 245 |
+
train_kwargs['reward_model'] = 'Qwen/Qwen2-1.5B-Instruct'
|
| 246 |
+
output = rlhf_main(
|
| 247 |
+
RLHFArguments(
|
| 248 |
+
rlhf_type=rlhf_type,
|
| 249 |
+
model='Qwen/Qwen2-1.5B-Instruct',
|
| 250 |
+
dataset=dataset,
|
| 251 |
+
eval_steps=5,
|
| 252 |
+
split_dataset_ratio=0.05,
|
| 253 |
+
**train_kwargs,
|
| 254 |
+
**kwargs))
|
| 255 |
+
if rlhf_type == 'ppo':
|
| 256 |
+
model_checkpoint = output['last_model_checkpoint']
|
| 257 |
+
else:
|
| 258 |
+
model_checkpoint = output['best_model_checkpoint']
|
| 259 |
+
|
| 260 |
+
torch.cuda.empty_cache()
|
| 261 |
+
infer_main(InferArguments(adapters=model_checkpoint, load_data_args=True))
|
| 262 |
+
|
| 263 |
+
# mllm rlhf
|
| 264 |
+
visual_rlhf_types = ['dpo', 'orpo', 'simpo', 'cpo', 'rm']
|
| 265 |
+
test_model = [
|
| 266 |
+
'OpenGVLab/InternVL2-2B', 'Qwen/Qwen2-VL-2B-Instruct', 'llava-hf/llava-v1.6-mistral-7b-hf',
|
| 267 |
+
'AI-ModelScope/Florence-2-base-ft'
|
| 268 |
+
] # decoder only and encoder-decoder
|
| 269 |
+
for rlhf_type in visual_rlhf_types:
|
| 270 |
+
for model in test_model:
|
| 271 |
+
dataset_name = 'swift/RLAIF-V-Dataset#100'
|
| 272 |
+
output = rlhf_main(
|
| 273 |
+
RLHFArguments(
|
| 274 |
+
rlhf_type=rlhf_type,
|
| 275 |
+
model=model,
|
| 276 |
+
dataset=dataset_name,
|
| 277 |
+
eval_steps=5,
|
| 278 |
+
dataset_num_proc=16,
|
| 279 |
+
**kwargs))
|
| 280 |
+
best_model_checkpoint = output['best_model_checkpoint']
|
| 281 |
+
torch.cuda.empty_cache()
|
| 282 |
+
infer_main(InferArguments(adapters=best_model_checkpoint, load_data_args=True, val_dataset_sample=2))
|
| 283 |
+
|
| 284 |
+
def test_loss_matching(self):
|
| 285 |
+
output_dir = 'output'
|
| 286 |
+
if not __name__ == '__main__':
|
| 287 |
+
# ignore citest error in github
|
| 288 |
+
return
|
| 289 |
+
losses = []
|
| 290 |
+
for use_swift_lora in [False, True]:
|
| 291 |
+
bool_var = use_swift_lora
|
| 292 |
+
torch.cuda.empty_cache()
|
| 293 |
+
output = sft_main([
|
| 294 |
+
'--model', 'Qwen/Qwen-7B-Chat', '--save_steps', '5', '--dataset',
|
| 295 |
+
'AI-ModelScope/leetcode-solutions-python#200', '--output_dir', output_dir, '--gradient_checkpointing',
|
| 296 |
+
'true', '--max_new_tokens', '100', '--attn_impl', 'flash_attn', '--target_modules', 'all-linear',
|
| 297 |
+
'--seed', '0', '--lora_bias', 'all', '--modules_to_save', 'lm_head', '--use_swift_lora',
|
| 298 |
+
str(use_swift_lora), '--num_train_epochs', '1', '--gradient_accumulation_steps', '16'
|
| 299 |
+
])
|
| 300 |
+
best_model_checkpoint = output['best_model_checkpoint']
|
| 301 |
+
print(f'best_model_checkpoint: {best_model_checkpoint}')
|
| 302 |
+
load_data_args = str(bool_var or NO_EVAL_HUMAN)
|
| 303 |
+
if load_data_args:
|
| 304 |
+
val_dataset_sample = 2
|
| 305 |
+
else:
|
| 306 |
+
val_dataset_sample = -1
|
| 307 |
+
torch.cuda.empty_cache()
|
| 308 |
+
infer_main([
|
| 309 |
+
'--adapters', best_model_checkpoint, '--val_dataset_sample',
|
| 310 |
+
str(val_dataset_sample), '--max_new_tokens', '100', '--attn_impl', 'eager', '--merge_lora',
|
| 311 |
+
str(bool_var), '--load_data_args',
|
| 312 |
+
str(load_data_args)
|
| 313 |
+
])
|
| 314 |
+
loss = output['log_history'][-1]['train_loss']
|
| 315 |
+
losses.append(loss)
|
| 316 |
+
self.assertTrue(abs(losses[0] - losses[1]) < 5e-4)
|
| 317 |
+
print(f'swift_loss: {losses[0]}')
|
| 318 |
+
print(f'peft_loss: {losses[1]}')
|
| 319 |
+
self.assertTrue(0.95 <= losses[0] <= 1)
|
| 320 |
+
|
| 321 |
+
def test_pai_compat(self):
|
| 322 |
+
if not __name__ == '__main__':
|
| 323 |
+
# ignore citest error in github
|
| 324 |
+
return
|
| 325 |
+
from swift.llm import sft_main, infer_main
|
| 326 |
+
os.environ['PAI_TRAINING_JOB_ID'] = '123456'
|
| 327 |
+
folder = os.path.join(os.path.dirname(__file__), 'config')
|
| 328 |
+
tensorboard_dir = os.path.join('output/pai_test', 'pai_tensorboard')
|
| 329 |
+
os.environ['PAI_OUTPUT_TENSORBOARD'] = tensorboard_dir
|
| 330 |
+
sft_json = os.path.join(folder, 'sft.json')
|
| 331 |
+
infer_json = os.path.join(folder, 'infer.json')
|
| 332 |
+
torch.cuda.empty_cache()
|
| 333 |
+
output = sft_main([sft_json])
|
| 334 |
+
print()
|
| 335 |
+
infer_args = {
|
| 336 |
+
'adapters': output['best_model_checkpoint'],
|
| 337 |
+
'val_dataset_sample': 2,
|
| 338 |
+
'load_data_args': True,
|
| 339 |
+
}
|
| 340 |
+
import json
|
| 341 |
+
with open(infer_json, 'w') as f:
|
| 342 |
+
json.dump(infer_args, f, ensure_ascii=False, indent=4)
|
| 343 |
+
torch.cuda.empty_cache()
|
| 344 |
+
infer_main([infer_json])
|
| 345 |
+
os.environ.pop('PAI_TRAINING_JOB_ID')
|
| 346 |
+
|
| 347 |
+
|
| 348 |
+
def data_collate_fn(batch: List[Dict[str, Any]], tokenizer) -> Dict[str, torch.Tensor]:
|
| 349 |
+
# text-classification
|
| 350 |
+
assert tokenizer.pad_token_id is not None
|
| 351 |
+
input_ids = [torch.tensor(b['input_ids']) for b in batch]
|
| 352 |
+
labels = torch.tensor([b['labels'] for b in batch])
|
| 353 |
+
attention_mask = [torch.ones(len(input_ids[i]), dtype=torch.int64) for i in range(len(input_ids))]
|
| 354 |
+
|
| 355 |
+
input_ids = pad_sequence(input_ids, batch_first=True, padding_value=tokenizer.pad_token_id)
|
| 356 |
+
attention_mask = pad_sequence(attention_mask, batch_first=True, padding_value=0)
|
| 357 |
+
return {'input_ids': input_ids, 'attention_mask': attention_mask, 'labels': labels}
|
| 358 |
+
|
| 359 |
+
|
| 360 |
+
class BertTrainer(Trainer):
|
| 361 |
+
|
| 362 |
+
def compute_loss(self, model, inputs, return_outputs=False):
|
| 363 |
+
outputs = model(**inputs)
|
| 364 |
+
loss = outputs.loss
|
| 365 |
+
if loss is None:
|
| 366 |
+
logits, loss = list(outputs.logits)
|
| 367 |
+
return (loss, outputs) if return_outputs else loss
|
| 368 |
+
|
| 369 |
+
|
| 370 |
+
class TestTrainer(unittest.TestCase):
|
| 371 |
+
|
| 372 |
+
def setUp(self):
|
| 373 |
+
self._tmp_dir = tempfile.TemporaryDirectory()
|
| 374 |
+
self.tmp_dir = self._tmp_dir.name
|
| 375 |
+
# self.tmp_dir = 'test'
|
| 376 |
+
logger.info(f'self.tmp_dir: {self.tmp_dir}')
|
| 377 |
+
|
| 378 |
+
def tearDown(self):
|
| 379 |
+
if os.path.isdir(self.tmp_dir):
|
| 380 |
+
shutil.rmtree(self.tmp_dir)
|
| 381 |
+
# api = HubApi()
|
| 382 |
+
# api.delete_model(self.hub_model_id)
|
| 383 |
+
# logger.info(f'delete model: {self.hub_model_id}')
|
| 384 |
+
|
| 385 |
+
def test_trainer(self):
|
| 386 |
+
self.hub_model_id = 'test_trainer2'
|
| 387 |
+
logger.info(f'self.hub_model_id: {self.hub_model_id}')
|
| 388 |
+
self.tmp_dir = 'output/damo/nlp_structbert_backbone_base_std'
|
| 389 |
+
push_to_hub = True
|
| 390 |
+
if not __name__ == '__main__':
|
| 391 |
+
# ignore citest error in github
|
| 392 |
+
return
|
| 393 |
+
model_id = 'damo/nlp_structbert_backbone_base_std'
|
| 394 |
+
model_dir = snapshot_download(model_id, 'master')
|
| 395 |
+
tokenizer = AutoTokenizer.from_pretrained(model_dir)
|
| 396 |
+
dataset = MsDataset.load('clue', subset_name='tnews')
|
| 397 |
+
num_labels = max(dataset['train']['label']) + 1
|
| 398 |
+
model = Model.from_pretrained(model_dir, task='text-classification', num_labels=num_labels)
|
| 399 |
+
train_dataset, val_dataset = dataset['train'].to_hf_dataset(), dataset['validation'].to_hf_dataset()
|
| 400 |
+
train_dataset: HfDataset = train_dataset.select(range(100))
|
| 401 |
+
val_dataset: HfDataset = val_dataset.select(range(20))
|
| 402 |
+
|
| 403 |
+
#
|
| 404 |
+
def tokenize_func(examples):
|
| 405 |
+
data = tokenizer(examples['sentence'], return_attention_mask=False)
|
| 406 |
+
examples['input_ids'] = data['input_ids']
|
| 407 |
+
examples['labels'] = examples['label']
|
| 408 |
+
del examples['sentence'], examples['label']
|
| 409 |
+
return examples
|
| 410 |
+
|
| 411 |
+
train_dataset = train_dataset.map(tokenize_func)
|
| 412 |
+
val_dataset = val_dataset.map(tokenize_func)
|
| 413 |
+
|
| 414 |
+
data_collator = partial(data_collate_fn, tokenizer=tokenizer)
|
| 415 |
+
for save_only_model in [True, False]:
|
| 416 |
+
trainer_args = TrainingArguments(
|
| 417 |
+
self.tmp_dir,
|
| 418 |
+
do_train=True,
|
| 419 |
+
do_eval=True,
|
| 420 |
+
num_train_epochs=1,
|
| 421 |
+
evaluation_strategy='steps',
|
| 422 |
+
save_strategy='steps',
|
| 423 |
+
per_device_train_batch_size=4,
|
| 424 |
+
per_device_eval_batch_size=4,
|
| 425 |
+
push_to_hub=push_to_hub,
|
| 426 |
+
hub_token=None, # use env var
|
| 427 |
+
hub_private_repo=True,
|
| 428 |
+
hub_strategy='every_save',
|
| 429 |
+
hub_model_id=self.hub_model_id,
|
| 430 |
+
overwrite_output_dir=True,
|
| 431 |
+
save_steps=10,
|
| 432 |
+
save_total_limit=2,
|
| 433 |
+
metric_for_best_model='loss',
|
| 434 |
+
greater_is_better=False,
|
| 435 |
+
report_to=['tensorboard'],
|
| 436 |
+
gradient_accumulation_steps=1,
|
| 437 |
+
logging_steps=5,
|
| 438 |
+
eval_steps=10,
|
| 439 |
+
save_safetensors=False,
|
| 440 |
+
save_only_model=save_only_model)
|
| 441 |
+
trainer_args._n_gpu = 1
|
| 442 |
+
trainer = BertTrainer(model, trainer_args, data_collator, train_dataset, val_dataset, tokenizer)
|
| 443 |
+
self.hub_model_id = trainer_args.hub_model_id
|
| 444 |
+
trainer.train()
|
| 445 |
+
if trainer_args.push_to_hub:
|
| 446 |
+
trainer.push_to_hub()
|
| 447 |
+
|
| 448 |
+
|
| 449 |
+
if __name__ == '__main__':
|
| 450 |
+
# TestRun().test_template()
|
| 451 |
+
# TestRun().test_hf_hub()
|
| 452 |
+
# TestRun().test_basic()
|
| 453 |
+
# TestRun().test_custom_dataset()
|
| 454 |
+
# TestRun().test_vl_audio()
|
| 455 |
+
# TestRun().test_loss_matching()
|
| 456 |
+
#
|
| 457 |
+
# TestRun().test_rlhf()
|
| 458 |
+
unittest.main()
|
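Note: most test bodies in test_run.py return early unless the file is executed directly (`if not __name__ == '__main__'`), so CI collection skips the GPU-heavy paths while a direct run exercises them. A minimal invocation, assuming a checkout of the repo root with ms-swift and a GPU available (the PYTHONPATH pattern mirrors the commands embedded in tests/llm/test_run3.py below):

    PYTHONPATH=. python tests/llm/test_run.py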
tests/llm/test_run3.py ADDED
@@ -0,0 +1,172 @@
import os
import shutil
import tempfile
import unittest

import json
import numpy as np

from swift.llm import MODEL_MAPPING, load_dataset


class TestRun3(unittest.TestCase):

    def setUp(self):
        print(f'Testing {type(self).__name__}.{self._testMethodName}')
        self._tmp_dir = tempfile.TemporaryDirectory()
        self.tmp_dir = self._tmp_dir.name

    def tearDown(self):
        shutil.rmtree(self.tmp_dir)

    def load_ds(self, ds):
        train_dataset, val_dataset = load_dataset(
            ds,
            split_dataset_ratio=0.0,
            strict=False,
            num_proc=1,
            model_name=['小黄', 'Xiao Huang'],
            model_author=['魔搭', 'ModelScope'])
        return train_dataset.select(range(min(50, len(train_dataset))))

    # def test_model_load(self):
    #     if os.path.exists('./models.txt'):
    #         with open('./models.txt', 'r') as f:
    #             models = json.load(f)
    #     else:
    #         models = []
    #     for model_name, model_meta in MODEL_MAPPING.items():
    #         meta_requires = model_meta.requires or []
    #         for group in model_meta.model_groups:
    #             model = group.models[0]
    #             if 'skip_test' in (group.tags or []) or model.ms_model_id in models:
    #                 break
    #             requires = meta_requires + (group.requires or [])
    #             for req in requires:
    #                 os.system(f'pip install "{req}"')
    #             if not any(['transformers' in req for req in requires]):
    #                 os.system('pip install transformers -U')
    #             if not any(['accelerate' in req for req in requires]):
    #                 os.system('pip install accelerate -U')
    #             try:
    #                 model_arch_args = ''
    #                 if model_meta.model_arch:
    #                     model_arch_args = f'--model_arch {model_meta.model_arch}'
    #                 cmd = ('PYTHONPATH=. python tests/llm/load_model.py '
    #                        f'--ms_model_id {model.ms_model_id} {model_arch_args}')
    #                 if os.system(cmd) != 0:
    #                     raise RuntimeError()
    #             except Exception:
    #                 passed = False
    #             else:
    #                 passed = True
    #                 models.append(model.ms_model_id)
    #             finally:
    #                 if passed:
    #                     with open('./models.txt', 'w') as f:
    #                         json.dump(models, f)

    # def test_template_load(self):
    #     if os.path.exists('./templates.txt'):
    #         with open('./templates.txt', 'r') as f:
    #             templates = json.load(f)
    #     else:
    #         templates = []
    #     for model_name, model_meta in MODEL_MAPPING.items():
    #         template = model_meta.template
    #         meta_requires = model_meta.requires or []
    #         for group in model_meta.model_groups:
    #             model = group.models[0]
    #             if 'skip_test' in (group.tags or []) or template in templates:
    #                 break
    #             requires = meta_requires + (group.requires or [])
    #             for req in requires:
    #                 os.system(f'pip install "{req}"')
    #             if not any(['transformers' in req for req in requires]):
    #                 os.system('pip install transformers -U')
    #             if not any(['accelerate' in req for req in requires]):
    #                 os.system('pip install accelerate -U')
    #             try:
    #                 cmd = ('PYTHONPATH=. python tests/llm/load_template.py '
    #                        f'--ms_model_id {model.ms_model_id} --template {template}')
    #                 if os.system(cmd) != 0:
    #                     raise RuntimeError()
    #             except Exception:
    #                 import traceback
    #                 print(traceback.format_exc())
    #                 passed = False
    #             else:
    #                 passed = True
    #                 templates.append(template)
    #             finally:
    #                 if passed:
    #                     with open('./templates.txt', 'w') as f:
    #                         json.dump(templates, f)

    @unittest.skip('skip')
    def test_template_compare(self):
        if os.path.exists('./templates.txt'):
            with open('./templates.txt', 'r') as f:
                templates = json.load(f)
        else:
            templates = []
        skip_model_type = {
            'grok', 'deepseek_moe', 'deepseek_v2', 'deepseek_v2_5', 'llama3_1_omni', 'llava_next_qwen_hf',
            'llava1_6_yi', 'llava_next_qwen', 'mixtral', 'codefuse_codellama', 'wizardlm2', 'wizardlm2_awq',
            'openbuddy_deepseek', 'sus', 'openbuddy_mixtral', 'openbuddy_llama', 'dbrx', 'nenotron', 'reflection',
            'xverse_moe', 'qwen2_moe', 'yuan2', 'wizardlm2_moe', 'emu3_gen', 'llava1_6_mistral', 'mplug_owl3_241101',
            'llava1_6_yi_hf'
        }
        for model_name, model_meta in MODEL_MAPPING.items():
            if model_name in skip_model_type:
                continue
            template = model_meta.template
            meta_requires = model_meta.requires or []
            for group in model_meta.model_groups:
                model = group.models[0]
                if 'awq' in model.ms_model_id.lower() or 'gptq' in model.ms_model_id.lower():
                    break
                if template in templates:
                    break
                requires = meta_requires + (group.requires or [])
                for req in requires:
                    os.system(f'pip install "{req}"')
                if not any(['transformers' in req for req in requires]):
                    os.system('pip install transformers -U')
                if not any(['accelerate' in req for req in requires]):
                    os.system('pip install accelerate -U')
                try:
                    cmd = ('CUDA_VISIBLE_DEVICES=0 PYTHONPATH=. python tests/llm/load_template.py '
                           f'--ms_model_id {model.ms_model_id} --template {template}')
                    if os.system(cmd) != 0:
                        raise RuntimeError()
                    cmd = (
                        'CUDA_VISIBLE_DEVICES=0 PYTHONPATH=/mnt/workspace/yzhao/tastelikefeet/swift python tests/llm/load_template.py '  # noqa
                        f'--ms_model_id {model.ms_model_id} --template {template} --new 0')
                    if os.system(cmd) != 0:
                        raise RuntimeError()
                    with open('new_input_ids.txt', 'r') as f:
                        input_ids_new = json.load(f)
                    with open('old_input_ids.txt', 'r') as f:
                        input_ids_old = json.load(f)
                    print('model_id', model.ms_model_id, 'new:', input_ids_new, 'old:', input_ids_old)
                    self.assertTrue(np.allclose(input_ids_new['input_ids'], input_ids_old['input_ids']))
                except Exception:
                    import traceback
                    print(traceback.format_exc())
                    passed = False
                else:
                    passed = True
                    templates.append(template)
                finally:
                    if passed:
                        with open('./templates.txt', 'w') as f:
                            json.dump(templates, f)
                    if os.path.exists('new_input_ids.txt'):
                        os.remove('new_input_ids.txt')
                    if os.path.exists('old_input_ids.txt'):
                        os.remove('old_input_ids.txt')


if __name__ == '__main__':
    unittest.main()
tests/llm/test_template.py ADDED
@@ -0,0 +1,104 @@
import os
import unittest

from swift.llm import PtEngine, RequestConfig, get_model_tokenizer, get_template
from swift.utils import get_logger, seed_everything

# os.environ['CUDA_VISIBLE_DEVICES'] = '0'
os.environ['SWIFT_DEBUG'] = '1'

logger = get_logger()


def _infer_model(pt_engine, system=None, messages=None):
    seed_everything(42)
    request_config = RequestConfig(max_tokens=128, temperature=0)
    if messages is None:
        messages = []
        if system is not None:
            messages += [{'role': 'system', 'content': system}]
        messages += [{'role': 'user', 'content': '你好'}]
        resp = pt_engine.infer([{'messages': messages}], request_config=request_config)
        response = resp[0].choices[0].message.content
        messages += [{'role': 'assistant', 'content': response}, {'role': 'user', 'content': '<image>这是什么'}]
    resp = pt_engine.infer([{
        'messages': messages,
    }], request_config=request_config)
    response = resp[0].choices[0].message.content
    messages += [{'role': 'assistant', 'content': response}]
    logger.info(f'model: {pt_engine.model_info.model_name}, messages: {messages}')
    return response


class TestTemplate(unittest.TestCase):

    def test_template(self):
        pt_engine = PtEngine('Qwen/Qwen2.5-3B-Instruct-GPTQ-Int4')
        response = _infer_model(pt_engine)
        pt_engine.default_template.template_backend = 'jinja'
        response2 = _infer_model(pt_engine)
        assert response == response2

    def test_tool_message_join(self):
        from copy import deepcopy

        from swift.plugin import agent_templates

        messages = [
            # first round
            {
                'role': 'user',
                'content': 'user1'
            },
            {
                'role': 'assistant',
                'content': 'assistant1'
            },
            {
                'role': 'assistant',
                'content': 'assistant2'
            },
            {
                'role': 'tool',
                'content': 'tool1'
            },
            # second round
            {
                'role': 'assistant',
                'content': 'assistant3'
            },
            {
                'role': 'tool',
                'content': 'tool2'
            },
            {
                'role': 'tool',
                'content': 'tool3'
            },
        ]

        # testing two template types.
        tokenizer = get_model_tokenizer('Qwen/Qwen2.5-7B-Instruct', load_model=False)[1]
        template = get_template(tokenizer.model_meta.template, tokenizer)
        for agent_template_type in ('react_zh', 'qwen_zh'):
            agent_template = agent_templates[agent_template_type]()
            template.agent_template = agent_template
            observation = agent_template.keyword.observation
            test_messages = deepcopy(messages)
            test_messages[2]['content'] = 'assistant2' + observation
            test_messages[4]['content'] = (
                agent_template.keyword.action + agent_template.keyword.action_input + 'assistant3' + observation)
            encoded = template.encode({'messages': test_messages})
            res = template.safe_decode(encoded['input_ids'])

            ground_truth = (
                '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n'
                '<|im_start|>user\nuser1<|im_end|>\n'
                f'<|im_start|>assistant\nassistant1assistant2{observation}tool1'
                f'{agent_template.keyword.action}{agent_template.keyword.action_input}assistant3'
                f'{observation}tool2\n{observation}tool3\n')
            assert res == ground_truth


if __name__ == '__main__':
    unittest.main()
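test_template above asserts that swift's built-in chat template and the tokenizer's jinja chat template produce identical greedy completions. The same backend switch can be flipped interactively; a minimal sketch using only calls that already appear in this file (the model must be cached locally or downloadable from ModelScope):

    from swift.llm import PtEngine, RequestConfig

    engine = PtEngine('Qwen/Qwen2.5-3B-Instruct-GPTQ-Int4')
    engine.default_template.template_backend = 'jinja'  # switch away from the default backend
    resp = engine.infer([{'messages': [{'role': 'user', 'content': '你好'}]}],
                        request_config=RequestConfig(max_tokens=32, temperature=0))
    print(resp[0].choices[0].message.content)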
tests/llm/test_utils.py ADDED
@@ -0,0 +1,28 @@
import unittest

from swift.llm import load_dataset
from swift.utils import lower_bound


class TestLlmUtils(unittest.TestCase):

    def test_count_startswith(self):
        arr = [-100] * 1000 + list(range(1000))
        self.assertTrue(lower_bound(0, len(arr), lambda i: arr[i] != -100) == 1000)

    def test_count_endswith(self):
        arr = list(range(1000)) + [-100] * 1000
        self.assertTrue(lower_bound(0, len(arr), lambda i: arr[i] == -100) == 1000)

    @unittest.skip('avoid ci error')
    def test_dataset(self):
        dataset = load_dataset(['AI-ModelScope/alpaca-gpt4-data-zh#1000', 'AI-ModelScope/alpaca-gpt4-data-en#200'],
                               num_proc=4,
                               strict=False,
                               download_mode='force_redownload')
        print(f'dataset[0]: {dataset[0]}')
        print(f'dataset[1]: {dataset[1]}')


if __name__ == '__main__':
    unittest.main()
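The two lower_bound tests pin down the contract the helper is expected to satisfy: given a predicate that flips from False to True exactly once over [lo, hi), return the first index at which it holds. A hypothetical reference implementation with that behavior (a sketch of the semantics the asserts encode, not the actual swift.utils code):

    def lower_bound_ref(lo: int, hi: int, cond) -> int:
        # Binary search for the smallest i in [lo, hi) with cond(i) True,
        # assuming cond is monotone (a run of False, then a run of True).
        while lo < hi:
            mid = (lo + hi) // 2
            if cond(mid):
                hi = mid
            else:
                lo = mid + 1
        return lo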
tests/megatron/test_align/test_llm.py ADDED
@@ -0,0 +1,94 @@
import os

import torch

os.environ['CUDA_VISIBLE_DEVICES'] = '0'


def _test_model(model_id):
    from swift.llm import export_main, ExportArguments
    if model_id.endswith('mcore'):
        export_main(
            ExportArguments(
                mcore_model=model_id,
                to_hf=True,
                exist_ok=True,
                test_convert_precision=True,
                torch_dtype=torch.bfloat16))
    else:
        export_main(
            ExportArguments(
                model=model_id,
                to_mcore=True,
                exist_ok=True,
                test_convert_precision=True,
                torch_dtype=torch.bfloat16,
            ))


def test_qwen2():
    _test_model('Qwen/Qwen2-0.5B-Instruct')


def test_llama2():
    _test_model('modelscope/Llama-2-7b-chat-ms')


def test_llama3():
    _test_model('LLM-Research/Meta-Llama-3-8B-Instruct')


def test_marco_o1():
    _test_model('AIDC-AI/Marco-o1')


def test_deepseek_r1_llama():
    _test_model('deepseek-ai/DeepSeek-R1-Distill-Llama-8B')


def test_deepseek_r1_qwen():
    _test_model('deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B')


def test_yi():
    _test_model('01ai/Yi-1.5-6B-Chat')


def test_megrez():
    _test_model('InfiniAI/Megrez-3b-Instruct')


def test_llama3_1():
    _test_model('LLM-Research/Meta-Llama-3.1-8B-Instruct')


def test_llama3_2():
    _test_model('LLM-Research/Llama-3.2-1B-Instruct')


def test_qwen3():
    _test_model('Qwen/Qwen3-0.6B-Base')


def test_qwen2_moe():
    _test_model('Qwen/Qwen1.5-MoE-A2.7B-Chat')


def test_qwen3_moe():
    _test_model('Qwen/Qwen3-15B-A2B-Base')


if __name__ == '__main__':
    # test_qwen2()
    # test_llama2()
    # test_llama3()
    # test_marco_o1()
    # test_deepseek_r1_llama()
    # test_deepseek_r1_qwen()
    # test_yi()
    # test_megrez()
    # test_llama3_1()
    # test_llama3_2()
    # test_qwen3()
    # test_qwen2_moe()
    test_qwen3_moe()
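_test_model dispatches on the checkpoint id's suffix: ids ending in 'mcore' are converted back to Hugging Face format (to_hf=True), anything else is converted HF to mcore (to_mcore=True), and both directions run with test_convert_precision=True so the conversion is checked numerically. Covering another checkpoint is one more function in the same pattern (the model id below is a placeholder, not a tested entry):

    def test_my_model():
        _test_model('my-org/my-model')  # placeholder id; no 'mcore' suffix, so it takes the to_mcore branch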
tests/megatron/test_export.py ADDED
@@ -0,0 +1,64 @@
import os

os.environ['CUDA_VISIBLE_DEVICES'] = '0'


def _infer_model(pt_engine, system=None, messages=None):
    from swift.utils import seed_everything, get_logger
    from swift.llm import RequestConfig
    logger = get_logger()
    seed_everything(42)
    request_config = RequestConfig(max_tokens=128, temperature=0)
    if messages is None:
        messages = []
        if system is not None:
            messages += [{'role': 'system', 'content': system}]
        messages += [{'role': 'user', 'content': 'who are you?'}]
        resp = pt_engine.infer([{'messages': messages}], request_config=request_config)
        response = resp[0].choices[0].message.content
        messages += [{'role': 'assistant', 'content': response}, {'role': 'user', 'content': '<image>这是什么'}]
    else:
        messages = messages.copy()
    resp = pt_engine.infer([{
        'messages': messages,
    }], request_config=request_config)
    response = resp[0].choices[0].message.content
    messages += [{'role': 'assistant', 'content': response}]
    logger.info(f'model: {pt_engine.model_info.model_name}, messages: {messages}')
    return response


model_id = 'Qwen/Qwen2-7B-Instruct'


def hf2mcore():
    from swift.llm import export_main, ExportArguments
    export_main(
        ExportArguments(
            model=model_id, to_mcore=True, torch_dtype='bfloat16', exist_ok=True, test_convert_precision=True))


def mcore2hf():
    from swift.llm import export_main, ExportArguments
    export_main(
        ExportArguments(
            mcore_model='Qwen2-7B-Instruct-mcore',
            to_hf=True,
            torch_dtype='bfloat16',
            exist_ok=True,
            test_convert_precision=True))


def infer_hf_align():
    from swift.llm import PtEngine
    pt_engine = PtEngine(model_id)
    response = _infer_model(pt_engine)
    pt_engine = PtEngine('Qwen2-7B-Instruct-mcore-hf')
    response2 = _infer_model(pt_engine)
    assert response == response2


if __name__ == '__main__':
    # hf2mcore()
    mcore2hf()
    infer_hf_align()
tests/megatron/test_model.py ADDED
@@ -0,0 +1,65 @@
import os

os.environ['CUDA_VISIBLE_DEVICES'] = '0'
os.environ['MASTER_PORT'] = '29560'


def get_mg_model_tokenizer(model_id):
    from megatron.training.initialize import initialize_megatron
    set_default_ddp_config()
    hf_model, processor = get_model_tokenizer(model_id, torch_dtype=torch.float32)
    megatron_model_meta = get_megatron_model_meta(processor.model_meta.model_type)
    model_info = processor.model_info
    kwargs = megatron_model_meta.convert_hf_config(model_info.config)
    megatron_args = MegatronArguments(**kwargs, seq_length=1, use_cpu_initialization=True, no_initialization=True)
    patch_megatron_tokenizer(processor)
    extra_args = megatron_args.parse_to_megatron()
    initialize_megatron(args_defaults=extra_args)
    mg_model = megatron_model_meta.model_provider()
    megatron_model_meta.convert_hf2mcore(hf_model, mg_model)
    return hf_model, mg_model, processor


def test_bf16_fp32():
    hf_model_fp32, processor = get_model_tokenizer(model_id, torch_dtype=torch.float32)
    hf_model_bf16, processor = get_model_tokenizer(model_id, torch_dtype=torch.bfloat16)
    template = get_template(hf_model_fp32.model_meta.template, processor)
    input_ids = template.encode(InferRequest(messages=[{'role': 'user', 'content': 'who are you?'}]))['input_ids']
    input_ids = torch.tensor(input_ids)[None].to('cuda')
    with torch.inference_mode():
        hf_logits_fp32 = hf_model_fp32(input_ids).logits
        hf_logits_bf16 = hf_model_bf16(input_ids).logits
        mean_diff = (hf_logits_fp32 - hf_logits_bf16).abs().mean().item()
        max_diff = (hf_logits_fp32 - hf_logits_bf16).abs().max().item()
        # mean_diff: 0.13342587649822235, max_diff: 7.1983513832092285
        print(f'mean_diff: {mean_diff}, max_diff: {max_diff}')


def test_align(hf_model, mg_model, processor):
    from megatron.training.utils import get_ltor_masks_and_position_ids
    template = get_template(hf_model.model_meta.template, processor)
    input_ids = template.encode(InferRequest(messages=[{'role': 'user', 'content': 'who are you?'}]))['input_ids']
    input_ids = torch.tensor(input_ids)[None].to('cuda')
    attention_mask, _, position_ids = get_ltor_masks_and_position_ids(input_ids, -100, True, True, True)
    with torch.inference_mode():
        hf_model.cuda()
        mg_model.cuda()
        hf_logits = hf_model(input_ids).logits
        mg_logits = mg_model(input_ids=input_ids, attention_mask=attention_mask, position_ids=position_ids)
        mean_diff = (mg_logits - hf_logits).abs().mean().item()
        max_diff = (mg_logits - hf_logits).abs().max().item()
        print(f'mean_diff: {mean_diff}, max_diff: {max_diff}')


model_id = 'Qwen/Qwen2-7B-Instruct'

if __name__ == '__main__':
    import torch
    from swift.llm import InferRequest, get_model_tokenizer, get_template
    from swift.utils import set_default_ddp_config
    from swift.megatron.argument import MegatronArguments
    from swift.megatron.model import get_megatron_model_meta
    from swift.megatron.utils import patch_megatron_tokenizer
    # test_bf16_fp32()
    hf_model, mg_model, processor = get_mg_model_tokenizer(model_id)
    test_align(hf_model, mg_model, processor)
tests/megatron/test_save.py ADDED
@@ -0,0 +1,61 @@
import os

os.environ['CUDA_VISIBLE_DEVICES'] = '0'


def get_mg_model_tokenizer():
    model_id = 'Qwen/Qwen2.5-7B-Instruct'
    hf_model_id = 'Qwen/Qwen2.5-7B'
    from megatron.training.initialize import initialize_megatron
    set_default_ddp_config()
    hf_model, _ = get_model_tokenizer(hf_model_id, torch_dtype=torch.float32)
    _, processor = get_model_tokenizer(model_id, load_model=False)
    megatron_model_meta = get_megatron_model_meta(processor.model_meta.model_type)
    model_info = processor.model_info
    kwargs = megatron_model_meta.convert_hf_config(model_info.config)
    megatron_args = MegatronArguments(
        **kwargs,
        seq_length=1,
        use_cpu_initialization=True,
        no_initialization=True,
        load='Qwen2-7B-Instruct-mcore',
        save='mcore-hf-test',
        no_load_optim=True,
        no_load_rng=True)
    patch_megatron_tokenizer(processor)
    extra_args = megatron_args.parse_to_megatron()
    initialize_megatron(args_defaults=extra_args)
    mg_model = megatron_model_meta.model_provider()
    megatron_model_meta.convert_mcore2hf(hf_model, mg_model)
    return hf_model, mg_model, processor


def test_align(hf_model, mg_model, processor):
    from megatron.training.utils import get_ltor_masks_and_position_ids
    template = get_template(hf_model.model_meta.template, processor)
    input_ids = template.encode(InferRequest(messages=[{'role': 'user', 'content': 'who are you?'}]))['input_ids']
    input_ids = torch.tensor(input_ids)[None].to('cuda')
    attention_mask, _, position_ids = get_ltor_masks_and_position_ids(input_ids, -100, True, True, True)
    with torch.inference_mode():
        hf_model.cuda()
        mg_model.cuda()
        hf_logits = hf_model(input_ids).logits
        mg_logits = mg_model(input_ids=input_ids, attention_mask=attention_mask, position_ids=position_ids)
        mean_diff = (mg_logits - hf_logits).abs().mean().item()
        max_diff = (mg_logits - hf_logits).abs().max().item()
        print(f'mean_diff: {mean_diff}, max_diff: {max_diff}')


def test_save():
    hf_model, mg_model, processor = get_mg_model_tokenizer()
    test_align(hf_model, mg_model, processor)


if __name__ == '__main__':
    import torch
    from swift.llm import InferRequest, get_model_tokenizer, get_template
    from swift.utils import set_default_ddp_config
    from swift.megatron.argument import MegatronArguments
    from swift.megatron.model import get_megatron_model_meta
    from swift.megatron.utils import patch_megatron_tokenizer
    test_save()
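Both megatron alignment helpers (here and in tests/megatron/test_model.py) only print the mean/max absolute logit difference between the mcore and HF models. If used as a regression gate rather than a manual check, an explicit tolerance could be appended after the diff computation; the threshold below is an illustrative choice, not taken from the source:

    assert mean_diff < 1e-4, f'mcore/HF logits diverged: mean_diff={mean_diff}'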
tests/megatron/test_train.py ADDED
@@ -0,0 +1,37 @@
import os

os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'


def test_sft():
    from swift.megatron import megatron_sft_main, MegatronTrainArguments
    megatron_sft_main(
        MegatronTrainArguments(
            load='Qwen2-7B-Instruct-mcore',
            dataset=[
                'AI-ModelScope/alpaca-gpt4-data-zh#500', 'swift/self-cognition#500',
                'AI-ModelScope/alpaca-gpt4-data-en#500'
            ],
            tensor_model_parallel_size=2,
            train_iters=100,
            model_author='swift',
            model_name='swift-robot',
            eval_iters=5,
            finetune=True))


def test_pt():
    from swift.megatron import megatron_pt_main, MegatronTrainArguments
    megatron_pt_main(
        MegatronTrainArguments(
            load='Qwen2-7B-mcore',
            dataset=['AI-ModelScope/alpaca-gpt4-data-zh#500', 'AI-ModelScope/alpaca-gpt4-data-en#500'],
            tensor_model_parallel_size=2,
            train_iters=200,
            eval_iters=5,
            finetune=True))


if __name__ == '__main__':
    # test_sft()
    test_pt()
tests/models/test_flash_attn.py ADDED
@@ -0,0 +1,8 @@
from swift.llm import get_model_tokenizer

if __name__ == '__main__':
    # model, tokenizer = get_model_tokenizer('Qwen/Qwen2-7B-Instruct', attn_impl='flash_attn')
    # model, tokenizer = get_model_tokenizer('AIDC-AI/Ovis2-2B', attn_impl='flash_attn')
    # model, tokenizer = get_model_tokenizer('OpenGVLab/InternVL2-2B', attn_impl='flash_attn')
    model, tokenizer = get_model_tokenizer('Shanghai_AI_Laboratory/internlm3-8b-instruct', attn_impl='flash_attn')
    print(model)
tests/models/test_llm.py ADDED
@@ -0,0 +1,16 @@
import os

os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'


def test_llama3():
    from swift.llm import infer_main, InferArguments
    infer_main(
        InferArguments(
            model='LLM-Research/Meta-Llama-3.1-8B-Instruct',
            max_batch_size=2,
            val_dataset='AI-ModelScope/alpaca-gpt4-data-en#2'))


if __name__ == '__main__':
    test_llama3()
tests/models/test_mllm.py ADDED
@@ -0,0 +1,16 @@
import os

os.environ['CUDA_VISIBLE_DEVICES'] = '0'


def test_cogvlm():
    from swift.llm import infer_main, InferArguments, sft_main, TrainArguments
    # infer_main(InferArguments(model='ZhipuAI/cogvlm2-video-llama3-chat'))
    sft_main(
        TrainArguments(
            model='ZhipuAI/cogvlm2-video-llama3-chat',
            dataset=['AI-ModelScope/alpaca-gpt4-data-zh#200', 'swift/VideoChatGPT:Generic#200']))


if __name__ == '__main__':
    test_cogvlm()
tests/sample/test_client.py ADDED
@@ -0,0 +1,35 @@
import os


def test_client():
    from swift.llm import sampling_main, SamplingArguments
    import json
    base_url = 'https://dashscope.aliyuncs.com/compatible-mode/v1'
    api_key = os.environ.get('OPENAI_API_KEY')
    engine_kwargs = json.dumps({
        'base_url': base_url,
        'api_key': api_key,
    })
    dataset = 'tastelikefeet/competition_math#5'
    system = """A conversation between User and Assistant. The user asks a question, and the Assistant solves it.
The assistant first thinks about the reasoning process in the mind and then provides the user
with the answer. The reasoning process and answer are enclosed
within <think> </think> and <answer> </answer> tags, respectively,
i.e., <think> reasoning process here </think> <answer> answer here </answer>."""
    args = SamplingArguments(
        sampler_type='distill',
        sampler_engine='client',
        model='deepseek-r1',
        dataset=dataset,
        num_return_sequences=1,
        stream=True,
        system=system,
        temperature=0.6,
        top_p=0.95,
        engine_kwargs=engine_kwargs,
    )
    sampling_main(args)


if __name__ == '__main__':
    test_client()
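test_client pulls the DashScope-compatible credentials from the environment and forwards them through engine_kwargs as a JSON string, so running it only requires exporting the key (the key value below is a placeholder):

    OPENAI_API_KEY=sk-placeholder python tests/sample/test_client.py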
tests/test_align/test_cls.py ADDED
@@ -0,0 +1,60 @@
import os
from pprint import pprint

import torch

os.environ['CUDA_VISIBLE_DEVICES'] = '1'
kwargs = {
    'per_device_train_batch_size': 4,
    'per_device_eval_batch_size': 4,
    'gradient_accumulation_steps': 4,
    'num_train_epochs': 1,
    'save_steps': 100,
    'max_length': 512,
    'task_type': 'seq_cls',
    'num_labels': 2,
}


def calc_acc(infer_result):
    n_correct = 0
    for res in infer_result:
        if res['response'] == res['labels']:
            n_correct += 1
    return f'acc: {n_correct/len(infer_result)}, n_correct: {n_correct}, len(res): {len(infer_result)}'


def test_llm():
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments, Template
    res = []
    for model in ['Qwen/Qwen2.5-0.5B-Instruct', 'Qwen/Qwen2.5-0.5B', 'AI-ModelScope/bert-base-chinese']:
        dataset = ['DAMO_NLP/jd:cls#2000']
        result = sft_main(TrainArguments(model=model, dataset=dataset, split_dataset_ratio=0.1, **kwargs))
        last_model_checkpoint = result['last_model_checkpoint']
        infer_result = infer_main(
            InferArguments(ckpt_dir=last_model_checkpoint, load_data_args=True, truncation_strategy='right'))
        res.append(calc_acc(infer_result))
        infer_result2 = infer_main(
            InferArguments(
                ckpt_dir=last_model_checkpoint, load_data_args=True, max_batch_size=16, truncation_strategy='right'))
        res.append(calc_acc(infer_result2))

    model = 'Qwen/Qwen2.5-0.5B-Instruct'
    dataset = ['DAMO_NLP/jd#2000']
    train_kwargs = kwargs.copy()
    train_kwargs.pop('task_type')
    train_kwargs.pop('num_labels')
    result = sft_main(TrainArguments(model=model, dataset=dataset, split_dataset_ratio=0.1, **train_kwargs))
    last_model_checkpoint = result['last_model_checkpoint']
    infer_result = infer_main(
        InferArguments(ckpt_dir=last_model_checkpoint, load_data_args=True, truncation_strategy='right'))
    res.append(calc_acc(infer_result))
    infer_result2 = infer_main(
        InferArguments(
            ckpt_dir=last_model_checkpoint, load_data_args=True, max_batch_size=16, truncation_strategy='right'))
    res.append(calc_acc(infer_result2))
    pprint(res)


if __name__ == '__main__':
    test_llm()
tests/test_align/test_lmdeploy_vlm.py ADDED
@@ -0,0 +1,80 @@
import os

os.environ['CUDA_VISIBLE_DEVICES'] = '0'


def _infer_image(model, system=None, images=None):
    engine = LmdeployEngine(model)
    if images is None:
        images = ['http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/cat.png']
    messages = []
    if system is not None:
        messages += [{'role': 'system', 'content': system}]
    messages.append({'role': 'user', 'content': 'describe the image.'})
    resp_list = engine.infer([InferRequest(messages=messages, images=images)],
                             RequestConfig(temperature=0, max_tokens=64, repetition_penalty=1.))
    return resp_list[0].choices[0].message.content


def _infer_image_pipeline(model, images=None, prefix='<IMAGE_TOKEN>\n'):
    from lmdeploy import pipeline, GenerationConfig
    from lmdeploy.vl import load_image
    from swift.llm import safe_snapshot_download
    gen_config = GenerationConfig(temperature=0., repetition_penalty=1., max_new_tokens=64)
    pipe = pipeline(safe_snapshot_download(model))

    image = load_image('http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/cat.png')
    response = pipe((f'{prefix}describe the image.', image), gen_config=gen_config)
    return response.text


def test_internvl2_5():
    model = 'OpenGVLab/InternVL2_5-4B'
    response = _infer_image(model)
    response2 = _infer_image_pipeline(model)
    assert response == response2


def test_internvl2():
    model = 'OpenGVLab/InternVL2-2B'
    response = _infer_image(model)
    response2 = _infer_image_pipeline(model)  # Missing '\n' after '<|im_end|>'
    assert response == response2


def test_deepseek_vl():
    model = 'deepseek-ai/deepseek-vl-1.3b-chat'
    response = _infer_image(model)
    response2 = _infer_image_pipeline(model, prefix='<IMAGE_TOKEN>')
    assert response == response2


def test_qwen_vl():
    model = 'Qwen/Qwen-VL-Chat'
    response = _infer_image_pipeline(model)  # Missing: 'Picture 1: '
    response2 = _infer_image(model)
    assert response == response2


def test_qwen2_vl():
    model = 'Qwen/Qwen2-VL-2B-Instruct'
    response = _infer_image_pipeline(model, prefix='<IMAGE_TOKEN>')
    response2 = _infer_image(model)
    assert response == response2


def test_qwen2_5_vl():
    model = 'Qwen/Qwen2.5-VL-3B-Instruct'
    response = _infer_image(model)
    response2 = _infer_image_pipeline(model, prefix='<IMAGE_TOKEN>')
    assert response == response2


if __name__ == '__main__':
    from swift.llm import LmdeployEngine, InferRequest, RequestConfig
    # test_internvl2()
    # test_internvl2_5()
    # test_deepseek_vl()
    # test_qwen_vl()
    # test_qwen2_vl()
    test_qwen2_5_vl()