update: zeroclue
Browse files
- README.md +86 -0
- pytorch_model.bin +1 -1
README.md
CHANGED
|
@@ -99,6 +99,92 @@ example_dict={
|
|
| 99 |
|
| 100 |
"生成式摘要":{"text_a":"针对传统的流量分类管理系统存在不稳定、结果反馈不及时、分类结果显示不直观等问题,设计一个基于web的在线的流量分类管理系统.该系统采用流中前5个包(排除3次握手包)所含信息作为特征值计算资源,集成一种或多种分类算法用于在线网络流量分类,应用数据可视化技术处理分类结果.实验表明:在采用适应在线分类的特征集和c4.5决策树算法做分类时,系统能快速做出分类,且精度达到94%以上;数据可视化有助于人机交互,改善分类指导."}
|
| 101 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
```
|
| 103 |
|
| 104 |
## 预训练或微调 pretrain or finetune
|
|
|
|
| 99 |
|
| 100 |
"生成式摘要":{"text_a":"针对传统的流量分类管理系统存在不稳定、结果反馈不及时、分类结果显示不直观等问题,设计一个基于web的在线的流量分类管理系统.该系统采用流中前5个包(排除3次握手包)所含信息作为特征值计算资源,集成一种或多种分类算法用于在线网络流量分类,应用数据可视化技术处理分类结果.实验表明:在采用适应在线分类的特征集和c4.5决策树算法做分类时,系统能快速做出分类,且精度达到94%以上;数据可视化有助于人机交互,改善分类指导."}
|
| 101 |
}
|
| 102 |
+
|
| 103 |
+
# 构造prompt的过程中,verbalizer这个占位key的内容,是通过 "/".join(choices) 拼接起来
|
| 104 |
+
dataset2instruction = {
|
| 105 |
+
"情感分析": {
|
| 106 |
+
"prompt": "{}任务:【{}】这篇文章的情感态度是什么?{}",
|
| 107 |
+
"keys_order": ["subtask_type","text_a", "verbalizer"],
|
| 108 |
+
"data_type": "classification",
|
| 109 |
+
},
|
| 110 |
+
"文本分类": {
|
| 111 |
+
"prompt": "{}任务:【{}】这篇文章的类别是什么?{}",
|
| 112 |
+
"keys_order": ["subtask_type","text_a", "verbalizer"],
|
| 113 |
+
"data_type": "classification",
|
| 114 |
+
},
|
| 115 |
+
"新闻分类": {
|
| 116 |
+
"prompt": "{}任务:【{}】这篇文章的类别是什么?{}",
|
| 117 |
+
"keys_order": ["subtask_type","text_a", "verbalizer"],
|
| 118 |
+
"data_type": "classification",
|
| 119 |
+
},
|
| 120 |
+
"意图识别": {
|
| 121 |
+
"prompt": "{}任务:【{}】这句话的意图是什么?{}",
|
| 122 |
+
"keys_order": ["subtask_type","text_a", "verbalizer"],
|
| 123 |
+
"data_type": "classification",
|
| 124 |
+
},
|
| 125 |
+
# --------------------
|
| 126 |
+
"自然语言推理": {
|
| 127 |
+
"prompt": "{}任务:【{}】和【{}】,以上两句话的逻辑关系是什么?{}",
|
| 128 |
+
"keys_order": ["subtask_type","text_a", "text_b", "verbalizer"],
|
| 129 |
+
"data_type": "classification",
|
| 130 |
+
},
|
| 131 |
+
"语义匹配": {
|
| 132 |
+
"prompt": "{}任务:【{}】和【{}】,以上两句话的内容是否相似?{}",
|
| 133 |
+
"keys_order": ["subtask_type","text_a", "text_b", "verbalizer"],
|
| 134 |
+
"data_type": "classification",
|
| 135 |
+
},
|
| 136 |
+
# -----------------------
|
| 137 |
+
"指代消解": {
|
| 138 |
+
"prompt": "{}任务:文章【{}】中{}{}",
|
| 139 |
+
"keys_order": ["subtask_type","text_a", "question", "verbalizer"],
|
| 140 |
+
"data_type": "classification",
|
| 141 |
+
},
|
| 142 |
+
"多项选择": {
|
| 143 |
+
"prompt": "{}任务:阅读文章【{}】问题【{}】?{}",
|
| 144 |
+
"keys_order": ["subtask_type","text_a", "question", "verbalizer"],
|
| 145 |
+
"data_type": "classification",
|
| 146 |
+
},
|
| 147 |
+
# ------------------------
|
| 148 |
+
"抽取式阅读理解": {
|
| 149 |
+
"prompt": "{}任务:阅读文章【{}】问题【{}】的答案是什么?",
|
| 150 |
+
"keys_order": ["subtask_type","text_a", "question"],
|
| 151 |
+
"data_type": "mrc",
|
| 152 |
+
},
|
| 153 |
+
"实体识别": {
|
| 154 |
+
"prompt": "{}任务:找出【{}】这篇文章中所有【{}】类型的实体?",
|
| 155 |
+
"keys_order": ["subtask_type","text_a", "question"],
|
| 156 |
+
"data_type": "ner",
|
| 157 |
+
},
|
| 158 |
+
# ------------------------
|
| 159 |
+
"关键词抽取": {
|
| 160 |
+
"prompt": "{}任务:【{}】这篇文章的关键词是什么?",
|
| 161 |
+
"keys_order": ["subtask_type","text_a"],
|
| 162 |
+
"data_type": "keys",
|
| 163 |
+
},
|
| 164 |
+
"关键词识别":{
|
| 165 |
+
"prompt": "{}任务:阅读文章【{}】问题【{}】{}",
|
| 166 |
+
"keys_order": ["subtask_type","text_a","question","verbalizer"],
|
| 167 |
+
"data_type": "classification",
|
| 168 |
+
},
|
| 169 |
+
"生成式摘要": {
|
| 170 |
+
"prompt": "{}任务:【{}】这篇文章的摘要是什么?",
|
| 171 |
+
"keys_order": ["subtask_type","text_a"],
|
| 172 |
+
"data_type": "summ",
|
| 173 |
+
},
|
| 174 |
+
}
|
| 175 |
+
|
| 176 |
+
def get_instruction(sample):
|
| 177 |
+
|
| 178 |
+
template = dataset2instruction[sample["subtask_type"]]
|
| 179 |
+
# print(template)
|
| 180 |
+
# print(sample)
|
| 181 |
+
sample["instruction"] = template["prompt"].format(*[
|
| 182 |
+
sample[k] for k in template["keys_order"]
|
| 183 |
+
])
|
| 184 |
+
|
| 185 |
+
print(sample["instruction"])
|
| 186 |
+
|
| 187 |
+
return sample["instruction"]
|
| 188 |
```
|
| 189 |
|
| 190 |
## 预训练或微调 pretrain or finetune
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 229158725
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e9ddf252616a1356efef6c7f2ef21143aa8f9242bd9758cd4489aa3f4a87ec3e
|
| 3 |
size 229158725
|