from datasets import load_dataset
import pandas as pd
from langchain.schema import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
def load_raw_dataset():
    # Load the HealthCareMagic patient-doctor Q&A dataset from the Hub.
    dataset = load_dataset("lavita/ChatDoctor-HealthCareMagic-100k")
    df = pd.DataFrame(dataset["train"])

    # Concatenate each patient question ("input") with the doctor's answer
    # ("output") so the pair is embedded and retrieved as a single passage.
    df["combined"] = df["input"] + " " + df["output"]

    docs = [
        Document(
            page_content=row["combined"],
            metadata={"question": row["input"], "answer": row["output"]},
        )
        for _, row in df.iterrows()
    ]
    return docs
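
A quick way to sanity-check the loader (a minimal sketch; the field names simply follow the dataset columns used above) is to load the documents and inspect one entry:

docs = load_raw_dataset()
print(f"Loaded {len(docs)} documents")
print(docs[0].metadata["question"][:200])
print(docs[0].metadata["answer"][:200])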
def create_vector_database(docs, model_name):
    # Embed every document with a Hugging Face sentence-embedding model
    # and index the vectors in an in-memory FAISS store.
    embedding_model = HuggingFaceEmbeddings(model_name=model_name)
    vectorstore = FAISS.from_documents(docs, embedding_model)
    return vectorstore
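
Embedding all 100k records takes a while, so it is worth persisting the index. The snippet below is a sketch that assumes a sentence-transformers embedding model (the model name here is an illustrative choice, not one fixed by the code above) and uses FAISS's save_local/load_local helpers:

embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"  # example choice
vectorstore = create_vector_database(docs, embedding_model_name)
vectorstore.save_local("faiss_healthcare_index")

# Later, reload without re-embedding (the same embedding model must be supplied).
vectorstore = FAISS.load_local(
    "faiss_healthcare_index",
    HuggingFaceEmbeddings(model_name=embedding_model_name),
)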
def get_llm(model_name):
    # Load the tokenizer and causal-LM weights, letting transformers pick the
    # dtype and spread the model across the available devices.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name, torch_dtype="auto", device_map="auto"
    )
    # Wrap the model in a text-generation pipeline with sampling enabled.
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        temperature=0.7,
        do_sample=True,
    )
    # Expose the pipeline through LangChain's LLM interface.
    llm = HuggingFacePipeline(pipeline=pipe)
    return llm
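
Any instruction-tuned causal LM from the Hub can be passed in; the model below is only an illustrative assumption, chosen to fit a single GPU, not one the code above prescribes:

llm = get_llm("mistralai/Mistral-7B-Instruct-v0.2")  # example model; pick one that fits your hardware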
def get_prompt_template():
    # Prompt that grounds the model's answer in the retrieved reference cases.
    prompt_template = PromptTemplate(
        input_variables=["context", "question"],
        template="""Based on the following references and your medical knowledge, provide a detailed response:

References:
{context}

Question: {question}

Consider:
1. The key medical concepts in the question.
2. How the reference cases relate to this question.
3. The medical principles that should be applied.
4. Any potential complications or considerations.

Then give the final response:
""",
    )
    return prompt_template
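
Putting the pieces together, a minimal retrieval-augmented answer loop looks like the sketch below. It assumes the vector store and LLM from the helpers above are already built; the sample question is only an illustration.

question = "What could cause persistent lower back pain after lifting?"

# Retrieve the most similar patient-doctor exchanges as context.
retrieved = vectorstore.similarity_search(question, k=3)
context = "\n\n".join(doc.page_content for doc in retrieved)

# Fill the prompt and generate the grounded answer.
prompt = get_prompt_template()
answer = llm(prompt.format(context=context, question=question))  # newer LangChain versions prefer llm.invoke(...)
print(answer)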