Spaces:
Build error
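Since the Space fails at build time, the first thing to check is that `requirements.txt` declares every package the app imports. Below is a minimal sketch inferred from the imports in the code that follows; the list itself (and whether you need version pins) is an assumption to verify against the actual build log:

```text
# requirements.txt -- hypothetical sketch based on the app's imports
# (pickle and string are in the standard library and need no entry)
streamlit
fastai
torch
pandas
transformers
sentencepiece
requests
```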
```python
import streamlit as st
from fastai.collab import *
import torch
from torch import nn
import pickle
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import sentencepiece
import string
import requests
def load_stuff():
    # Load the data loader
    dls = pd.read_pickle("dataloader.pkl")
    # Create an instance of the collaborative-filtering model
    learn = collab_learner(dls, use_nn=True, layers=[20, 10], y_range=(0, 10.5))
    # Load the saved state dictionary onto the CPU
    state_dict = torch.load("myModel.pth", map_location=torch.device("cpu"))
    # Load the state dictionary into the model
    learn.model.load_state_dict(state_dict)
    # Load the books dataframe
    books = pd.read_csv("./data/BX_Books.csv", sep=";", encoding="latin-1")
    # Load the tokenizer
    tokenizer = AutoTokenizer.from_pretrained("pszemraj/pegasus-x-large-book-summary")
    # Load the summarization model
    model = AutoModelForSeq2SeqLM.from_pretrained(
        "pszemraj/pegasus-x-large-book-summary"
    )
    return dls, learn, books, tokenizer, model


dls, learn, books, tokenizer, model = load_stuff()
# function to get recommendations
def get_3_recs(book):
    # Embedding weights for the titles (embeds[1] is the item embedding)
    book_factors = learn.model.embeds[1].weight
    # Index of the selected title in the dataloader's vocabulary
    idx = dls.classes["title"].o2i[book]
    # Cosine similarity between the selected title and every title
    distances = nn.CosineSimilarity(dim=1)(book_factors, book_factors[idx][None])
    # Skip index 0 (the book itself) and keep the next three most similar
    idxs = distances.argsort(descending=True)[1:4]
    recs = [dls.classes["title"][i] for i in idxs]
    return recs
# function to get descriptions from Google Books
def search_book_description(title):
    # Google Books API endpoint for book search
    url = "https://www.googleapis.com/books/v1/volumes"
    # Parameters for the book search
    params = {"q": title, "maxResults": 1}
    # Send GET request to the Google Books API
    response = requests.get(url, params=params)
    # Check if the request was successful
    if response.status_code == 200:
        # Parse the JSON response to extract the book description
        data = response.json()
        if "items" in data and len(data["items"]) > 0:
            book_description = data["items"][0]["volumeInfo"].get(
                "description", "No description available."
            )
            return book_description
        else:
            print("No book found with the given title.")
            return None
    else:
        # If the request failed, print the error message
        print("Error:", response.status_code, response.text)
        return None
# function to ensure summaries end at sentence-final punctuation
def cut(summary):
    # rfind returns -1 when a character is absent, so max() picks the position
    # of whichever of ".", "!" or "?" occurs last in the text
    last_punc_idx = max(summary.rfind("."), summary.rfind("!"), summary.rfind("?"))
    output = summary[: last_punc_idx + 1]
    return output
# function to summarize
def summarize(des_list):
    if "No description available." in des_list:
        # Pull out the placeholder, summarize the rest, then re-insert it
        idx = des_list.index("No description available.")
        des = des_list.copy()
        des.pop(idx)
        rest = summarize(des)
        rest.insert(idx, "No description available.")
        return rest
    else:
        # Tokenize all the descriptions in the list
        encoded_inputs = tokenizer(
            des_list, truncation=True, padding="longest", return_tensors="pt"
        )
        # Generate summaries for all the inputs
        summaries = model.generate(**encoded_inputs, max_new_tokens=100)
        # Decode the summaries and trim each to end at sentence punctuation
        outputs = tokenizer.batch_decode(summaries, skip_special_tokens=True)
        outputs = list(map(cut, outputs))
        return outputs
# function to get cover images
def get_covers(recs):
    imgs = [books[books["Book-Title"] == r]["Image-URL-L"].tolist()[0] for r in recs]
    return imgs
# streamlit app construction
st.title("Your digital librarian")
st.markdown(
    "Hi there! I recommend you books based on one you love (which might not be in the same genre because that's boring) and give you my own synopsis of each book. Enjoy!"
)
options = books["Book-Title"].tolist()
selected_book = st.selectbox("Select your favorite book", options)
if st.button("Get recommendations"):
    recs = get_3_recs(selected_book)
    descriptions = list(map(search_book_description, recs))
    des_sums = summarize(descriptions)
    imgs = get_covers(recs)
    col1, col2, col3 = st.columns(3)
    col1.image(imgs[0])
    col1.markdown(f"**{recs[0]}**")
    col1.write(des_sums[0])
    col2.image(imgs[1])
    col2.markdown(f"**{recs[1]}**")
    col2.write(des_sums[1])
    col3.image(imgs[2])
    col3.markdown(f"**{recs[2]}**")
    col3.write(des_sums[2])
```
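One thing worth flagging for after the build succeeds: `load_stuff()` runs at module level, so Streamlit re-executes it (reloading the fastai learner, the tokenizer, and the Pegasus model) on every widget interaction. Wrapping it in Streamlit's `st.cache_resource` keeps the heavy objects in memory across reruns. A minimal sketch, assuming a Streamlit version that provides `st.cache_resource` (1.18+; older releases used `st.experimental_singleton`); the function name and return value here are placeholders:

```python
import streamlit as st

@st.cache_resource  # executes once per process; later reruns reuse the result
def load_heavy_objects():
    # Placeholder for the expensive loads (pd.read_pickle, torch.load,
    # from_pretrained); in the app this would be load_stuff()'s body.
    return {"status": "loaded"}

objects = load_heavy_objects()
```

With the decorator applied to `load_stuff()` itself, clicking "Get recommendations" no longer re-initializes the models on every run.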