gouravchahar committed
Commit c6a6d99 · verified · 1 Parent(s): 93408e3

required files

Files changed (8)
  1. .gitattributes +1 -0
  2. README.md +64 -14
  3. app.py +71 -0
  4. news.py +47 -0
  5. output.wav +3 -0
  6. requirements.txt +0 -0
  7. scrap.py +67 -0
  8. utils.py +133 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ output.wav filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,14 +1,64 @@
- ---
- title: Alkaike
- emoji: 📚
- colorFrom: blue
- colorTo: pink
- sdk: streamlit
- sdk_version: 1.43.2
- app_file: app.py
- pinned: false
- license: mit
- short_description: news sentiment analysis
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ ---
+ title: Alkaike
+ emoji: 📚
+ colorFrom: blue
+ colorTo: pink
+ sdk: streamlit
+ sdk_version: 1.43.2
+ app_file: app.py
+ pinned: false
+ license: mit
+ short_description: news sentiment analysis
+ ---
+
+ ## News Sentiment Analysis
+
+ This project, **Alkaike**, is a Streamlit-based application that performs sentiment analysis on news articles. It uses natural language processing models to classify news content as positive or negative, and it also offers Hindi text-to-speech (TTS), article comparison, and summarization.
+
+ ### Features
+
+ - **Sentiment Analysis**: Analyze the sentiment of news articles in real time.
+ - **Hindi Text-to-Speech (TTS)**: Convert Hindi text into speech for better accessibility.
+ - **Article Comparison**: Compare consecutive news articles to surface similarities, differences, and sentiment shifts.
+ - **Article Summarization**: Generate concise summaries of lengthy news articles.
+ - **User-Friendly Interface**: Built with Streamlit for an interactive and intuitive experience.
+ - **Customizable**: Easily extendable with additional features or datasets.
+
+ ### Installation
+
+ 1. Clone the repository:
+    ```bash
+    git clone https://github.com/gouravchahar13/alkaike.git
+    ```
+ 2. Navigate to the project directory:
+    ```bash
+    cd alkaike
+    ```
+ 3. Install the required dependencies:
+    ```bash
+    pip install -r requirements.txt
+    ```
+
+ ### Usage
+
+ Run the application locally:
+ ```bash
+ streamlit run app.py
+ ```
+
+ ### License
+
+ This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
+
+ ### Contributing
+
+ Contributions are welcome! Feel free to open issues or submit pull requests to improve the project.
+
+ ### Acknowledgments
+
+ - Built using [Streamlit](https://streamlit.io/).
+ - Inspired by advancements in natural language processing, sentiment analysis, and text-to-speech technologies.
+
+ ### Contact
+
+ For any inquiries or feedback, please reach out to the project maintainer.
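At runtime the app also needs two secrets that the README above does not mention: `news.py` reads `RAPID_API_KEY` (RapidAPI) and `utils.py` reads `API_KEY` (a Hugging Face Inference API token), both loaded via `python-dotenv`. A minimal pre-flight check, assuming the keys live in a local `.env` file:

```python
# Sanity check (sketch): confirm both keys load before launching the app.
# Key names are taken from news.py and utils.py; the .env location is an assumption.
import os
from dotenv import load_dotenv

load_dotenv()
for name in ("RAPID_API_KEY", "API_KEY"):
    print(name, "is set" if os.getenv(name) else "is MISSING")
```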
app.py ADDED
@@ -0,0 +1,71 @@
+ import streamlit as st
+ from news import fetch_news
+ from utils import get_sentiment, extract_keywords, text_to_speech, comparison_impact, summarize_text
+ from googletrans import Translator
+
+ # Initialize Google Translator
+ translator = Translator()
+
+ # Streamlit App Title
+ st.title("📢 News Sentiment & Keyword Analyzer with Hindi Speech & Comparison")
+
+ # User Input for Company Name
+ company_name = st.text_input("Enter Company Name:", placeholder="Google, Tesla, Apple, etc.")
+
+ if st.button("Fetch News & Analyze"):
+     st.write(f"Fetching latest news about **{company_name}**...")
+
+     # Fetch News Articles
+     news_data = fetch_news(company=company_name, limit=10)
+
+     if news_data:
+         sentiment_results = []   # Store (title, sentiment) pairs
+         summarized_text = ""     # Combined summary for TTS
+         previous_article = None  # Previous summary, used for comparison
+
+         for article in news_data:
+             title = article["title"]
+             snippet = article["snippet"]
+             link = article["link"]
+
+             # Summarize title + snippet
+             summary = summarize_text(title + " " + snippet)
+
+             # Analyze Sentiment
+             sentiment = get_sentiment(summary)
+
+             # Extract Keywords
+             keywords = extract_keywords(summary)
+             keywords_display = ", ".join(keywords) if isinstance(keywords, list) else "No keywords extracted"
+
+             # Display Summarized Article with Sentiment and Keywords
+             st.subheader(title)
+             st.write(f"📰 **Summary:** {summary}")
+             st.write(f"🔗 [Read More]({link})")
+             st.write(f"🧠 **Sentiment:** {sentiment}")
+             st.write(f"🔑 **Keywords:** {keywords_display}")
+
+             # Compare with the previous article
+             if previous_article:
+                 comparison_result = comparison_impact(previous_article, summary)
+                 st.write("📊 **Comparison Impact with Previous Article:**")
+                 st.write(comparison_result["Impact Analysis"])
+
+             # Store the current summary as "previous" for the next iteration
+             previous_article = summary
+
+             sentiment_results.append((title, sentiment))
+             summarized_text += summary + " "  # Append for TTS
+
+         # Translate the combined summary to Hindi
+         translated_summary = translator.translate(summarized_text, src="en", dest="hi").text
+
+         # Automatically Generate and Play Hindi Speech
+         st.write("🔊 **Generating Hindi Audio...**")
+         text_to_speech(translated_summary)
+
+         # Display Audio Output
+         st.audio("output.wav", format="audio/wav")
+
+     else:
+         st.error("❌ No news articles found! Try another company.")
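One caveat with `app.py` as written: it calls `translator.translate(...)` synchronously, which matches older `googletrans` builds such as 4.0.0rc1; in newer releases `Translator.translate` is a coroutine. A defensive sketch that tolerates either behavior (an assumption about the installed version, not part of the commit):

```python
# Sketch: handle both sync and async googletrans builds.
# Assumes async builds return a coroutine that asyncio.run() can drive.
import asyncio
import inspect

from googletrans import Translator

def translate_to_hindi(text: str) -> str:
    result = Translator().translate(text, src="en", dest="hi")
    if inspect.iscoroutine(result):  # async googletrans
        result = asyncio.run(result)
    return result.text
```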
news.py ADDED
@@ -0,0 +1,47 @@
+ import requests
+ import os
+ from dotenv import load_dotenv
+
+ # Load environment variables
+ load_dotenv()
+ API_KEY = os.getenv("RAPID_API_KEY")
+
+ # API Endpoint and Headers
+ URL = "https://real-time-news-data.p.rapidapi.com/search"
+ HEADERS = {
+     "x-rapidapi-key": API_KEY,
+     "x-rapidapi-host": "real-time-news-data.p.rapidapi.com"
+ }
+
+ def fetch_news(company, limit=20, country="US", lang="en", time_published="anytime"):
+     query_params = {
+         "query": company,
+         "limit": str(limit),
+         "time_published": time_published,
+         "country": country,
+         "lang": lang
+     }
+     try:
+         response = requests.get(URL, headers=HEADERS, params=query_params, timeout=10)
+         response.raise_for_status()  # Raise for HTTP errors (4xx/5xx)
+
+         data = response.json()
+
+         if "data" not in data:
+             print("Error: Unexpected API response format")
+             return []
+
+         articles = []
+         for item in data["data"]:
+             articles.append({
+                 "title": item.get("title", "No Title"),
+                 "snippet": item.get("snippet", "No Snippet"),
+                 "link": item.get("link", "#")
+             })
+         return articles
+
+     except requests.exceptions.RequestException as e:
+         print(f"❌ Error fetching news: {e}")
+         return []
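A quick way to exercise `fetch_news` outside Streamlit, assuming `RAPID_API_KEY` is set in your `.env` (illustrative, not part of the commit):

```python
# Minimal smoke test for fetch_news.
from news import fetch_news

articles = fetch_news("Tesla", limit=3)
for article in articles:
    print(article["title"], "->", article["link"])
```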
output.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:78ca890300aad15d27751fe16b1f38e17e1f63a14106126b4a288914b14e4113
+ size 4114035
requirements.txt ADDED
Binary file (254 Bytes).
 
scrap.py ADDED
@@ -0,0 +1,67 @@
+ import requests
+ import json
+ from bs4 import BeautifulSoup
+
+ def get_top_articles(url):
+     try:
+         response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
+         response.raise_for_status()
+
+         soup = BeautifulSoup(response.text, 'html.parser')
+
+         articles = []
+         for article in soup.select('a[href*="/articleshow/"]')[:10]:
+             title = article.get_text(strip=True)
+             link = article['href']
+             if not link.startswith("http"):
+                 link = "https://timesofindia.indiatimes.com" + link
+             articles.append({"title": title, "link": link})
+
+         return articles
+     except requests.exceptions.RequestException as e:
+         return {"error": str(e)}
+
+ def extract_article_content(url):
+     try:
+         response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
+         response.raise_for_status()
+
+         soup = BeautifulSoup(response.text, 'html.parser')
+
+         title = soup.find('h1')
+         article_title = title.get_text(strip=True) if title else "No title found"
+
+         # Find the div with data-articlebody="1"
+         article_body = soup.find('div', {'data-articlebody': "1"})
+
+         if article_body:
+             # Keep only reasonably long text blocks to skip ads and captions
+             paragraphs = [div.get_text(strip=True) for div in article_body.find_all('div') if len(div.get_text(strip=True)) > 20]
+             content = "\n".join(paragraphs)
+         else:
+             content = "No content found"
+
+         return {"title": article_title, "content": content, "link": url}
+     except requests.exceptions.RequestException as e:
+         return {"title": "Error", "content": f"Error fetching content: {e}", "link": url}
+
+ if __name__ == "__main__":
+     url = "https://timesofindia.indiatimes.com/topic/Google"
+     print(f"Extracting top 10 articles from: {url}\n")
+     articles = get_top_articles(url)
+
+     if isinstance(articles, dict) and "error" in articles:
+         print("Error:", articles["error"])
+     else:
+         all_articles = []
+
+         for idx, article in enumerate(articles, start=1):
+             print(f"Extracting content for article {idx}: {article['title']}\n Link: {article['link']}\n")
+             article_data = extract_article_content(article['link'])
+             print(f"Heading: {article_data['title']}\n Link: {article_data['link']}\n")
+             print(f"Content:\n{article_data['content']}\n")
+             all_articles.append(article_data)
+
+         with open("articles.json", "w", encoding="utf-8") as f:
+             json.dump(all_articles, f, ensure_ascii=False, indent=4)
+
+         print("All articles saved to articles.json")
utils.py ADDED
@@ -0,0 +1,133 @@
+ import requests
+ import os
+ import time
+ from dotenv import load_dotenv
+
+ load_dotenv()
+ API_KEY = os.getenv("API_KEY")
+
+ def get_sentiment(text):
+     API_URL = "https://api-inference.huggingface.co/models/distilbert-base-uncased-finetuned-sst-2-english"
+     HEADERS = {"Authorization": f"Bearer {API_KEY}"}
+
+     data = {"inputs": text}
+     response = requests.post(API_URL, headers=HEADERS, json=data)
+
+     try:
+         result = response.json()
+
+         if isinstance(result, list) and len(result) > 0 and isinstance(result[0], list):
+             best_label = max(result[0], key=lambda x: x["score"])  # Label with the highest score
+             return best_label["label"]
+         else:
+             return "Error: Unexpected response format"
+
+     except requests.exceptions.JSONDecodeError:
+         return "Error: Empty or invalid JSON response"
+
+ def summarize_text(text, max_length=150, min_length=50):
+     API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
+     HEADERS = {"Authorization": f"Bearer {API_KEY}"}
+
+     data = {
+         "inputs": text,
+         "parameters": {"max_length": max_length, "min_length": min_length, "do_sample": False}
+     }
+
+     response = requests.post(API_URL, headers=HEADERS, json=data)
+
+     try:
+         result = response.json()
+         if isinstance(result, list) and "summary_text" in result[0]:
+             return result[0]["summary_text"]  # Extract the summary text
+         else:
+             return "Error: Unexpected response format"
+
+     except requests.exceptions.JSONDecodeError:
+         return "Error: Empty or invalid JSON response"
+
+ def extract_keywords(text, top_n=5):
+     API_URL = "https://api-inference.huggingface.co/models/ml6team/keyphrase-extraction-kbir-inspec"
+     HEADERS = {"Authorization": f"Bearer {API_KEY}"}
+
+     data = {"inputs": text}
+
+     response = requests.post(API_URL, headers=HEADERS, json=data)
+
+     try:
+         result = response.json()
+         if isinstance(result, list) and len(result) > 0:
+             keywords = [item["word"] for item in result[:top_n]]
+             return keywords
+         else:
+             return "Error: Unexpected response format"
+
+     except requests.exceptions.JSONDecodeError:
+         return "Error: Empty or invalid JSON response"
+
+ def text_to_speech(text):
+     API_URL = 'https://api-inference.huggingface.co/models/facebook/mms-tts-hin'
+     headers = {'Authorization': f'Bearer {API_KEY}'}
+     payload = {'inputs': text}
+     response = requests.post(API_URL, headers=headers, json=payload)
+     if response.status_code == 200:
+         with open('output.wav', 'wb') as f:
+             f.write(response.content)
+         print('Audio content written to output.wav')
+     else:
+         print(f'Error: {response.status_code}, {response.text}')
+
+ HEADERS = {"Authorization": f"Bearer {API_KEY}"}
+ MODELS = {
+     "comparison": "https://api-inference.huggingface.co/models/sentence-transformers/all-MiniLM-L6-v2",
+     "sentiment": "https://api-inference.huggingface.co/models/distilbert-base-uncased-finetuned-sst-2-english"
+ }
+
+ def request_huggingface(api_url, payload, retries=3, delay=2):
+     for attempt in range(retries):
+         try:
+             response = requests.post(api_url, headers=HEADERS, json=payload)
+
+             if response.status_code == 200:
+                 return response.json()
+
+             elif response.status_code in [429, 503]:  # Rate limited or service unavailable
+                 print(f"Rate limited. Retrying in {delay} seconds...")
+                 time.sleep(delay)
+             else:
+                 print(f"Error {response.status_code}: {response.text}")
+                 return None
+
+         except requests.exceptions.RequestException as e:
+             print(f"Request failed: {e}")
+
+     print("Failed to get a valid response after retries.")
+     return None
+
+ def comparison_impact(text1, text2):
+     # Comparison Analysis: similarity of text2 against text1
+     comparison_payload = {"inputs": {"source_sentence": text1, "sentences": [text2]}}
+     comparison_result = request_huggingface(MODELS["comparison"], comparison_payload)
+
+     # Sentiment Analysis for Impact
+     sentiment1 = request_huggingface(MODELS["sentiment"], {"inputs": text1})
+     sentiment2 = request_huggingface(MODELS["sentiment"], {"inputs": text2})
+
+     if sentiment1 and sentiment2:
+         sentiment1_label = max(sentiment1[0], key=lambda x: x["score"])["label"]
+         sentiment2_label = max(sentiment2[0], key=lambda x: x["score"])["label"]
+
+         impact_analysis = f"Sentiment Shift: '{sentiment1_label}' → '{sentiment2_label}'"
+     else:
+         impact_analysis = "Error in sentiment analysis."
+
+     return {
+         "Comparison Result": comparison_result,
+         "Impact Analysis": impact_analysis
+     }
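For reference, the helpers in `utils.py` can be driven directly, assuming `API_KEY` holds a valid Hugging Face Inference API token (illustrative, not part of the commit):

```python
# Illustrative driver for the utils helpers.
from utils import get_sentiment, comparison_impact

a = "The company reported record quarterly profits and raised guidance."
b = "Regulators opened an investigation into the company's accounting."

print(get_sentiment(a))            # e.g. POSITIVE
result = comparison_impact(a, b)
print(result["Impact Analysis"])   # e.g. Sentiment Shift: 'POSITIVE' → 'NEGATIVE'
print(result["Comparison Result"]) # similarity score(s) from all-MiniLM-L6-v2
```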