gouravchahar committed
Commit c6a6d99 · verified · 1 Parent(s): 93408e3

required files

Files changed (8)
  1. .gitattributes +1 -0
  2. README.md +64 -14
  3. app.py +71 -0
  4. news.py +47 -0
  5. output.wav +3 -0
  6. requirements.txt +0 -0
  7. scrap.py +67 -0
  8. utils.py +133 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ output.wav filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,14 +1,64 @@
- ---
- title: Alkaike
- emoji: 📚
- colorFrom: blue
- colorTo: pink
- sdk: streamlit
- sdk_version: 1.43.2
- app_file: app.py
- pinned: false
- license: mit
- short_description: news sentiment analysis
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ ---
+ title: Alkaike
+ emoji: 📚
+ colorFrom: blue
+ colorTo: pink
+ sdk: streamlit
+ sdk_version: 1.43.2
+ app_file: app.py
+ pinned: false
+ license: mit
+ short_description: news sentiment analysis
+ ---
+
+ ## News Sentiment Analysis
+
+ This project, **Alkaike**, is a Streamlit-based application that performs sentiment analysis on news articles. It uses natural language processing models to classify news content as positive or negative, and it also offers Hindi text-to-speech (TTS), article comparison, and summarization.
+
+ ### Features
+
+ - **Sentiment Analysis**: Analyze the sentiment of news articles in real time.
+ - **Hindi Text-to-Speech (TTS)**: Convert Hindi text into speech for better accessibility.
+ - **Article Comparison**: Compare consecutive news articles to surface similarities, differences, and sentiment shifts.
+ - **Article Summarization**: Generate concise summaries of lengthy news articles.
+ - **User-Friendly Interface**: Built with Streamlit for an interactive and intuitive experience.
+ - **Customizable**: Easily extendable with additional features or datasets.
+
+ ### Installation
+
+ 1. Clone the repository:
+    ```bash
+    git clone https://github.com/gouravchahar13/alkaike.git
+    ```
+ 2. Navigate to the project directory:
+    ```bash
+    cd alkaike
+    ```
+ 3. Install the required dependencies:
+    ```bash
+    pip install -r requirements.txt
+    ```
+
+ ### Usage
+
+ Run the application locally:
+ ```bash
+ streamlit run app.py
+ ```
+
+ ### License
+
+ This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
+
+ ### Contributing
+
+ Contributions are welcome! Feel free to open issues or submit pull requests to improve the project.
+
+ ### Acknowledgments
+
+ - Built using [Streamlit](https://streamlit.io/).
+ - Inspired by advancements in natural language processing, sentiment analysis, and text-to-speech technologies.
+
+ ### Contact
+
+ For any inquiries or feedback, please reach out to the project maintainer.
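At runtime the app also needs two secrets that the README above does not mention: `news.py` reads `RAPID_API_KEY` (RapidAPI) and `utils.py` reads `API_KEY` (a Hugging Face Inference API token), both loaded via `python-dotenv`. A minimal pre-flight check, assuming the keys live in a local `.env` file:

```python
# Sanity check (sketch): confirm both keys load before launching the app.
# Key names are taken from news.py and utils.py; the .env location is an assumption.
import os
from dotenv import load_dotenv

load_dotenv()
for name in ("RAPID_API_KEY", "API_KEY"):
    print(name, "is set" if os.getenv(name) else "is MISSING")
```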
app.py ADDED
@@ -0,0 +1,71 @@
+ import streamlit as st
+ from news import fetch_news
+ from utils import get_sentiment, extract_keywords, text_to_speech, comparison_impact, summarize_text
+ from googletrans import Translator
+
+ # Initialize Google Translator
+ translator = Translator()
+
+ # Streamlit App Title
+ st.title("📢 News Sentiment & Keyword Analyzer with Hindi Speech & Comparison")
+
+ # User Input for Company Name
+ company_name = st.text_input("Enter Company Name:", placeholder="Google, Tesla, Apple, etc.")
+
+ if st.button("Fetch News & Analyze"):
+     st.write(f"Fetching latest news about **{company_name}**...")
+
+     # Fetch News Articles
+     news_data = fetch_news(company=company_name, limit=10)
+
+     if news_data:
+         sentiment_results = []   # Store (title, sentiment) pairs
+         summarized_text = ""     # Combined summary for TTS
+         previous_article = None  # Previous summary, used for comparison
+
+         for article in news_data:
+             title = article["title"]
+             snippet = article["snippet"]
+             link = article["link"]
+
+             # Summarize title + snippet
+             summary = summarize_text(title + " " + snippet)
+
+             # Analyze Sentiment
+             sentiment = get_sentiment(summary)
+
+             # Extract Keywords
+             keywords = extract_keywords(summary)
+             keywords_display = ", ".join(keywords) if isinstance(keywords, list) else "No keywords extracted"
+
+             # Display Summarized Article with Sentiment and Keywords
+             st.subheader(title)
+             st.write(f"📰 **Summary:** {summary}")
+             st.write(f"🔗 [Read More]({link})")
+             st.write(f"🧠 **Sentiment:** {sentiment}")
+             st.write(f"🔑 **Keywords:** {keywords_display}")
+
+             # Compare with the previous article
+             if previous_article:
+                 comparison_result = comparison_impact(previous_article, summary)
+                 st.write("📊 **Comparison Impact with Previous Article:**")
+                 st.write(comparison_result["Impact Analysis"])
+
+             # Store the current summary as "previous" for the next iteration
+             previous_article = summary
+
+             sentiment_results.append((title, sentiment))
+             summarized_text += summary + " "  # Append for TTS
+
+         # Translate the combined summary to Hindi
+         translated_summary = translator.translate(summarized_text, src="en", dest="hi").text
+
+         # Automatically Generate and Play Hindi Speech
+         st.write("🔊 **Generating Hindi Audio...**")
+         text_to_speech(translated_summary)
+
+         # Display Audio Output
+         st.audio("output.wav", format="audio/wav")
+
+     else:
+         st.error("❌ No news articles found! Try another company.")
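One caveat with `app.py` as written: it calls `translator.translate(...)` synchronously, which matches older `googletrans` builds such as 4.0.0rc1; in newer releases `Translator.translate` is a coroutine. A defensive sketch that tolerates either behavior (an assumption about the installed version, not part of the commit):

```python
# Sketch: handle both sync and async googletrans builds.
# Assumes async builds return a coroutine that asyncio.run() can drive.
import asyncio
import inspect

from googletrans import Translator

def translate_to_hindi(text: str) -> str:
    result = Translator().translate(text, src="en", dest="hi")
    if inspect.iscoroutine(result):  # async googletrans
        result = asyncio.run(result)
    return result.text
```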
news.py ADDED
@@ -0,0 +1,47 @@
+ import requests
+ import os
+ from dotenv import load_dotenv
+
+ # Load environment variables
+ load_dotenv()
+ API_KEY = os.getenv("RAPID_API_KEY")
+
+ # API Endpoint and Headers
+ URL = "https://real-time-news-data.p.rapidapi.com/search"
+ HEADERS = {
+     "x-rapidapi-key": API_KEY,
+     "x-rapidapi-host": "real-time-news-data.p.rapidapi.com"
+ }
+
+ def fetch_news(company, limit=20, country="US", lang="en", time_published="anytime"):
+     query_params = {
+         "query": company,
+         "limit": str(limit),
+         "time_published": time_published,
+         "country": country,
+         "lang": lang
+     }
+     try:
+         response = requests.get(URL, headers=HEADERS, params=query_params, timeout=10)
+         response.raise_for_status()  # Raise for HTTP errors (4xx/5xx)
+
+         data = response.json()
+
+         if "data" not in data:
+             print("Error: Unexpected API response format")
+             return []
+
+         articles = []
+         for item in data["data"]:
+             articles.append({
+                 "title": item.get("title", "No Title"),
+                 "snippet": item.get("snippet", "No Snippet"),
+                 "link": item.get("link", "#")
+             })
+         return articles
+
+     except requests.exceptions.RequestException as e:
+         print(f"❌ Error fetching news: {e}")
+         return []
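A quick way to exercise `fetch_news` outside Streamlit, assuming `RAPID_API_KEY` is set in your `.env` (illustrative, not part of the commit):

```python
# Minimal smoke test for fetch_news.
from news import fetch_news

articles = fetch_news("Tesla", limit=3)
for article in articles:
    print(article["title"], "->", article["link"])
```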
output.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:78ca890300aad15d27751fe16b1f38e17e1f63a14106126b4a288914b14e4113
+ size 4114035
requirements.txt ADDED
Binary file (254 Bytes).
 
scrap.py ADDED
@@ -0,0 +1,67 @@
+ import requests
+ import json
+ from bs4 import BeautifulSoup
+
+ def get_top_articles(url):
+     try:
+         response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
+         response.raise_for_status()
+
+         soup = BeautifulSoup(response.text, 'html.parser')
+
+         articles = []
+         for article in soup.select('a[href*="/articleshow/"]')[:10]:
+             title = article.get_text(strip=True)
+             link = article['href']
+             if not link.startswith("http"):
+                 link = "https://timesofindia.indiatimes.com" + link
+             articles.append({"title": title, "link": link})
+
+         return articles
+     except requests.exceptions.RequestException as e:
+         return {"error": str(e)}
+
+ def extract_article_content(url):
+     try:
+         response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
+         response.raise_for_status()
+
+         soup = BeautifulSoup(response.text, 'html.parser')
+
+         title = soup.find('h1')
+         article_title = title.get_text(strip=True) if title else "No title found"
+
+         # Find the div with data-articlebody="1"
+         article_body = soup.find('div', {'data-articlebody': "1"})
+
+         if article_body:
+             # Keep only reasonably long text blocks to skip ads and captions
+             paragraphs = [div.get_text(strip=True) for div in article_body.find_all('div') if len(div.get_text(strip=True)) > 20]
+             content = "\n".join(paragraphs)
+         else:
+             content = "No content found"
+
+         return {"title": article_title, "content": content, "link": url}
+     except requests.exceptions.RequestException as e:
+         return {"title": "Error", "content": f"Error fetching content: {e}", "link": url}
+
+ if __name__ == "__main__":
+     url = "https://timesofindia.indiatimes.com/topic/Google"
+     print(f"Extracting top 10 articles from: {url}\n")
+     articles = get_top_articles(url)
+
+     if isinstance(articles, dict) and "error" in articles:
+         print("Error:", articles["error"])
+     else:
+         all_articles = []
+
+         for idx, article in enumerate(articles, start=1):
+             print(f"Extracting content for article {idx}: {article['title']}\n Link: {article['link']}\n")
+             article_data = extract_article_content(article['link'])
+             print(f"Heading: {article_data['title']}\n Link: {article_data['link']}\n")
+             print(f"Content:\n{article_data['content']}\n")
+             all_articles.append(article_data)
+
+         with open("articles.json", "w", encoding="utf-8") as f:
+             json.dump(all_articles, f, ensure_ascii=False, indent=4)
+
+         print("All articles saved to articles.json")
utils.py ADDED
@@ -0,0 +1,133 @@
+ import requests
+ import os
+ import time
+ from dotenv import load_dotenv
+
+ load_dotenv()
+ API_KEY = os.getenv("API_KEY")
+
+ def get_sentiment(text):
+     API_URL = "https://api-inference.huggingface.co/models/distilbert-base-uncased-finetuned-sst-2-english"
+     HEADERS = {"Authorization": f"Bearer {API_KEY}"}
+
+     data = {"inputs": text}
+     response = requests.post(API_URL, headers=HEADERS, json=data)
+
+     try:
+         result = response.json()
+
+         if isinstance(result, list) and len(result) > 0 and isinstance(result[0], list):
+             best_label = max(result[0], key=lambda x: x["score"])  # Label with the highest score
+             return best_label["label"]
+         else:
+             return "Error: Unexpected response format"
+
+     except requests.exceptions.JSONDecodeError:
+         return "Error: Empty or invalid JSON response"
+
+ def summarize_text(text, max_length=150, min_length=50):
+     API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
+     HEADERS = {"Authorization": f"Bearer {API_KEY}"}
+
+     data = {
+         "inputs": text,
+         "parameters": {"max_length": max_length, "min_length": min_length, "do_sample": False}
+     }
+
+     response = requests.post(API_URL, headers=HEADERS, json=data)
+
+     try:
+         result = response.json()
+         if isinstance(result, list) and "summary_text" in result[0]:
+             return result[0]["summary_text"]  # Extract the summary text
+         else:
+             return "Error: Unexpected response format"
+
+     except requests.exceptions.JSONDecodeError:
+         return "Error: Empty or invalid JSON response"
+
+ def extract_keywords(text, top_n=5):
+     API_URL = "https://api-inference.huggingface.co/models/ml6team/keyphrase-extraction-kbir-inspec"
+     HEADERS = {"Authorization": f"Bearer {API_KEY}"}
+
+     data = {"inputs": text}
+
+     response = requests.post(API_URL, headers=HEADERS, json=data)
+
+     try:
+         result = response.json()
+         if isinstance(result, list) and len(result) > 0:
+             keywords = [item["word"] for item in result[:top_n]]
+             return keywords
+         else:
+             return "Error: Unexpected response format"
+
+     except requests.exceptions.JSONDecodeError:
+         return "Error: Empty or invalid JSON response"
+
+ def text_to_speech(text):
+     API_URL = 'https://api-inference.huggingface.co/models/facebook/mms-tts-hin'
+     headers = {'Authorization': f'Bearer {API_KEY}'}
+     payload = {'inputs': text}
+     response = requests.post(API_URL, headers=headers, json=payload)
+     if response.status_code == 200:
+         with open('output.wav', 'wb') as f:
+             f.write(response.content)
+         print('Audio content written to output.wav')
+     else:
+         print(f'Error: {response.status_code}, {response.text}')
+
+ HEADERS = {"Authorization": f"Bearer {API_KEY}"}
+ MODELS = {
+     "comparison": "https://api-inference.huggingface.co/models/sentence-transformers/all-MiniLM-L6-v2",
+     "sentiment": "https://api-inference.huggingface.co/models/distilbert-base-uncased-finetuned-sst-2-english"
+ }
+
+ def request_huggingface(api_url, payload, retries=3, delay=2):
+     for attempt in range(retries):
+         try:
+             response = requests.post(api_url, headers=HEADERS, json=payload)
+
+             if response.status_code == 200:
+                 return response.json()
+
+             elif response.status_code in [429, 503]:  # Rate limited or service unavailable
+                 print(f"Rate limited. Retrying in {delay} seconds...")
+                 time.sleep(delay)
+             else:
+                 print(f"Error {response.status_code}: {response.text}")
+                 return None
+
+         except requests.exceptions.RequestException as e:
+             print(f"Request failed: {e}")
+
+     print("Failed to get a valid response after retries.")
+     return None
+
+ def comparison_impact(text1, text2):
+     # Comparison Analysis: similarity of text2 against text1
+     comparison_payload = {"inputs": {"source_sentence": text1, "sentences": [text2]}}
+     comparison_result = request_huggingface(MODELS["comparison"], comparison_payload)
+
+     # Sentiment Analysis for Impact
+     sentiment1 = request_huggingface(MODELS["sentiment"], {"inputs": text1})
+     sentiment2 = request_huggingface(MODELS["sentiment"], {"inputs": text2})
+
+     if sentiment1 and sentiment2:
+         sentiment1_label = max(sentiment1[0], key=lambda x: x["score"])["label"]
+         sentiment2_label = max(sentiment2[0], key=lambda x: x["score"])["label"]
+
+         impact_analysis = f"Sentiment Shift: '{sentiment1_label}' → '{sentiment2_label}'"
+     else:
+         impact_analysis = "Error in sentiment analysis."
+
+     return {
+         "Comparison Result": comparison_result,
+         "Impact Analysis": impact_analysis
+     }
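For reference, the helpers in `utils.py` can be driven directly, assuming `API_KEY` holds a valid Hugging Face Inference API token (illustrative, not part of the commit):

```python
# Illustrative driver for the utils helpers.
from utils import get_sentiment, comparison_impact

a = "The company reported record quarterly profits and raised guidance."
b = "Regulators opened an investigation into the company's accounting."

print(get_sentiment(a))            # e.g. POSITIVE
result = comparison_impact(a, b)
print(result["Impact Analysis"])   # e.g. Sentiment Shift: 'POSITIVE' → 'NEGATIVE'
print(result["Comparison Result"]) # similarity score(s) from all-MiniLM-L6-v2
```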