Spaces:

elozano
/

news-analyzer

Runtime error

App Files Files Community

elozano committed on Feb 9, 2022

Commit

685ba0e

1 Parent(s): ce5c5cb

App updated

Browse files

Files changed (5) hide show

.gitignore +1 -0
analyzer.py +80 -0
app.py +49 -43
news_pipeline.py +0 -61
pipeline.py +16 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ __pycache__

analyzer.py ADDED Viewed

	@@ -0,0 +1,80 @@

+from typing import Dict, Optional, Union
+from transformers import (
+    AutoModelForSequenceClassification,
+    AutoModelForTokenClassification,
+    AutoTokenizer,
+    TokenClassificationPipeline,
+)
+from pipeline import NewsPipeline
+# Emoji tables handed to the classification pipelines below; each maps a
+# predicted label to the emoji attached to that prediction.
+# Labels of the news-category classifier.
+CATEGORY_EMOJIS = {
+    "Automobile": "🚗",
+    "Entertainment": "🍿",
+    "Politics": "⚖️",
+    "Science": "🧪",
+    "Sports": "🏀",
+    "Technology": "💻",
+    "World": "🌍",
+}
+# Labels of the fake-news classifier.
+FAKE_EMOJIS = {
+    "Fake": "👻",
+    "Real": "👍",
+}
+# Labels of the clickbait classifier.
+CLICKBAIT_EMOJIS = {
+    "Clickbait": "🎣",
+    "Normal": "✅",
+}
+class NewsAnalyzer:
+    """Run category, fake-news, clickbait and NER models on a news article.
+
+    The three classifiers are ``NewsPipeline`` instances, each given the emoji
+    table for its labels. Named entities come from a
+    ``TokenClassificationPipeline`` created with
+    ``aggregation_strategy="simple"``.
+    """
+    def __init__(
+        self,
+        category_model_name: str,
+        fake_model_name: str,
+        clickbait_model_name: str,
+        ner_model_name: str,
+    ) -> None:
+        """Load every model and its tokenizer with ``from_pretrained``.
+
+        Args:
+            category_model_name: Name passed to ``from_pretrained`` for the
+                category classifier.
+            fake_model_name: Same, for the fake-news classifier.
+            clickbait_model_name: Same, for the clickbait classifier.
+            ner_model_name: Same, for the token-classification (NER) model.
+        """
+        self.category_pipe = self._build_classifier(
+            category_model_name, CATEGORY_EMOJIS
+        )
+        self.fake_pipe = self._build_classifier(fake_model_name, FAKE_EMOJIS)
+        self.clickbait_pipe = self._build_classifier(
+            clickbait_model_name, CLICKBAIT_EMOJIS
+        )
+        self.ner_pipe = TokenClassificationPipeline(
+            model=AutoModelForTokenClassification.from_pretrained(ner_model_name),
+            tokenizer=AutoTokenizer.from_pretrained(ner_model_name),
+            aggregation_strategy="simple",
+        )
+    @staticmethod
+    def _build_classifier(model_name: str, emojis: Dict[str, str]) -> "NewsPipeline":
+        """Create the ``NewsPipeline`` for ``model_name`` with its emoji table."""
+        return NewsPipeline(
+            model=AutoModelForSequenceClassification.from_pretrained(model_name),
+            tokenizer=AutoTokenizer.from_pretrained(model_name),
+            emojis=emojis,
+        )
+    def __call__(
+        self, headline: str, content: Optional[str] = None
+    ) -> Dict[str, Dict[str, object]]:
+        """Analyze one article.
+
+        Args:
+            headline: Article headline.
+            content: Article body; ``None`` or ``""`` means headline only.
+
+        Returns:
+            A dict with keys ``"category"``, ``"fake"`` and ``"clickbait"``
+            (each the result of the matching classifier pipeline) and
+            ``"ner"``, itself a dict with the NER output for ``"headline"``
+            and for ``"content"`` (``None`` when no content was given).
+        """
+        return {
+            "category": self.category_pipe(headline=headline, content=content),
+            "fake": self.fake_pipe(headline=headline, content=content),
+            # The clickbait classifier is deliberately given the headline only.
+            "clickbait": self.clickbait_pipe(headline=headline, content=None),
+            "ner": {
+                "headline": self.ner_pipe(headline),
+                "content": self.ner_pipe(content) if content else None,
+            },
+        }
+if __name__ == "__main__":
+    # Manual smoke test: build the analyzer from the four model names and
+    # print the prediction for a headline-only article.
+    model_names = {
+        "category_model_name": "elozano/news-category",
+        "fake_model_name": "elozano/news-fake",
+        "clickbait_model_name": "elozano/news-clickbait",
+        "ner_model_name": "dslim/bert-base-NER",
+    }
+    news_analyzer = NewsAnalyzer(**model_names)
+    print(news_analyzer(headline="Lakers Won!"))

app.py CHANGED Viewed

@@ -1,68 +1,74 @@
 import streamlit as st
 from annotated_text import annotated_text
-from news_pipeline import NewsPipeline
-CATEGORY_EMOJIS = {
-    "Automobile": "🚗",
-    "Entertainment": "🍿",
-    "Politics": "⚖️",
-    "Science": "🧪",
-    "Sports": "🏀",
-    "Technology": "💻",
-    "World": "🌍",
-}
-FAKE_EMOJIS = {"Fake": "👻", "Real": "👍"}
-CLICKBAIT_EMOJIS = {"Clickbait": "🎣", "Normal": "✅"}
-def app():
-    news_pipe = NewsPipeline()
     st.title("📰 News Analyzer")
-    headline = st.text_input("Article headline:")
-    content = st.text_area("Article content:")
-    button = st.button("Analyze")
-    if button:
-        if headline == "" and content == "":
-            st.error("Please, introduce an article headline and content.")
-        else:
-            if headline == "" or content == "":
-                st.warning(
-                    "Please, provide both headline and content to achieve better results."
-                )
-            else:
-                st.success("Article successfully analyzed!")
-            with st.spinner("Analyzing article..."):
-                prediction = news_pipe(headline, content)
-            col1, _, col2 = st.columns([2, 1, 6])
             with col1:
                 st.subheader("Analysis:")
                 st.markdown(
-                    f"{CATEGORY_EMOJIS[prediction['category']]} **Category**: {prediction['category']}"
                 )
                 st.markdown(
-                    f"{FAKE_EMOJIS[prediction['fake']]} **Fake**: {'Yes' if prediction['fake'] == 'Fake' else 'No'}"
                 )
                 st.markdown(
-                    f"{CLICKBAIT_EMOJIS[prediction['clickbait']]} **Clickbait**: {'Yes' if prediction['clickbait'] == 'Clickbait' else 'No'}"
                 )
             with col2:
-                st.subheader("Headline")
-                annotated_text(*parse_text(headline, prediction["ner"]["headline"]))
-                st.subheader("Content")
-                annotated_text(*parse_text(content, prediction["ner"]["content"]))
-def parse_text(text, prediction):
     start = 0
     parsed_text = []
-    for p in prediction:
-        parsed_text.append(text[start : p["start"]])
-        parsed_text.append((p["word"], p["entity_group"]))
-        start = p["end"]
     parsed_text.append(text[start:])
     return parsed_text
 if __name__ == "__main__":
-    app()

+from typing import Dict, List, Tuple, Union
 import streamlit as st
 from annotated_text import annotated_text
+from analyzer import NewsAnalyzer
+def run() -> None:
+    """Render the News Analyzer page and show predictions for the entered article.
+
+    Streamlit re-executes the script on every widget interaction, so the
+    analyzer is obtained from ``_get_analyzer`` instead of being constructed
+    here; building it inline reloaded all four models on each rerun.
+    """
+    analyzer = _get_analyzer()
     st.title("📰 News Analyzer")
+    headline = st.text_input("Headline:")
+    content = st.text_input("Content:")
+    if headline == "":
+        st.error("Please, provide a headline.")
+    else:
+        # Content is optional: warn, but still allow the analysis to run.
+        if content == "":
+            st.warning(
+                "Please, provide both headline and content to achieve better results."
+            )
+        button = st.button("Analyze")
+        if button:
+            predictions = analyzer(headline=headline, content=content)
+            col1, _, col2 = st.columns([2, 1, 5])
             with col1:
                 st.subheader("Analysis:")
+                category_prediction = predictions["category"]
                 st.markdown(
+                    f"{category_prediction['emoji']} **Category**: {category_prediction['label']}"
                 )
+                clickbait_prediction = predictions["clickbait"]
                 st.markdown(
+                    f"{clickbait_prediction['emoji']} **Clickbait**: {'Yes' if clickbait_prediction['label'] == 'Clickbait' else 'No'}"
                 )
+                fake_prediction = predictions["fake"]
                 st.markdown(
+                    f"{fake_prediction['emoji']} **Fake**: {'Yes' if fake_prediction['label'] == 'Fake' else 'No'}"
                 )
             with col2:
+                st.subheader("Headline:")
+                annotated_text(
+                    *parse_entities(headline, predictions["ner"]["headline"])
+                )
+                st.subheader("Content:")
+                if content:
+                    annotated_text(
+                        *parse_entities(content, predictions["ner"]["content"])
+                    )
+                else:
+                    st.error("Content not provided.")
+def _get_analyzer() -> "NewsAnalyzer":
+    """Return this session's ``NewsAnalyzer``, creating it on first use."""
+    if "analyzer" not in st.session_state:
+        st.session_state["analyzer"] = NewsAnalyzer(
+            category_model_name="elozano/news-category",
+            fake_model_name="elozano/news-fake",
+            clickbait_model_name="elozano/news-clickbait",
+            ner_model_name="dslim/bert-base-NER",
+        )
+    return st.session_state["analyzer"]
+def parse_entities(
+    text: str, entities: Union[List[Dict[str, Union[str, int, float]]], None]
+) -> List[Union[str, Tuple[str, str]]]:
+    """Split ``text`` into plain strings and ``(span, label)`` entity tuples.
+
+    Args:
+        text: The text the entities were extracted from.
+        entities: Entity dicts with ``"start"``, ``"end"`` and
+            ``"entity_group"`` keys; ``None`` or an empty list yields the
+            whole text as a single plain segment. Assumed to be ordered by
+            ``"start"`` and non-overlapping (TODO confirm against the NER
+            pipeline output).
+
+    Returns:
+        Alternating plain-text pieces and ``(entity_text, entity_group)``
+        tuples, in reading order, ready to unpack into ``annotated_text``.
+    """
     start = 0
     parsed_text = []
+    for entity in entities or []:
+        # Plain text between the previous entity and this one.
+        parsed_text.append(text[start : entity["start"]])
+        # Show the exact source span rather than entity["word"]: the word is
+        # rebuilt from tokens and can differ from the text the user typed.
+        parsed_text.append(
+            (text[entity["start"] : entity["end"]], entity["entity_group"])
+        )
+        start = entity["end"]
     parsed_text.append(text[start:])
     return parsed_text
 if __name__ == "__main__":
+    # Entry point when this file is executed as a script: render the page.
+    run()

news_pipeline.py DELETED Viewed

@@ -1,61 +0,0 @@
-from typing import Dict
-from transformers import (
-    AutoModelForSequenceClassification,
-    AutoModelForTokenClassification,
-    AutoTokenizer,
-    TextClassificationPipeline,
-    TokenClassificationPipeline,
-)
-class NewsPipeline:
-    def __init__(self) -> None:
-        self.category_tokenizer = AutoTokenizer.from_pretrained("elozano/news-category")
-        self.category_pipeline = TextClassificationPipeline(
-            model=AutoModelForSequenceClassification.from_pretrained(
-                "elozano/news-category"
-            ),
-            tokenizer=self.category_tokenizer,
-        )
-        self.fake_tokenizer = AutoTokenizer.from_pretrained("elozano/news-fake")
-        self.fake_pipeline = TextClassificationPipeline(
-            model=AutoModelForSequenceClassification.from_pretrained(
-                "elozano/news-fake"
-            ),
-            tokenizer=self.fake_tokenizer,
-        )
-        self.clickbait_pipeline = TextClassificationPipeline(
-            model=AutoModelForSequenceClassification.from_pretrained(
-                "elozano/news-clickbait"
-            ),
-            tokenizer=AutoTokenizer.from_pretrained("elozano/news-clickbait"),
-        )
-        self.ner_pipeline = TokenClassificationPipeline(
-            tokenizer=AutoTokenizer.from_pretrained("dslim/bert-base-NER"),
-            model=AutoModelForTokenClassification.from_pretrained(
-                "dslim/bert-base-NER"
-            ),
-            aggregation_strategy="simple",
-        )
-    def __call__(self, headline: str, content: str) -> Dict[str, str]:
-        category_article_text = f" {self.category_tokenizer.sep_token} ".join(
-            [headline, content]
-        )
-        fake_article_text = f" {self.fake_tokenizer.sep_token} ".join(
-            [headline, content]
-        )
-        return {
-            "category": self.category_pipeline(category_article_text)[0]["label"],
-            "fake": self.fake_pipeline(fake_article_text)[0]["label"],
-            "clickbait": self.clickbait_pipeline(headline)[0]["label"],
-            "ner": {
-                "headline": list(
-                    filter(lambda x: x["score"] > 0.8, self.ner_pipeline(headline))
-                ),
-                "content": list(
-                    filter(lambda x: x["score"] > 0.8, self.ner_pipeline(content))
-                ),
-            },
-        }

pipeline.py ADDED Viewed

	@@ -0,0 +1,16 @@

+from transformers import TextClassificationPipeline
+from typing import Dict, Optional
+class NewsPipeline(TextClassificationPipeline):
+    """Text-classification pipeline that attaches an emoji to its prediction."""
+    def __init__(self, emojis: Dict[str, str], **kwargs) -> None:
+        """Store the emoji table and forward everything else to the base class.
+
+        Args:
+            emojis: Maps each label the model can predict to an emoji.
+            **kwargs: Passed unchanged to ``TextClassificationPipeline``.
+        """
+        self.emojis = emojis
+        super().__init__(**kwargs)
+    def __call__(
+        self, headline: str, content: Optional[str] = None
+    ) -> Dict[str, object]:
+        """Classify an article given its headline and, optionally, its content.
+
+        Args:
+            headline: Article headline.
+            content: Article body. When ``None`` or empty, only the headline
+                is classified.
+
+        Returns:
+            The first prediction dict from the base pipeline with an extra
+            ``"emoji"`` key looked up from the predicted ``"label"``.
+
+        Raises:
+            KeyError: If the predicted label has no entry in ``emojis``.
+        """
+        if content:
+            # Join headline and content with the tokenizer's separator token.
+            text = f" {self.tokenizer.sep_token} ".join([headline, content])
+        else:
+            text = headline
+        prediction = super().__call__(text)[0]
+        return {**prediction, "emoji": self.emojis[prediction["label"]]}