Spaces:
Runtime error
Runtime error
DeDeckerThomas
committed on
Commit
·
31decce
1
Parent(s):
8339421
Test new layout
Browse files
app.py
CHANGED
|
@@ -52,7 +52,7 @@ def get_annotated_text(text, keyphrases):
|
|
| 52 |
rf"$K:{keyphrases.index(keyphrase)}\2",
|
| 53 |
text,
|
| 54 |
flags=re.I,
|
| 55 |
-
count=1
|
| 56 |
)
|
| 57 |
|
| 58 |
result = []
|
|
@@ -91,7 +91,7 @@ def get_annotated_text(text, keyphrases):
|
|
| 91 |
|
| 92 |
|
| 93 |
def rerender_output(layout):
|
| 94 |
-
layout.
|
| 95 |
if (
|
| 96 |
len(st.session_state.keyphrases) > 0
|
| 97 |
and len(st.session_state.selected_rows) == 0
|
|
@@ -142,31 +142,65 @@ if "select_rows" not in st.session_state:
|
|
| 142 |
st.set_page_config(
|
| 143 |
page_icon="π",
|
| 144 |
page_title="Keyphrase extraction/generation with Transformers",
|
| 145 |
-
layout="
|
| 146 |
)
|
| 147 |
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
|
| 152 |
-
|
| 153 |
-
"Choose your model:",
|
| 154 |
-
st.session_state.config.get("models"),
|
| 155 |
-
)
|
| 156 |
-
st.session_state.chosen_model = chosen_model
|
| 157 |
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
)
|
| 162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
|
| 164 |
-
st.
|
| 165 |
-
|
| 166 |
-
).replace("\n", " ")
|
| 167 |
-
with st.spinner("Extracting keyphrases..."):
|
| 168 |
-
pressed = col1.button("Extract", on_click=extract_keyphrases)
|
| 169 |
|
|
|
|
|
|
|
| 170 |
|
| 171 |
if len(st.session_state.data_frame.columns) > 0:
|
| 172 |
st.subheader("π History")
|
|
@@ -182,6 +216,3 @@ if len(st.session_state.data_frame.columns) > 0:
|
|
| 182 |
update_mode=GridUpdateMode.SELECTION_CHANGED,
|
| 183 |
)
|
| 184 |
st.session_state.selected_rows = pd.DataFrame(data["selected_rows"])
|
| 185 |
-
|
| 186 |
-
if len(st.session_state.selected_rows) > 0 or len(st.session_state.keyphrases) > 0:
|
| 187 |
-
rerender_output(col2)
|
|
|
|
| 52 |
rf"$K:{keyphrases.index(keyphrase)}\2",
|
| 53 |
text,
|
| 54 |
flags=re.I,
|
| 55 |
+
count=1,
|
| 56 |
)
|
| 57 |
|
| 58 |
result = []
|
|
|
|
| 91 |
|
| 92 |
|
| 93 |
def rerender_output(layout):
|
| 94 |
+
layout.write("βοΈ Output")
|
| 95 |
if (
|
| 96 |
len(st.session_state.keyphrases) > 0
|
| 97 |
and len(st.session_state.selected_rows) == 0
|
|
|
|
| 142 |
st.set_page_config(
|
| 143 |
page_icon="π",
|
| 144 |
page_title="Keyphrase extraction/generation with Transformers",
|
| 145 |
+
layout="centered",
|
| 146 |
)
|
| 147 |
|
| 148 |
+
with open("css/style.css") as f:
|
| 149 |
+
st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
|
|
|
|
| 150 |
|
| 151 |
+
st.header("π Keyphrase extraction/generation with Transformers")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
|
| 153 |
+
description = """
|
| 154 |
+
Keyphrase extraction is a technique in text analysis where you extract the important keyphrases
|
| 155 |
+
from a text. Since this is a time-consuming process, Artificial Intelligence is used to automate it.
|
| 156 |
+
Currently, classical machine learning methods, that use statistics and linguistics, are widely used
|
| 157 |
+
for the extraction process. The fact that these methods have been widely used in the community has
|
| 158 |
+
the advantage that there are many easy-to-use libraries. Now with the recent innovations in
|
| 159 |
+
deep learning methods (such as recurrent neural networks and transformers, GANS, …),
|
| 160 |
+
keyphrase extraction can be improved. These new methods also focus on the semantics and
|
| 161 |
+
context of a document, which is quite an improvement.
|
| 162 |
+
|
| 163 |
+
This space gives you the ability to test around with some keyphrase extraction and generation models.
|
| 164 |
+
Keyphrase extraction models are transformers models fine-tuned as a token classification problem where
|
| 165 |
+
the tokens in a text are annotated as:
|
| 166 |
+
|
| 167 |
+
* B: Beginning of a keyphrase
|
| 168 |
+
* I: Inside a keyphrases
|
| 169 |
+
* O: Outside a keyhprase.
|
| 170 |
+
|
| 171 |
+
While keyphrase extraction can only extract keyphrases from a given text. Keyphrase generation models
|
| 172 |
+
work a bit differently. Here you use an encoder-decoder model like BART to generate keyphrases from a given text.
|
| 173 |
+
These models also have the ability to generate keyphrases, which are not present in the text 🤯.
|
| 174 |
+
|
| 175 |
+
Do you want to see some magic π§ββοΈ? Try it out yourself! π
|
| 176 |
+
"""
|
| 177 |
+
|
| 178 |
+
st.write(description)
|
| 179 |
+
|
| 180 |
+
with st.form("test"):
|
| 181 |
+
chosen_model = st.selectbox(
|
| 182 |
+
"Choose your model:",
|
| 183 |
+
st.session_state.config.get("models"),
|
| 184 |
+
)
|
| 185 |
+
st.session_state.chosen_model = chosen_model
|
| 186 |
+
st.markdown(
|
| 187 |
+
f"For more information about the chosen model, please be sure to check it out the [🤗 Model Card](https://huggingface.co/DeDeckerThomas/{chosen_model})."
|
| 188 |
)
|
| 189 |
|
| 190 |
+
with st.spinner("Loading pipeline..."):
|
| 191 |
+
pipe = load_pipeline(
|
| 192 |
+
f"{st.session_state.config.get('model_author')}/{st.session_state.chosen_model}"
|
| 193 |
+
)
|
| 194 |
+
|
| 195 |
+
st.session_state.input_text = st.text_area(
|
| 196 |
+
"β Input", st.session_state.config.get("example_text"), height=300
|
| 197 |
+
).replace("\n", " ")
|
| 198 |
|
| 199 |
+
with st.spinner("Extracting keyphrases..."):
|
| 200 |
+
pressed = st.form_submit_button("Extract", on_click=extract_keyphrases)
|
|
|
|
|
|
|
|
|
|
| 201 |
|
| 202 |
+
if len(st.session_state.selected_rows) > 0 or len(st.session_state.keyphrases) > 0:
|
| 203 |
+
rerender_output(st)
|
| 204 |
|
| 205 |
if len(st.session_state.data_frame.columns) > 0:
|
| 206 |
st.subheader("π History")
|
|
|
|
| 216 |
update_mode=GridUpdateMode.SELECTION_CHANGED,
|
| 217 |
)
|
| 218 |
st.session_state.selected_rows = pd.DataFrame(data["selected_rows"])
|
|
|
|
|
|
|
|
|
css/style.css
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
@import url('https://fonts.googleapis.com/css2?family=Roboto:ital,wght@0,100;0,300;0,400;0,500;0,700;0,900;1,100;1,300;1,400;1,500;1,700;1,900&display=swap');
|
| 2 |
+
|
| 3 |
+
body {
|
| 4 |
+
font-family: 'Roboto', 'Source Sans Pro', sans-serif;
|
| 5 |
+
}
|
pipelines/__pycache__/keyphrase_extraction_pipeline.cpython-39.pyc
CHANGED
|
Binary files a/pipelines/__pycache__/keyphrase_extraction_pipeline.cpython-39.pyc and b/pipelines/__pycache__/keyphrase_extraction_pipeline.cpython-39.pyc differ
|
|
|
pipelines/__pycache__/keyphrase_generation_pipeline.cpython-39.pyc
CHANGED
|
Binary files a/pipelines/__pycache__/keyphrase_generation_pipeline.cpython-39.pyc and b/pipelines/__pycache__/keyphrase_generation_pipeline.cpython-39.pyc differ
|
|
|