Spaces · Commit 46450b0 · Update app.py
Parent(s): 0edd515
app.py CHANGED

@@ -50,7 +50,7 @@ api_url = 'https://a02q342s5b.execute-api.us-east-2.amazonaws.com/reinvent-demo-
 
 
 # Creating Tabs
-tab1, tab2, tab3 = st.tabs(["Image Generation", "Architecture", "Code"])
+tab1, tab2, tab3, tab4 = st.tabs(["Image Generation", "Architecture", "Stable Diffusion Architecture", "Code"])
 
 with tab1:
     # Create two columns for layout
@@ -74,7 +74,7 @@ with tab1:
         "A lost city rediscovered in the Amazon jungle, overgrown with plants, in the style of a vintage travel poster",
         "A steampunk train emitting clouds of steam as it races through a mountain pass, digital art",
         "An enchanted forest with bioluminescent trees and fairies dancing, in a Studio Ghibli style",
-        "A
+        "A portrait of an elegant alien empress with a detailed headdress, reminiscent of Art Nouveau",
         "A post-apocalyptic Tokyo with nature reclaiming skyscrapers, in the style of a concept art",
         "A mythical phoenix rising from ashes, vibrant colors, with a nebula in the background",
         "A cybernetic wolf in a neon-lit city, cyberpunk theme, rain-drenched streets",
@@ -150,78 +150,78 @@ with tab1:
         else:
             st.error(f"Error: {response_one.text}")
 
-    # with
+    # with pass:
 
     # st.title('Llama 2 7B Text Generation with AWS Inferentia 2')
 
     # params = {
     #     "do_sample" : True,
     #     "top_p": 0.6,
     #     "temperature": 0.9,
     #     "top_k": 50,
     #     "max_new_tokens": 512,
     #     "repetition_penalty": 1.03,
     # }
 
     # if "messages" not in st.session_state:
     #     st.session_state.messages = [
     #         {"role": "system", "content": "You are a helpful Travel Planning Assistant. You respond with only 1-2 sentences."},
     #         {'role': 'user', 'content': 'Where can I travel in the fall for cloudy, rainy, and beautiful views?'},
     #     ]
 
     # for message in st.session_state.messages:
     #     with st.chat_message(message["role"]):
     #         st.markdown(message["content"])
 
     # with st.chat_message("assistant"):
     #     message_placeholder = st.empty()
     #     full_response = ""
     #     prompt_input_one = {
     #         "prompt": st.session_state.messages,
     #         "parameters": params,
     #         "endpoint": "huggingface-pytorch-inference-neuronx-2023-11-28-16-09-51-708"
     #     }
 
     #     response_one = requests.post(api_url, json=prompt_input_one)
 
     #     if response_one.status_code == 200:
     #         result_one = response_one.json()
     #         # st.success(f"Prediction result: {result}")
     #         full_response += result_one["generation"]
     #     else:
     #         st.error(f"Error: {response_one.text}")
 
     #     message_placeholder.markdown(full_response)
     #     st.session_state.messages.append({"role": "assistant", "content": full_response})
 
     # if prompt := st.chat_input("What is up?"):
     #     st.session_state.messages.append({"role": "user", "content": prompt})
     #     print(st.session_state.messages)
     #     with st.chat_message("user"):
     #         st.markdown(prompt)
 
     #     with st.chat_message("assistant"):
     #         message_placeholder = st.empty()
     #         new_response = ""
     #         prompt_input_one = {
     #             "prompt": st.session_state.messages,
     #             "parameters": params,
     #             "endpoint": "huggingface-pytorch-inference-neuronx-2023-11-28-16-09-51-708"
     #         }
 
     #         response_one = requests.post(api_url, json=prompt_input_one)
 
     #         if response_one.status_code == 200:
     #             result_one = response_one.json()
     #             # st.success(f"Prediction result: {result}")
     #             new_response += result_one["generation"]
     #         else:
     #             st.error(f"Error: {response_one.text}")
 
     #         message_placeholder.markdown(new_response)
     #         st.session_state.messages.append({"role": "assistant", "content": new_response})
 
-
+    pass
 
 
 with tab2:
@@ -276,6 +276,28 @@ with tab2:
     """)
 
 with tab3:
+    left_column, _, right_column = st.columns([2,.2,3])
+
+    with right_column:
+        # Define Streamlit UI elements
+        st.markdown("""<br>""", unsafe_allow_html=True)
+        st.image('./sdxl_arch.png', caption=f"SDXL Architecture")
+
+    with left_column:
+        st.write("## SDXL Architecture Overview")
+
+        st.write("""
+        The stable diffusion model takes both a latent seed and a text prompt as input. The latent seed is used to generate random latent image representations of size 64×64, whereas the text prompt is transformed into text embeddings of size 77×768 via CLIP's text encoder.
+
+        Next, the U-Net iteratively denoises the random latent image representations while being conditioned on the text embeddings. The output of the U-Net, the noise residual, is used to compute a denoised latent image representation via a scheduler algorithm. Many different scheduler algorithms can be used for this computation, each having its pros and cons.
+
+
+        The theory behind the scheduler algorithms is out of scope for this demo; in short, they compute the predicted denoised image representation from the previous noise representation and the predicted noise residual.
+
+        The denoising process is repeated roughly 50 times to retrieve progressively better latent image representations. Once complete, the latent image representation is decoded by the decoder part of the variational autoencoder.
+        """)
+
+with tab4:
     with st.expander("(1) Deploy GenAI Model to AWS Inferentia 2 Instance and Amazon SageMaker Endpoint"):
         st.markdown(
             """
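---

Note: the "SDXL Architecture Overview" text added in tab3 describes the standard latent-diffusion denoising loop; the 64×64 latents and 77×768 CLIP embeddings it quotes actually correspond to a Stable Diffusion 1.x checkpoint. As a companion to that prose, the sketch below walks through the same loop with Hugging Face diffusers. The model ID, seed, and 50-step count are illustrative assumptions, classifier-free guidance is omitted for brevity, and none of this code is part of the Space's app.py.

import torch
from diffusers import AutoencoderKL, PNDMScheduler, UNet2DConditionModel
from transformers import CLIPTextModel, CLIPTokenizer

model_id = "runwayml/stable-diffusion-v1-5"  # assumption: any SD 1.x checkpoint works
tokenizer = CLIPTokenizer.from_pretrained(model_id, subfolder="tokenizer")
text_encoder = CLIPTextModel.from_pretrained(model_id, subfolder="text_encoder")
unet = UNet2DConditionModel.from_pretrained(model_id, subfolder="unet")
vae = AutoencoderKL.from_pretrained(model_id, subfolder="vae")
scheduler = PNDMScheduler.from_pretrained(model_id, subfolder="scheduler")

# Text prompt -> text embeddings of size 77x768 via CLIP's text encoder.
prompt = ["A mythical phoenix rising from ashes, vibrant colors"]
tokens = tokenizer(prompt, padding="max_length", max_length=77,
                   truncation=True, return_tensors="pt")
with torch.no_grad():
    text_embeddings = text_encoder(tokens.input_ids)[0]  # shape (1, 77, 768)

# Latent seed -> random latent image representation of size 64x64.
generator = torch.manual_seed(0)
latents = torch.randn((1, unet.config.in_channels, 64, 64), generator=generator)
latents = latents * scheduler.init_noise_sigma

# Iterative denoising: the U-Net, conditioned on the text embeddings,
# predicts the noise residual; the scheduler uses it to compute the next,
# less noisy latent representation.
scheduler.set_timesteps(50)
for t in scheduler.timesteps:
    latent_input = scheduler.scale_model_input(latents, t)
    with torch.no_grad():
        noise_pred = unet(latent_input, t,
                          encoder_hidden_states=text_embeddings).sample
    latents = scheduler.step(noise_pred, t, latents).prev_sample

# Decode the final latents with the decoder part of the VAE.
with torch.no_grad():
    image = vae.decode(latents / 0.18215).sample  # 0.18215: SD 1.x VAE scaling factor

Each scheduler.step call is exactly the computation the overview refers to: it combines the previous noisy latents with the predicted noise residual to produce the next denoised latent estimate.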