hannahcyberey committed on
Commit
1a99b8c
·
1 Parent(s): f75f514
Files changed (1) hide show
  1. app.py +78 -27
app.py CHANGED
@@ -38,35 +38,85 @@ HEAD = """
38
 
39
  HTML = f"""
40
  <div id="banner">
41
- <h1 style="font-size: 32px; line-height: 1.5em; margin-bottom: 0em;">
42
- <img src="/gradio_api/file=assets/rudder_3094973.png" style="display: inline; height: 1.5em;"> LLM Censorship Steering
43
- </h1>
44
- <div id="cover" style="height: 130px;">
45
- <img style="height: 100%; padding-top: 0.5em;" src="/gradio_api/file=assets/demo-cover.png">
 
 
 
 
 
46
  </div>
47
  </div>
48
  """
49
 
50
  CSS = """
 
 
 
 
51
  div#banner {
52
  display: flex;
53
  flex-direction: column;
54
  align-items: center;
55
  justify-content: center;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  }
57
- div#component-8 .form {
58
- padding-top: 7.5px;
59
- background: var(--block-background-fill);
60
  }
61
- div#component-9 {
62
- .toggle-label {color: var(--body-text-color);}
 
 
 
 
 
63
  span p {
64
  font-size: var(--block-info-text-size);
65
  line-height: var(--line-sm);
66
  color: var(--block-label-text-color);
67
  }
68
  }
69
- div#component-10 {
 
 
70
  .slider_input_container span {color: var(--body-text-color);}
71
  .slider_input_container {
72
  display: flex;
@@ -74,7 +124,8 @@ div#component-10 {
74
  input {appearance: auto;}
75
  }
76
  }
77
- div#component-10 .wrap .head {
 
78
  justify-content: unset;
79
  label {margin-right: var(--size-2);}
80
  label span {
@@ -259,23 +310,23 @@ with gr.Blocks(title="LLM Censorship Steering", theme=theme, head=HEAD, css=CSS,
259
 
260
  gr.HTML(HTML)
261
 
262
- @gr.render(inputs=endpoint_state, triggers=[endpoint_state.change])
263
- def render_state(endpoint_state):
264
- if endpoint_state == "Ready":
265
- color = "green"
266
- elif endpoint_state == "Server Error":
267
- color = "red"
268
- else:
269
- color = "orange"
270
-
271
- if endpoint_state != None:
272
- gr.Markdown(f'🤖 {model_name} | Inference Endpoint State: <span style="color:{color}; font-weight: bold;">{endpoint_state}</span>')
273
-
274
- with gr.Row():
275
  with gr.Column(scale=1):
 
 
 
 
 
 
 
 
 
 
 
 
276
  with gr.Row():
277
- steer_toggle = Toggle(label="Steering", info="Turn off to generate original outputs", value=True, interactive=True, scale=2)
278
- coeff = gr.Slider(label="Steering Coefficient:", value=-1.0, minimum=-2, maximum=2, step=0.1, scale=8, show_reset_button=False)
279
 
280
  @gr.on(inputs=[steer_toggle], outputs=[steer_toggle, coeff], triggers=[steer_toggle.change])
281
  def update_toggle(toggle_value):
 
38
 
39
  HTML = f"""
40
  <div id="banner">
41
+ <h1><img src="/gradio_api/file=assets/rudder_3094973.png">&nbsp;LLM Censorship Steering</h1>
42
+
43
+ <div id="links" class="row" style="margin-bottom: .8em;">
44
+ <i class="fa-solid fa-file-pdf fa-lg"></i><a href="https://arxiv.org/abs/2504.17130"> Paper</a> &nbsp;
45
+ <i class="fa-solid fa-blog fa-lg"></i><a href="https://hannahxchen.github.io/blog/2025/censorship-steering"> Blog Post</a> &nbsp;
46
+ <i class="fa-brands fa-github fa-lg"></i><a href="https://github.com/hannahxchen/llm-censorship-steering"> Code</a> &nbsp;
47
+ </div>
48
+
49
+ <div id="cover">
50
+ <img src="/gradio_api/file=assets/demo-cover.png">
51
  </div>
52
  </div>
53
  """
54
 
55
  CSS = """
56
+ div.gradio-container .app {
57
+ max-width: 1600px !important;
58
+ }
59
+
60
  div#banner {
61
  display: flex;
62
  flex-direction: column;
63
  align-items: center;
64
  justify-content: center;
65
+
66
+ h1 {
67
+ font-size: 32px;
68
+ line-height: 1.35em;
69
+ margin-bottom: 0em;
70
+ display: flex;
71
+
72
+ img {
73
+ display: inline;
74
+ height: 1.35em;
75
+ }
76
+ }
77
+
78
+ div#cover img {
79
+ max-height: 130px;
80
+ padding-top: 0.5em;
81
+ }
82
+ }
83
+
84
+ @media (max-width: 500px) {
85
+ div#banner {
86
+ h1 {
87
+ font-size: 22px;
88
+ }
89
+
90
+ div#links {
91
+ font-size: 14px;
92
+ }
93
+ }
94
+
95
+ div#model-state p {
96
+ font-size: 14px;
97
+ }
98
+
99
  }
100
+
101
+ div#main-components {
102
+ align-items: flex-end;
103
  }
104
+
105
+ div#steering-toggle {
106
+ padding-top: 8px;
107
+ padding-bottom: 8px;
108
+ .toggle-label {
109
+ color: var(--body-text-color);
110
+ }
111
  span p {
112
  font-size: var(--block-info-text-size);
113
  line-height: var(--line-sm);
114
  color: var(--block-label-text-color);
115
  }
116
  }
117
+
118
+ div#coeff-slider {
119
+ padding-bottom: 5px;
120
  .slider_input_container span {color: var(--body-text-color);}
121
  .slider_input_container {
122
  display: flex;
 
124
  input {appearance: auto;}
125
  }
126
  }
127
+
128
+ div#coeff-slider .wrap .head {
129
  justify-content: unset;
130
  label {margin-right: var(--size-2);}
131
  label span {
 
310
 
311
  gr.HTML(HTML)
312
 
313
+ with gr.Row(elem_id="main-components"):
 
 
 
 
 
 
 
 
 
 
 
 
314
  with gr.Column(scale=1):
315
+ @gr.render(inputs=endpoint_state, triggers=[endpoint_state.change])
316
+ def render_state(endpoint_state):
317
+ if endpoint_state == "Ready":
318
+ color = "green"
319
+ elif endpoint_state == "Server Error":
320
+ color = "red"
321
+ else:
322
+ color = "orange"
323
+
324
+ if endpoint_state != None:
325
+ gr.Markdown(f'🤖 {model_name} | Inference Endpoint State: <span style="color:{color}; font-weight: bold;">{endpoint_state}</span>', elem_id="model-state")
326
+
327
  with gr.Row():
328
+ steer_toggle = Toggle(label="Steering", info="Turn off to generate original outputs", value=True, interactive=True, scale=2, elem_id="steering-toggle")
329
+ coeff = gr.Slider(label="Coefficient:", value=-1.0, minimum=-2, maximum=2, step=0.1, scale=8, show_reset_button=False, elem_id="coeff-slider")
330
 
331
  @gr.on(inputs=[steer_toggle], outputs=[steer_toggle, coeff], triggers=[steer_toggle.change])
332
  def update_toggle(toggle_value):