Keeby-smilyai committed
Commit 06078e5 · verified · 1 Parent(s): 3be15d2

Update app.py

Files changed (1)
  1. app.py +139 -149
app.py CHANGED
@@ -1,160 +1,150 @@
-import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments, DataCollatorForLanguageModeling
-from datasets import load_dataset
-import torch
-import os
-
-#-------------------------------Functions----------------------------------------------#
-def load_and_preprocess_data(dataset_name, tokenizer):
-    try:
-        dataset = load_dataset(dataset_name, split="train")
-    except Exception as e:
-        return None, f"Error loading dataset: {e}"
-
-    def tokenize_function(examples):
-        return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=128)
-
-    try:
-        tokenized_datasets = dataset.map(tokenize_function, batched=True, num_proc=4, remove_columns=["text"])
-    except Exception as e:
-        return None, f"Error tokenizing dataset: {e}"
-
-    return tokenized_datasets, None
-#---------------------------------------------------------------------------------------#
-
-def train_model(architecture_size, api_key, repo_name, push_to_hub):
-    # Map architecture size to model name
-    model_name_mapping = {
-        "Small": "distilgpt2",
-        "Medium": "gpt2",
-        "Large": "gpt2-medium",
-    }
-    model_name = model_name_mapping[architecture_size]
-
-    # Device setup
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    device_msg = "CUDA is available! Training will be faster on GPU." if torch.cuda.is_available() else "CUDA not available. Training on CPU will be slow."
-
-    # Validate push_to_hub inputs
-    if push_to_hub:
-        if not api_key or not api_key.strip():
-            return "❌ Error: You must provide a Hugging Face API key if pushing to hub is selected."
-        if not repo_name or not repo_name.strip():
-            return "❌ Error: You must provide a repository name if pushing to hub is selected."
-
-    try:
-        # Load dataset
-        dataset_name = "wikitext-2-raw-v1"
-        tokenizer = AutoTokenizer.from_pretrained(model_name)
-        if tokenizer.pad_token is None:
-            tokenizer.pad_token = tokenizer.eos_token
-
-        tokenized_datasets, error_msg = load_and_preprocess_data(dataset_name, tokenizer)
-        if error_msg:
-            return f"❌ {error_msg}"
-        if tokenized_datasets is None:
-            return "❌ Failed to load and preprocess dataset."
-
-        # Load model
-        model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
-        model.resize_token_embeddings(len(tokenizer))
-
-        # Training args
-        output_dir = "./results"
-        training_args = TrainingArguments(
-            output_dir=output_dir,
-            num_train_epochs=1,
-            per_device_train_batch_size=4,
-            save_steps=500,
-            save_total_limit=1,
-            logging_steps=250,
-            learning_rate=5e-5,
-            weight_decay=0.01,
-            push_to_hub=push_to_hub,
-            hub_model_id=repo_name if push_to_hub else None,
-            hub_token=api_key if push_to_hub else None,
-            fp16=torch.cuda.is_available(),
-        )
-
-        # Data collator
-        data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
-
-        # Trainer
-        trainer = Trainer(
-            model=model,
-            args=training_args,
-            train_dataset=tokenized_datasets,
-            data_collator=data_collator,
-        )
-
-        # Train
-        trainer.train()
-
-        # Save locally
-        trainer.save_model(output_dir)
-
-        # Evaluate
-        eval_results = trainer.evaluate()
-        eval_loss = eval_results.get('eval_loss', 'N/A')
-
-        # Push to hub if selected
-        if push_to_hub:
-            trainer.push_to_hub()
-            hub_msg = f"✅ Model pushed to Hugging Face Hub: {repo_name}"
-        else:
-            hub_msg = "ℹ️ Model saved locally at ./results (not pushed to hub)."
-
-        return f"""✅ Training Complete!
-Device: {device_msg}
-Eval Loss: {eval_loss}
-{hub_msg}
-"""
-
-    except Exception as e:
-        return f"❌ Training Error: {str(e)}"
-
-# ----------------------------- Gradio Interface ----------------------------- #
-
-with gr.Blocks(title="LLM Builder - Gradio") as demo:
-    gr.Markdown("# 🤖 LLM Builder")
-    gr.Markdown("### 1. Select Model Architecture")
-
-    architecture_size = gr.Dropdown(
-        choices=["Small", "Medium", "Large"],
-        value="Small",
-        label="Choose Model Size",
-        info="Select the size of the model. Larger models have more parameters."
-    )
-
-    gr.Markdown("### 2. Training Setup")
-
-    with gr.Row():
-        with gr.Column():
-            api_key = gr.Textbox(
-                label="Hugging Face Hub API Key",
-                type="password",
-                placeholder="hf_...",
-                info="Required only if pushing to hub."
-            )
-            repo_name = gr.Textbox(
-                label="Repository Name",
-                placeholder="your-username/your-model-name",
-                info="Required only if pushing to hub."
-            )
-            push_to_hub = gr.Checkbox(
-                label="Push to Hugging Face Hub?",
-                value=False
-            )
-
-    train_btn = gr.Button("🚀 Start Training", variant="primary")
-    output = gr.Textbox(label="Training Output", placeholder="Training logs and results will appear here...", lines=10)
-
-    train_btn.click(
-        fn=train_model,
-        inputs=[architecture_size, api_key, repo_name, push_to_hub],
-        outputs=output
-    )
-
-# Launch the app
-if __name__ == "__main__":
-    demo.launch()
+# app.py
+import gradio as gr
+from backend import verify_hf_token, get_user_runs, get_run_logs, queue_training_run, start_training_if_free
+from utils import ARCH_ANALOGIES, get_auto_hyperparams
+
+# ------------------------------ STATE ------------------------------
+user_state = {"user_id": None, "hf_token": "", "current_run_id": None}
+
+# ------------------------------ PAGES ------------------------------
+
+def page_login(hf_token):
+    user_id, msg = verify_hf_token(hf_token)
+    if user_id:
+        user_state["user_id"] = user_id
+        user_state["hf_token"] = hf_token
+        return gr.update(visible=False), gr.update(visible=True), msg
+    else:
+        return gr.update(), gr.update(), msg
+
+def page_processes():
+    runs = get_user_runs(user_state["user_id"])
+    run_list = "\n".join([
+        f"🍳 Run #{r[0]} | {r[1].upper()} x{r[2]} layers | {r[3]} | {r[4]}"
+        for r in runs
+    ]) or "No runs yet. Start cooking!"
+    return run_list
+
+def load_run_logs(run_id_str):
+    try:
+        run_id = int(run_id_str)
+        logs, status = get_run_logs(run_id)
+        return f"Status: {status}\n\n{logs}"
+    except:
+        return "Invalid run ID."
+
+def page_architecture_next(arch_type, num_layers):
+    analogy = ARCH_ANALOGIES.get(arch_type, "")
+    auto_config = get_auto_hyperparams(arch_type, num_layers)
+    user_state["arch_config"] = {
+        "arch_type": arch_type,
+        "num_layers": num_layers,
+        "auto_config": auto_config
+    }
+    return (
+        gr.update(visible=False),
+        gr.update(visible=True),
+        f"🧠 {analogy}\n\nAuto-Seasoning™ Suggestion:\nLR: {auto_config['learning_rate']} | Epochs: {auto_config['epochs']} | Batch: {auto_config['batch_size']}"
+    )
+
+def page_hyperparams_next(lr, epochs, batch_size):
+    config = user_state["arch_config"]
+    final_config = {
+        "arch_type": config["arch_type"],
+        "num_layers": config["num_layers"],
+        "learning_rate": float(lr) if lr else config["auto_config"]["learning_rate"],
+        "epochs": int(epochs) if epochs else config["auto_config"]["epochs"],
+        "batch_size": int(batch_size) if batch_size else config["auto_config"]["batch_size"],
+    }
+    run_id = queue_training_run(user_state["user_id"], final_config)
+    user_state["current_run_id"] = run_id
+    # Try to start if RAM allows
+    can_start = start_training_if_free()
+    status = "queued. Waiting for available stove 🔥..." if not can_start else "starting..."
+    return (
+        gr.update(visible=False),
+        gr.update(visible=True),
+        f"✅ Run #{run_id} {status}\nCheck 'Your Processes' for logs!"
+    )
+
+# ------------------------------ UI ------------------------------
+
+with gr.Blocks(title="LLM Kitchen 🍳") as demo:
+    gr.Markdown("# 🍳 Welcome to LLM Kitchen")
+    gr.Markdown("### Cook your own language model — from scratch!")
+
+    # ---- PAGE 1: LOGIN ----
+    with gr.Group() as page_login_ui:
+        gr.Markdown("### 🔐 Step 1: Login with Hugging Face Token")
+        token_input = gr.Textbox(label="HF Token (starts with 'hf_')", type="password")
+        login_btn = gr.Button("Login to Kitchen", variant="primary")
+        login_msg = gr.Markdown()
+
+    # ---- PAGE 2: PROCESSES ----
+    with gr.Group(visible=False) as page_processes_ui:
+        gr.Markdown("### 🧑‍🍳 Your Processes")
+        refresh_btn = gr.Button("Refresh List")
+        runs_display = gr.Textbox(label="Your Training Runs", lines=8)
+        run_id_input = gr.Textbox(label="Enter Run ID to View Logs")
+        view_logs_btn = gr.Button("View Logs")
+        logs_display = gr.Textbox(label="Training Logs", lines=10)
+        new_run_btn = gr.Button("➕ Start New Process", variant="primary")
+
+    # ---- PAGE 3: ARCHITECTURE ----
+    with gr.Group(visible=False) as page_arch_ui:
+        gr.Markdown("### 🏗️ Step 2: Choose Your Architecture")
+        arch_dropdown = gr.Dropdown(["cnn", "rnn", "transformer"], label="Architecture Type")
+        layers_slider = gr.Slider(1, 16, value=4, step=1, label="Number of Layers (Think: # of sauce reductions)")
+        arch_next_btn = gr.Button("Next → Hyperparameters")
+        arch_analogy = gr.Markdown()
+
+    # ---- PAGE 4: HYPERPARAMETERS ----
+    with gr.Group(visible=False) as page_hyper_ui:
+        gr.Markdown("### 🧂 Step 3: Season Your Model (Hyperparameters)")
+        gr.Markdown("Use Auto-Seasoning™ or customize manually")
+        lr_input = gr.Number(label="Learning Rate (Saltiness)")
+        epochs_input = gr.Number(label="Epochs (Simmer Time)", precision=0)
+        batch_input = gr.Number(label="Batch Size (Spoon Size)", precision=0)
+        hyper_next_btn = gr.Button("Start Cooking! 🍲")
+
+    # ---- PAGE 5: TRAINING STARTED ----
+    with gr.Group(visible=False) as page_train_ui:
+        train_status = gr.Markdown("Starting your training run...")
+
+    # ------------------------------ EVENTS ------------------------------
+
+    login_btn.click(
+        page_login,
+        inputs=token_input,
+        outputs=[page_login_ui, page_processes_ui, login_msg]
+    )
+
+    refresh_btn.click(
+        page_processes,
+        outputs=runs_display
+    )
+
+    view_logs_btn.click(
+        load_run_logs,
+        inputs=run_id_input,
+        outputs=logs_display
+    )
+
+    new_run_btn.click(
+        lambda: (gr.update(visible=False), gr.update(visible=True), ""),
+        outputs=[page_processes_ui, page_arch_ui, arch_analogy]
+    )
+
+    arch_next_btn.click(
+        page_architecture_next,
+        inputs=[arch_dropdown, layers_slider],
+        outputs=[page_arch_ui, page_hyper_ui, arch_analogy]
+    )
+
+    hyper_next_btn.click(
+        page_hyperparams_next,
+        inputs=[lr_input, epochs_input, batch_input],
+        outputs=[page_hyper_ui, page_train_ui, train_status]
+    )
+
+demo.queue().launch()
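
Note for reviewing this commit: the new app.py imports five helpers from backend and two from utils, and neither module is included in this diff, so the imports can only be checked against their call sites. From the call in page_login, verify_hf_token must return a (user_id, message) pair with a falsy user_id on failure. A minimal sketch under that contract, assuming the token is validated with huggingface_hub.whoami (everything beyond the name and return shape is an assumption, not the Space's actual code):

# Hypothetical sketch of backend.verify_hf_token -- backend.py is not in this diff.
from huggingface_hub import whoami

def verify_hf_token(hf_token):
    """Return (user_id, message); user_id is None when login fails."""
    if not hf_token or not hf_token.startswith("hf_"):
        return None, "❌ Token should start with 'hf_'."
    try:
        info = whoami(token=hf_token)  # raises if the Hub rejects the token
        return info["name"], f"✅ Welcome to the kitchen, {info['name']}!"
    except Exception as e:
        return None, f"❌ Login failed: {e}"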
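
The remaining backend helpers are constrained only by how the pages call them. The shapes below are inferred from those call sites; the tuple field names are guesses:

# Inferred call-site contracts for the rest of backend.py (sketch, not actual code).
def get_user_runs(user_id):
    """Return rows shaped like (run_id, arch_type, num_layers, status, created_at)."""

def get_run_logs(run_id):
    """Return (logs_text, status_string) for a single run."""

def queue_training_run(user_id, config):
    """Persist the run config and return its integer run_id."""

def start_training_if_free():
    """Start the next queued run if resources allow; return True if one was started."""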
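
utils is pinned down the same way: ARCH_ANALOGIES is a string lookup keyed by architecture type, and get_auto_hyperparams(arch_type, num_layers) must return a dict with learning_rate, epochs, and batch_size keys (see page_architecture_next). A sketch with placeholder analogy texts and defaults, not the Space's actual values:

# Hypothetical sketch of utils.py -- values below are placeholders.
ARCH_ANALOGIES = {
    "cnn": "A CNN chops the input like a mandoline: the same stroke everywhere.",
    "rnn": "An RNN is a slow simmer: each step folds in the taste of the last.",
    "transformer": "A transformer tastes every ingredient at once via attention.",
}

def get_auto_hyperparams(arch_type, num_layers):
    # Heuristic: deeper stacks get a smaller learning rate.
    return {
        "learning_rate": 5e-4 / (1 + num_layers // 4),
        "epochs": 3,
        "batch_size": 8 if arch_type == "transformer" else 16,
    }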