Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
Commit · 74baf5f
1 Parent(s): 1921d67
update
Browse files
- app.py +56 -50
- scripts/anomaly.py +5 -3
    	
        app.py
    CHANGED
    
    | @@ -33,6 +33,7 @@ def parse_row_selection(df, rows_text: str): | |
| 33 |  | 
| 34 | 
             
            with gr.Blocks() as demo:
         | 
| 35 | 
             
                gr.Markdown("# OMS Analyze — Prototype")
         | 
|  | |
| 36 | 
             
                with gr.Tabs():
         | 
| 37 | 
             
                    # Upload & Preview tab
         | 
| 38 | 
             
                    with gr.TabItem('Upload & Preview'):
         | 
| @@ -74,54 +75,6 @@ with gr.Blocks() as demo: | |
| 74 | 
             
                        csv_up.change(fn=initial_preview, inputs=csv_up, outputs=[original_preview, cleansed_preview, clean_status])
         | 
| 75 | 
             
                        apply_clean.click(fn=apply_cleansing, inputs=[csv_up, remove_dup, missing_handling], outputs=[cleansed_preview, clean_status, download_cleansed])
         | 
| 76 |  | 
| 77 | 
            -
                    # Recommendation tab
         | 
| 78 | 
            -
                    with gr.TabItem('Recommendation'):
         | 
| 79 | 
            -
                        gr.Markdown("**Usecase Scenario — Recommendation**: สร้างสรุปเหตุการณ์ (เช่น สรุปเหตุการณ์ไฟฟ้าล้ม) สำหรับแถวที่เลือก ปรับระดับรายละเอียด และเลือกใช้ Generative AI เพื่อเพิ่มความชัดเจน 및 ดาวน์โหลดไฟล์สรุป")
         | 
| 80 | 
            -
                        csv_in = gr.File(label='Upload CSV (data.csv)')
         | 
| 81 | 
            -
                        with gr.Row():
         | 
| 82 | 
            -
                            rows = gr.Textbox(label='Rows (comma-separated indexes) or empty = all', placeholder='e.g. 0,1,2')
         | 
| 83 | 
            -
                            use_hf = gr.Checkbox(label='Use Generative AI', value=False)
         | 
| 84 | 
            -
                            verbosity = gr.Radio(choices=['analyze','recommend'], value='analyze', label='Summary Type', interactive=True)
         | 
| 85 | 
            -
                            run_btn = gr.Button('Generate Summaries', interactive=True)
         | 
| 86 | 
            -
                        with gr.Row():
         | 
| 87 | 
            -
                            model_selector = gr.Dropdown(
         | 
| 88 | 
            -
                                choices=[
         | 
| 89 | 
            -
                                    'meta-llama/Llama-3.1-8B-Instruct:novita',
         | 
| 90 | 
            -
                                    'meta-llama/Llama-4-Scout-17B-16E-Instruct:novita',
         | 
| 91 | 
            -
                                    'Qwen/Qwen3-VL-235B-A22B-Instruct:novita',
         | 
| 92 | 
            -
                                    'deepseek-ai/DeepSeek-R1:novita'
         | 
| 93 | 
            -
                                ],
         | 
| 94 | 
            -
                                value='meta-llama/Llama-3.1-8B-Instruct:novita',
         | 
| 95 | 
            -
                                label='GenAI Model',
         | 
| 96 | 
            -
                                interactive=True,
         | 
| 97 | 
            -
                                visible=False
         | 
| 98 | 
            -
                            )
         | 
| 99 | 
            -
                        out = gr.Dataframe(headers=['EventNumber','OutageDateTime','Summary'])
         | 
| 100 | 
            -
                        status = gr.Textbox(label='Status', interactive=False)
         | 
| 101 | 
            -
                        download = gr.File(label='Download summaries')
         | 
| 102 | 
            -
             | 
| 103 | 
            -
                        def run_summarize(file, rows_text, use_hf_flag, verbosity_level):
         | 
| 104 | 
            -
                            print(f"Debug: file={file}, rows_text={rows_text}, use_hf_flag={use_hf_flag}, verbosity_level={verbosity_level}")
         | 
| 105 | 
            -
                            if file is None:
         | 
| 106 | 
            -
                                return pd.DataFrame([], columns=['EventNumber','OutageDateTime','Summary']), 'No file provided', None
         | 
| 107 | 
            -
                            df = pd.read_csv(file.name, dtype=str)
         | 
| 108 | 
            -
                            df_sel = parse_row_selection(df, rows_text)
         | 
| 109 | 
            -
                            model = 'meta-llama/Llama-3.1-8B-Instruct:novita'  # default
         | 
| 110 | 
            -
                            res = summarize_events(df_sel, use_hf=use_hf_flag, verbosity=verbosity_level, model=model)
         | 
| 111 | 
            -
                            out_df = pd.DataFrame(res)
         | 
| 112 | 
            -
                            out_file = ROOT / 'outputs' / 'summaries_from_ui.csv'
         | 
| 113 | 
            -
                            out_file.parent.mkdir(exist_ok=True)
         | 
| 114 | 
            -
                            out_df.to_csv(out_file, index=False, encoding='utf-8-sig')
         | 
| 115 | 
            -
                            status_text = f"Summaries generated: {len(out_df)} rows. HF used: {use_hf_flag}"
         | 
| 116 | 
            -
                            return out_df, status_text, str(out_file)
         | 
| 117 | 
            -
             | 
| 118 | 
            -
                        def update_model_visibility(use_hf_flag):
         | 
| 119 | 
            -
                            return gr.update(visible=use_hf_flag, interactive=use_hf_flag)
         | 
| 120 | 
            -
                        
         | 
| 121 | 
            -
                        use_hf.change(fn=update_model_visibility, inputs=use_hf, outputs=model_selector)
         | 
| 122 | 
            -
                        
         | 
| 123 | 
            -
                        run_btn.click(fn=run_summarize, inputs=[csv_in, rows, use_hf, verbosity], outputs=[out, status, download])
         | 
| 124 | 
            -
             | 
| 125 | 
             
                    # Summary tab
         | 
| 126 | 
             
                    with gr.TabItem('Summary'):
         | 
| 127 | 
             
                        gr.Markdown("**Usecase Scenario — Summary**: สร้างสรุปภาพรวมของชุดข้อมูลทั้งหมด รวมสถิติพื้นฐาน และคำนวณดัชนีความน่าเชื่อถือ (เช่น SAIFI, SAIDI, CAIDI) พร้อมตัวเลือกใช้ Generative AI ในการขยายความ")
         | 
| @@ -136,7 +89,8 @@ with gr.Blocks() as demo: | |
| 136 | 
             
                                    'meta-llama/Llama-3.1-8B-Instruct:novita',
         | 
| 137 | 
             
                                    'meta-llama/Llama-4-Scout-17B-16E-Instruct:novita',
         | 
| 138 | 
             
                                    'Qwen/Qwen3-VL-235B-A22B-Instruct:novita',
         | 
| 139 | 
            -
                                    'deepseek-ai/DeepSeek-R1:novita'
         | 
|  | |
| 140 | 
             
                                ],
         | 
| 141 | 
             
                                value='meta-llama/Llama-3.1-8B-Instruct:novita',
         | 
| 142 | 
             
                                label='GenAI Model',
         | 
| @@ -188,11 +142,60 @@ with gr.Blocks() as demo: | |
| 188 |  | 
| 189 | 
             
                        run_sum.click(fn=run_overall_summary, inputs=[csv_in_sum, use_hf_sum, total_customers, model_selector_sum], outputs=[ai_summary_out, basic_stats_out, reliability_out, sum_status])
         | 
| 190 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 191 | 
             
                    with gr.TabItem('Anomaly Detection'):
         | 
| 192 | 
             
                        gr.Markdown("**Usecase Scenario — Anomaly Detection**: ตรวจจับเหตุการณ์ที่มีพฤติกรรมผิดปกติในชุดข้อมูล (เช่น เหตุการณ์ที่มีค่าสูง/ต่ำผิดปกติ) โดยใช้หลาย algorithm ปรับระดับ contamination และส่งออกผลลัพธ์พร้อมธงความผิดปกติ")
         | 
| 193 | 
             
                        csv_in_anom = gr.File(label='Upload CSV for Anomaly')
         | 
| 194 | 
             
                        with gr.Row():
         | 
| 195 | 
            -
                            alg = gr.Radio(choices=[' | 
| 196 | 
             
                            contamination = gr.Slider(minimum=0.01, maximum=0.2, value=0.05, step=0.01, label='Contamination')
         | 
| 197 | 
             
                            run_anom = gr.Button('Run Anomaly Detection')
         | 
| 198 | 
             
                        anom_out = gr.Dataframe()
         | 
| @@ -205,6 +208,9 @@ with gr.Blocks() as demo: | |
| 205 | 
             
                            from scripts.anomaly import detect_anomalies
         | 
| 206 | 
             
                            df = pd.read_csv(file.name, dtype=str)
         | 
| 207 | 
             
                            res = detect_anomalies(df, contamination=contamination, algorithm=algorithm)
         | 
|  | |
|  | |
|  | |
| 208 | 
             
                            out_file = ROOT / 'outputs' / 'anomalies_from_ui.csv'
         | 
| 209 | 
             
                            out_file.parent.mkdir(exist_ok=True)
         | 
| 210 | 
             
                            res.to_csv(out_file, index=False, encoding='utf-8-sig')
         | 
|  | |
| 33 |  | 
| 34 | 
             
            with gr.Blocks() as demo:
         | 
| 35 | 
             
                gr.Markdown("# OMS Analyze — Prototype")
         | 
| 36 | 
            +
                gr.Markdown("> Created by PEACE, Powered by AI, Version 0.0.1")
         | 
| 37 | 
             
                with gr.Tabs():
         | 
| 38 | 
             
                    # Upload & Preview tab
         | 
| 39 | 
             
                    with gr.TabItem('Upload & Preview'):
         | 
|  | |
| 75 | 
             
                        csv_up.change(fn=initial_preview, inputs=csv_up, outputs=[original_preview, cleansed_preview, clean_status])
         | 
| 76 | 
             
                        apply_clean.click(fn=apply_cleansing, inputs=[csv_up, remove_dup, missing_handling], outputs=[cleansed_preview, clean_status, download_cleansed])
         | 
| 77 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 78 | 
             
                    # Summary tab
         | 
| 79 | 
             
                    with gr.TabItem('Summary'):
         | 
| 80 | 
             
                        gr.Markdown("**Usecase Scenario — Summary**: สร้างสรุปภาพรวมของชุดข้อมูลทั้งหมด รวมสถิติพื้นฐาน และคำนวณดัชนีความน่าเชื่อถือ (เช่น SAIFI, SAIDI, CAIDI) พร้อมตัวเลือกใช้ Generative AI ในการขยายความ")
         | 
|  | |
| 89 | 
             
                                    'meta-llama/Llama-3.1-8B-Instruct:novita',
         | 
| 90 | 
             
                                    'meta-llama/Llama-4-Scout-17B-16E-Instruct:novita',
         | 
| 91 | 
             
                                    'Qwen/Qwen3-VL-235B-A22B-Instruct:novita',
         | 
| 92 | 
            +
                                    'deepseek-ai/DeepSeek-R1:novita',
         | 
| 93 | 
            +
                                    'moonshotai/Kimi-K2-Instruct-0905:novita'
         | 
| 94 | 
             
                                ],
         | 
| 95 | 
             
                                value='meta-llama/Llama-3.1-8B-Instruct:novita',
         | 
| 96 | 
             
                                label='GenAI Model',
         | 
|  | |
| 142 |  | 
| 143 | 
             
                        run_sum.click(fn=run_overall_summary, inputs=[csv_in_sum, use_hf_sum, total_customers, model_selector_sum], outputs=[ai_summary_out, basic_stats_out, reliability_out, sum_status])
         | 
| 144 |  | 
| 145 | 
            +
                    # Recommendation tab
         | 
| 146 | 
            +
                    with gr.TabItem('Recommendation'):
         | 
| 147 | 
            +
                        gr.Markdown("**Usecase Scenario — Recommendation**: สร้างสรุปเหตุการณ์ (เช่น สรุปเหตุการณ์ไฟฟ้าขัอข้องหรือบำรุงรักษา) สำหรับแถวที่เลือก ปรับระดับรายละเอียด และเลือกใช้ Generative AI เพื่อเพิ่มความชัดเจน พร้อมดาวน์โหลดไฟล์สรุป")
         | 
| 148 | 
            +
                        csv_in = gr.File(label='Upload CSV (data.csv)')
         | 
| 149 | 
            +
                        with gr.Row():
         | 
| 150 | 
            +
                            rows = gr.Textbox(label='Rows (comma-separated indexes) or empty = all', placeholder='e.g. 0,1,2')
         | 
| 151 | 
            +
                            use_hf = gr.Checkbox(label='Use Generative AI', value=False)
         | 
| 152 | 
            +
                            verbosity = gr.Radio(choices=['analyze','recommend'], value='analyze', label='Summary Type', interactive=True)
         | 
| 153 | 
            +
                            run_btn = gr.Button('Generate Summaries', interactive=True)
         | 
| 154 | 
            +
                        with gr.Row():
         | 
| 155 | 
            +
                            model_selector = gr.Dropdown(
         | 
| 156 | 
            +
                                choices=[
         | 
| 157 | 
            +
                                    'meta-llama/Llama-3.1-8B-Instruct:novita',
         | 
| 158 | 
            +
                                    'meta-llama/Llama-4-Scout-17B-16E-Instruct:novita',
         | 
| 159 | 
            +
                                    'Qwen/Qwen3-VL-235B-A22B-Instruct:novita',
         | 
| 160 | 
            +
                                    'deepseek-ai/DeepSeek-R1:novita',
         | 
| 161 | 
            +
                                    'moonshotai/Kimi-K2-Instruct-0905:novita'
         | 
| 162 | 
            +
                                ],
         | 
| 163 | 
            +
                                value='meta-llama/Llama-3.1-8B-Instruct:novita',
         | 
| 164 | 
            +
                                label='GenAI Model',
         | 
| 165 | 
            +
                                interactive=True,
         | 
| 166 | 
            +
                                visible=False
         | 
| 167 | 
            +
                            )
         | 
| 168 | 
            +
                        out = gr.Dataframe(headers=['EventNumber','OutageDateTime','Summary'])
         | 
| 169 | 
            +
                        status = gr.Textbox(label='Status', interactive=False)
         | 
| 170 | 
            +
                        download = gr.File(label='Download summaries')
         | 
| 171 | 
            +
             | 
| 172 | 
            +
                        def run_summarize(file, rows_text, use_hf_flag, verbosity_level, model):
         | 
| 173 | 
            +
                            print(f"Debug: file={file}, rows_text={rows_text}, use_hf_flag={use_hf_flag}, verbosity_level={verbosity_level}, model={model}")
         | 
| 174 | 
            +
                            if file is None:
         | 
| 175 | 
            +
                                return pd.DataFrame([], columns=['EventNumber','OutageDateTime','Summary']), 'No file provided', None
         | 
| 176 | 
            +
                            df = pd.read_csv(file.name, dtype=str)
         | 
| 177 | 
            +
                            df_sel = parse_row_selection(df, rows_text)
         | 
| 178 | 
            +
                            res = summarize_events(df_sel, use_hf=use_hf_flag, verbosity=verbosity_level, model=model)
         | 
| 179 | 
            +
                            out_df = pd.DataFrame(res)
         | 
| 180 | 
            +
                            out_file = ROOT / 'outputs' / 'summaries_from_ui.csv'
         | 
| 181 | 
            +
                            out_file.parent.mkdir(exist_ok=True)
         | 
| 182 | 
            +
                            out_df.to_csv(out_file, index=False, encoding='utf-8-sig')
         | 
| 183 | 
            +
                            status_text = f"Summaries generated: {len(out_df)} rows. HF used: {use_hf_flag}"
         | 
| 184 | 
            +
                            return out_df, status_text, str(out_file)
         | 
| 185 | 
            +
             | 
| 186 | 
            +
                        def update_model_visibility(use_hf_flag):
         | 
| 187 | 
            +
                            return gr.update(visible=use_hf_flag, interactive=use_hf_flag)
         | 
| 188 | 
            +
                        
         | 
| 189 | 
            +
                        use_hf.change(fn=update_model_visibility, inputs=use_hf, outputs=model_selector)
         | 
| 190 | 
            +
                        
         | 
| 191 | 
            +
                        run_btn.click(fn=run_summarize, inputs=[csv_in, rows, use_hf, verbosity, model_selector], outputs=[out, status, download])
         | 
| 192 | 
            +
             | 
| 193 | 
            +
                    # Anomaly Detection tab
         | 
| 194 | 
             
                    with gr.TabItem('Anomaly Detection'):
         | 
| 195 | 
             
                        gr.Markdown("**Usecase Scenario — Anomaly Detection**: ตรวจจับเหตุการณ์ที่มีพฤติกรรมผิดปกติในชุดข้อมูล (เช่น เหตุการณ์ที่มีค่าสูง/ต่ำผิดปกติ) โดยใช้หลาย algorithm ปรับระดับ contamination และส่งออกผลลัพธ์พร้อมธงความผิดปกติ")
         | 
| 196 | 
             
                        csv_in_anom = gr.File(label='Upload CSV for Anomaly')
         | 
| 197 | 
             
                        with gr.Row():
         | 
| 198 | 
            +
                            alg = gr.Radio(choices=['iso+lof','iso','lof','autoencoder'], value='iso+lof', label='Algorithm')
         | 
| 199 | 
             
                            contamination = gr.Slider(minimum=0.01, maximum=0.2, value=0.05, step=0.01, label='Contamination')
         | 
| 200 | 
             
                            run_anom = gr.Button('Run Anomaly Detection')
         | 
| 201 | 
             
                        anom_out = gr.Dataframe()
         | 
|  | |
| 208 | 
             
                            from scripts.anomaly import detect_anomalies
         | 
| 209 | 
             
                            df = pd.read_csv(file.name, dtype=str)
         | 
| 210 | 
             
                            res = detect_anomalies(df, contamination=contamination, algorithm=algorithm)
         | 
| 211 | 
            +
                            # Reorder columns to put ensemble_flag and final_flag at the end
         | 
| 212 | 
            +
                            cols = [c for c in res.columns if c not in ['ensemble_flag', 'final_flag']] + ['ensemble_flag', 'final_flag']
         | 
| 213 | 
            +
                            res = res[cols]
         | 
| 214 | 
             
                            out_file = ROOT / 'outputs' / 'anomalies_from_ui.csv'
         | 
| 215 | 
             
                            out_file.parent.mkdir(exist_ok=True)
         | 
| 216 | 
             
                            res.to_csv(out_file, index=False, encoding='utf-8-sig')
         | 
    	
        scripts/anomaly.py
    CHANGED
    
    | @@ -154,7 +154,7 @@ def explain_anomalies(df_fe: pd.DataFrame, explain_features=None): | |
| 154 | 
             
                return z, explanations
         | 
| 155 |  | 
| 156 |  | 
| 157 | 
            -
            def detect_anomalies(df: pd.DataFrame, contamination: float = 0.05, algorithm: str = ' | 
| 158 | 
             
                Xs, features, df_fe, scaler = build_feature_matrix(df)
         | 
| 159 |  | 
| 160 | 
             
                if algorithm == 'autoencoder':
         | 
| @@ -175,13 +175,15 @@ def detect_anomalies(df: pd.DataFrame, contamination: float = 0.05, algorithm: s | |
| 175 | 
             
                    # ensemble: flag if both mark as outlier (-1)
         | 
| 176 | 
             
                    res['ensemble_flag'] = ((res['iso_pred'] == -1) & (res['lof_pred'] == -1))
         | 
| 177 |  | 
| 178 | 
            -
                    # algorithm filter: if algorithm == 'iso' or 'lof' or ' | 
| 179 | 
             
                    if algorithm == 'iso':
         | 
| 180 | 
             
                        res['final_flag'] = res['iso_pred'] == -1
         | 
| 181 | 
             
                    elif algorithm == 'lof':
         | 
| 182 | 
             
                        res['final_flag'] = res['lof_pred'] == -1
         | 
| 183 | 
            -
                     | 
| 184 | 
             
                        res['final_flag'] = res['ensemble_flag']
         | 
|  | |
|  | |
| 185 |  | 
| 186 | 
             
                # explainability (same for all)
         | 
| 187 | 
             
                z_df, explanations = explain_anomalies(df_fe)
         | 
|  | |
| 154 | 
             
                return z, explanations
         | 
| 155 |  | 
| 156 |  | 
| 157 | 
            +
            def detect_anomalies(df: pd.DataFrame, contamination: float = 0.05, algorithm: str = 'iso+lof') -> pd.DataFrame:
         | 
| 158 | 
             
                Xs, features, df_fe, scaler = build_feature_matrix(df)
         | 
| 159 |  | 
| 160 | 
             
                if algorithm == 'autoencoder':
         | 
|  | |
| 175 | 
             
                    # ensemble: flag if both mark as outlier (-1)
         | 
| 176 | 
             
                    res['ensemble_flag'] = ((res['iso_pred'] == -1) & (res['lof_pred'] == -1))
         | 
| 177 |  | 
| 178 | 
            +
                    # algorithm filter: if algorithm == 'iso' or 'lof' or 'iso+lof', compute final_flag
         | 
| 179 | 
             
                    if algorithm == 'iso':
         | 
| 180 | 
             
                        res['final_flag'] = res['iso_pred'] == -1
         | 
| 181 | 
             
                    elif algorithm == 'lof':
         | 
| 182 | 
             
                        res['final_flag'] = res['lof_pred'] == -1
         | 
| 183 | 
            +
                    elif algorithm == 'iso+lof':
         | 
| 184 | 
             
                        res['final_flag'] = res['ensemble_flag']
         | 
| 185 | 
            +
                    else:
         | 
| 186 | 
            +
                        raise ValueError(f"Unknown algorithm: {algorithm}")
         | 
| 187 |  | 
| 188 | 
             
                # explainability (same for all)
         | 
| 189 | 
             
                z_df, explanations = explain_anomalies(df_fe)
         | 
