import gradio as gr
import polars as pl

# Parquet files backing the two datasets offered in the UI.
RICH_PARQUET_PATH = "all_rich_dataset_cards.parquet"
MISSING_PARQUET_PATH = "all_minimal_dataset_cards.parquet"

# Number of rows shown per page of the table.
ROWS_PER_PAGE = 50

# Lazy scans: these only describe the query; rows are materialized when a
# page is collected.
lazy_rich = pl.scan_parquet(RICH_PARQUET_PATH)
lazy_missing = pl.scan_parquet(MISSING_PARQUET_PATH)

# Frame used for the initial page load (the "missing information" dataset).
current_lazy_df = lazy_missing


def get_page(lazy_df: pl.LazyFrame, page: int, column: str = None, query: str = ""):
    """Return one page of rows and the total number of pages.

    Args:
        lazy_df: Lazy frame to page through.
        page: Zero-based page index. ``None`` and negative values are
            treated as 0 so the slice never wraps around to the end.
        column: Name of the column to search. No filtering when falsy.
        query: Text to look for in ``column``. Matching is a
            case-insensitive *literal* substring test; surrounding
            whitespace is ignored and a blank query disables filtering.

    Returns:
        A ``(page_df, total_pages)`` tuple: a pandas DataFrame with at most
        ``ROWS_PER_PAGE`` rows, and the page count of the (filtered) data,
        which is 0 when no row matches.
    """
    filtered_df = lazy_df

    needle = query.strip().lower() if query else ""
    if column and needle:
        # Lower-case a temporary expression for the comparison only, so the
        # values shown in the table keep their original casing.
        haystack = pl.col(column).cast(pl.Utf8).str.to_lowercase()
        # literal=True: the text comes from a search box, so characters such
        # as "(" or "[" must not be parsed as a regular expression.
        filtered_df = filtered_df.filter(
            haystack.str.contains(needle, literal=True)
        )

    start = max(int(page or 0), 0) * ROWS_PER_PAGE
    page_df = filtered_df.slice(start, ROWS_PER_PAGE).collect().to_pandas()

    # Count rows inside the query instead of collecting every column of the
    # whole frame just to read its height.
    # NOTE(review): pl.len() needs a reasonably recent polars release; older
    # releases spell it pl.count() - confirm against the pinned version.
    total_rows = filtered_df.select(pl.len()).collect().item()
    total_pages = (total_rows - 1) // ROWS_PER_PAGE + 1

    return page_df, total_pages


# Load the first page once at import time; it seeds the table, the page
# counter and the list of searchable columns when the UI is built.
initial_df, total_pages = get_page(current_lazy_df, 0)
columns = list(initial_df.columns)


# Dropdown labels mapped to the lazy frames they browse.
DATASET_FRAMES = {
    "DatasetCards rich in information": lazy_rich,
    "DatasetCards missing information": lazy_missing,
}
DEFAULT_DATASET = "DatasetCards missing information"


def _frame_for(dataset_choice):
    """Return the lazy frame for a dropdown label (minimal cards if unknown)."""
    return DATASET_FRAMES.get(dataset_choice, lazy_missing)


def _page_index(page):
    """Coerce the page widget value to a non-negative int (empty field -> 0)."""
    return max(int(page), 0) if page is not None else 0


# NOTE(review): the original indentation was lost; which widgets sit inside
# each gr.Row() is reconstructed from the statement order - confirm the layout.
with gr.Blocks() as demo:
    gr.Markdown("## Dataset Insight Portal")

    # Dataset selector.
    dataset_select = gr.Dropdown(
        choices=list(DATASET_FRAMES),
        value=DEFAULT_DATASET,
        label="Select Dataset",
    )

    # Pagination controls. A CSS class is used instead of elem_id because an
    # HTML id must be unique and four buttons shared "small-btn".
    with gr.Row():
        prev_btn = gr.Button("Previous", elem_classes=["small-btn"])
        next_btn = gr.Button("Next", elem_classes=["small-btn"])
        page_number = gr.Number(value=0, label="Page", precision=0)
        total_pages_display = gr.Label(value=f"Total Pages: {total_pages}")

    # Read-only table showing the current page.
    data_table = gr.Dataframe(
        value=initial_df,
        headers=columns,
        datatype="str",
        interactive=False,
        row_count=ROWS_PER_PAGE,
    )

    # Search controls.
    with gr.Row():
        col_dropdown = gr.Dropdown(choices=columns, label="Column")
        search_text = gr.Textbox(label="Search")
        search_btn = gr.Button("Search", elem_classes=["small-btn"])
        reset_btn = gr.Button("Reset", elem_classes=["small-btn"])

    def _view(dataset_choice, page, column=None, query=""):
        """Fetch ``page`` of the chosen dataset, clamped to the valid range.

        The dataset is resolved from the dropdown value passed in by the
        event rather than from a module global, so concurrent visitors do
        not change each other's dataset.

        Returns:
            ``(page_df, "Total Pages: N", page)`` for the table, the page
            counter label and the page number widget.
        """
        lazy_df = _frame_for(dataset_choice)
        page = max(int(page), 0)
        page_df, page_count = get_page(lazy_df, page, column, query)

        # An empty result reports 0 pages; never let the index drop below 0.
        last_page = max(page_count - 1, 0)
        if page > last_page:
            page = last_page
            page_df, page_count = get_page(lazy_df, page, column, query)

        return page_df, f"Total Pages: {page_count}", page

    def load_dataset(dataset_choice):
        """Show page 0 of the newly selected dataset and reset the search UI."""
        page_df, pages_label, page = _view(dataset_choice, 0)
        new_columns = list(page_df.columns)
        return (
            gr.update(value=page_df, headers=new_columns),
            pages_label,
            page,
            gr.update(
                choices=new_columns,
                value=new_columns[0] if new_columns else None,
            ),
            # Clear the old query: the table shown here is unfiltered, so
            # Next/Previous must not silently re-apply it.
            "",
        )

    def next_page_func(dataset_choice, page, column, query):
        """Advance one page, stopping at the last page."""
        return _view(dataset_choice, _page_index(page) + 1, column, query)

    def prev_page_func(dataset_choice, page, column, query):
        """Go back one page, stopping at page 0."""
        return _view(dataset_choice, _page_index(page) - 1, column, query)

    def search_func(dataset_choice, column, query):
        """Apply the search and jump to the first page of results."""
        return _view(dataset_choice, 0, column, query)

    def reset_func(dataset_choice):
        """Drop the search filter: show page 0 and clear the search box."""
        return (*_view(dataset_choice, 0), "")

    # Event wiring.
    table_outputs = [data_table, total_pages_display, page_number]
    paging_inputs = [dataset_select, page_number, col_dropdown, search_text]

    dataset_select.change(
        load_dataset,
        dataset_select,
        table_outputs + [col_dropdown, search_text],
    )
    next_btn.click(next_page_func, paging_inputs, table_outputs)
    prev_btn.click(prev_page_func, paging_inputs, table_outputs)
    search_btn.click(
        search_func,
        [dataset_select, col_dropdown, search_text],
        table_outputs,
    )
    reset_btn.click(reset_func, dataset_select, table_outputs + [search_text])

demo.launch()