# Hugging Face Space (status: Running)
import html
import json
import os
from datetime import datetime
from urllib.parse import urlencode

import gradio as gr
import requests
# Pre-rendered recap image URL on the imglab CDN. The URL-encoded ``text``
# query parameter carries the markup for the user "bartowski" (2024 totals and
# most popular contributions); the remaining parameters are layout settings.
# NOTE(review): presumably meant as the image shown before the first lookup —
# confirm, since nothing else in the visible code reads this constant.
DEFAULT_IMAGE = "https://hub-recap.imglab-cdn.net/default.jpg?width=1200&text=%3Cspan+size%3D%2212pt%22+weight%3D%22bold%22%3EHugging+Face++%E2%9D%A4%EF%B8%8F+bartowski+in+2024%3C%2Fspan%3E%0A%0A%3Cspan+weight%3D%22bold%22%3E2%2C020%2C552%3C%2Fspan%3E+model+downloads%0A%3Cspan+weight%3D%22bold%22%3E5%2C407%3C%2Fspan%3E+model+likes%0A%3Cspan+weight%3D%22bold%22%3E0%3C%2Fspan%3E+dataset+downloads%0A%3Cspan+weight%3D%22bold%22%3E0%3C%2Fspan%3E+dataset+likes%0A%0A%3Cspan+size%3D%2210pt%22%3EMost+Popular+Contributions%3A%3C%2Fspan%3E%0AModel%3A+%3Cspan+weight%3D%22bold%22%3Ebartowski%2Fgemma-2-9b-it-GGUF%3C%2Fspan%3E%0A++%2843%2C949+downloads%2C+196+likes%29%0ADataset%3A+%3Cspan+weight%3D%22bold%22%3ENone%3C%2Fspan%3E%0A++%280+downloads%2C+0+likes%29%0ASpace%3A+%3Cspan+weight%3D%22bold%22%3Ebartowski%2Fgguf-metadata-updater%3C%2Fspan%3E%0A++%287+likes%29&text-width=800&text-height=600&text-padding=60&text-color=39%2C71%2C111&text-x=460&text-y=40&format=png&dpr=2"
# Like-count thresholds per category, loaded once at import time.
# get_percentile_rank reads it as
#   PERCENTILES["<category>_percentiles"]["p_999" | "p_9999" | "p_99999"].
# JSON text is UTF-8, so the encoding is pinned instead of being left to the
# platform's locale default.
with open("percentiles.json", encoding="utf-8") as f:
    PERCENTILES = json.load(f)
def get_percentile_rank(likes, category, percentiles=None):
    """Return the highest percentile tier that ``likes`` reaches in ``category``.

    Args:
        likes: Total like count to rank.
        category: Category prefix (the callers in this file pass "model",
            "dataset" or "space"); thresholds are read from the
            ``"<category>_percentiles"`` entry of the table.
        percentiles: Optional threshold table laid out like the module-level
            ``PERCENTILES``. When omitted, ``PERCENTILES`` is used.

    Returns:
        99.999, 99.99 or 99.9 for the most exclusive threshold that ``likes``
        meets, or 0 when ``likes`` is not positive or is below every threshold.

    Raises:
        KeyError: If the table has no entry for ``category`` or lacks one of
            the ``p_999`` / ``p_9999`` / ``p_99999`` keys that gets consulted.
    """
    # Nothing to rank; return before touching the table at all.
    if likes <= 0:
        return 0

    table = PERCENTILES if percentiles is None else percentiles
    thresholds = table[f"{category}_percentiles"]

    # Ordered from most to least exclusive so the first hit is the best tier.
    tiers = (("p_99999", 99.999), ("p_9999", 99.99), ("p_999", 99.9))
    for key, rank in tiers:
        if likes >= thresholds[key]:
            return rank
    return 0
_IMAGE_BASE_URL = "https://hub-recap.imglab-cdn.net/images"

# Message used when the user has no activity in any category.
_NEWBIE_AVATAR = "newbie! We couldn't find your stats on the Hub, maybe in 2025?"

# Avatar title per category; insertion order doubles as the tie-break order
# (model, then dataset, then space).
_CATEGORY_TITLES = {
    "model": "Model Pro",
    "dataset": "Dataset Guru",
    "space": "Space Artiste",
}


def _escape_markup(value):
    """Escape ``&``, ``<`` and ``>`` so external text cannot break the span markup."""
    return html.escape(str(value), quote=False)


def _top_share(percentile):
    """Render a percentile rank (e.g. 99.9) as its "top X" share (e.g. "0.1")."""
    # 100 - 99.9 evaluates to 0.09999999999999432 in binary floating point;
    # rounding to three decimals (the finest tier is 99.999) gives a clean label.
    return f"{round(100 - percentile, 3):g}"


def _small(text):
    """Wrap ``text`` in the 9pt span used for every body line."""
    return f'<span size="9pt">{text}</span>'


def create_image(stats, username):
    """Build the recap image URL for ``username`` from aggregated ``stats``.

    Args:
        stats: Mapping with two entries:
            ``"Total Statistics"`` holding the integer totals "Model Downloads",
            "Model Likes", "Dataset Downloads", "Dataset Likes" and
            "Space Likes"; and ``"Most Popular Items"`` holding "Top Model" and
            "Top Dataset" (each with "name", "likes", "downloads") and
            "Top Space" (with "name" and "likes").
        username: Name shown in the image headline.

    Returns:
        A URL string: the base image for the user's dominant category followed
        by a URL-encoded query carrying the markup text and layout parameters.

    Notes:
        * The dominant category is the one with the highest activity
          (downloads + likes; likes only for spaces). Ties resolve in the order
          model, dataset, space.
        * Percentile ranks are displayed as "top X%" shares everywhere, so a
          99.9 rank reads "0.1%" both in the avatar line and in the secondary
          "Top X% in ..." line.
        * The "Most Popular Contributions:" header is emitted only when at
          least one item line follows it.
        * The username and item names are markup-escaped before interpolation.
          NOTE(review): this assumes the CDN parses the text as Pango-style
          markup that understands ``&amp;``/``&lt;``/``&gt;`` — confirm.
    """
    total_stats = stats["Total Statistics"]
    activity = {
        "model": total_stats["Model Downloads"] + total_stats["Model Likes"],
        "dataset": total_stats["Dataset Downloads"] + total_stats["Dataset Likes"],
        "space": total_stats["Space Likes"],
    }
    # Percentiles are computed from likes only; activity is used just to pick
    # the base image.
    percentile = {
        "model": get_percentile_rank(total_stats["Model Likes"], "model"),
        "dataset": get_percentile_rank(total_stats["Dataset Likes"], "dataset"),
        "space": get_percentile_rank(total_stats["Space Likes"], "space"),
    }

    if all(value == 0 for value in activity.values()):
        primary = None
        url = f"{_IMAGE_BASE_URL}/empty-v1.png"
        avatar = _NEWBIE_AVATAR
    else:
        # max() returns the first maximal key, which reproduces the
        # model > dataset > space tie-break.
        primary = max(activity, key=activity.get)
        url = f"{_IMAGE_BASE_URL}/{primary}-v1.png"
        avatar = _CATEGORY_TITLES[primary]
        if percentile[primary] > 0:
            avatar += f" (top {_top_share(percentile[primary])}%)"

    # Headline, followed by an empty line for spacing.
    text_parts = [
        f'<span size="11pt" weight="bold">Hugging Face ❤️ {_escape_markup(username)} in 2024</span>',
        "",
    ]

    # Totals section: only non-zero counters are listed.
    stat_labels = (
        ("Model Downloads", "model downloads"),
        ("Model Likes", "model likes"),
        ("Dataset Downloads", "dataset downloads"),
        ("Dataset Likes", "dataset likes"),
        ("Space Likes", "space likes"),
    )
    stats_lines = [
        _small(f'<span weight="bold">{total_stats[key]:,}</span> {label}')
        for key, label in stat_labels
        if total_stats[key] > 0
    ]
    if stats_lines:
        text_parts.extend(stats_lines)
        text_parts.append("")  # Empty line for spacing

    # Popular items section. Lines are collected first so the header is never
    # printed on its own.
    top_items = stats["Most Popular Items"]
    item_lines = []
    for label, key in (("Model", "Top Model"), ("Dataset", "Top Dataset")):
        item = top_items[key]
        if item["downloads"] > 0:
            item_lines.append(
                _small(f'{label}: <span weight="bold">{_escape_markup(item["name"])}</span>')
            )
            item_lines.append(
                _small(f' ({item["downloads"]:,} downloads, {item["likes"]} likes)')
            )
    top_space = top_items["Top Space"]
    if top_space["likes"] > 0:
        item_lines.append(
            _small(f'Space: <span weight="bold">{_escape_markup(top_space["name"])}</span>')
        )
        item_lines.append(_small(f' ({top_space["likes"]} likes)'))
    if item_lines:
        text_parts.append(
            '<span size="9pt" weight="bold">Most Popular Contributions:</span>'
        )
        text_parts.extend(item_lines)

    # Avatar message, including the percentile of the dominant category.
    text_parts.append("")  # Empty line for spacing
    text_parts.append(_small(f"You are a {avatar}!"))

    # Mention notable percentiles in the categories that are not the primary one.
    other_percentiles = [
        f"Top {_top_share(percentile[category])}% in {category}s"
        for category in _CATEGORY_TITLES
        if category != primary and percentile[category] > 0
    ]
    if other_percentiles:
        text_parts.append(_small(f'{". ".join(other_percentiles)}!'))

    params = {
        "width": "1200",
        "text": "\n".join(text_parts),
        "text-width": "700",
        "text-height": "600",
        "text-padding": "30",
        "text-color": "39,71,111",
        "text-x": "460",
        "text-y": "40",
        "format": "png",
        "dpr": "2",
    }
    return f"{url}?{urlencode(params)}"
def is_from_2024(created_at_str, year=2024):
    """Return True when a UTC ISO-8601 timestamp string falls in ``year``.

    Args:
        created_at_str: Timestamp such as ``"2024-05-01T12:00:00.000Z"`` or
            ``"2024-05-01T12:00:00Z"``. ``None`` or an empty string yields
            ``False``.
        year: Calendar year to test against. Defaults to 2024.

    Returns:
        ``True`` if the parsed timestamp's year equals ``year``, else ``False``.

    Raises:
        ValueError: If a non-empty string matches neither accepted format.
    """
    if not created_at_str:
        return False

    # Fractional seconds are optional in ISO-8601, but "%f" makes them
    # mandatory for strptime, so the variant without them is tried as well.
    for fmt in ("%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ"):
        try:
            created_at = datetime.strptime(created_at_str, fmt)
        except ValueError:
            continue
        return created_at.year == year

    raise ValueError(f"Unrecognized timestamp format: {created_at_str!r}")
# Seconds to wait for the Hub API; without a timeout requests can block forever.
_HUB_API_TIMEOUT = 30


def _fetch_hub_items(kind, username, headers, **extra_params):
    """Return the decoded JSON list from ``/api/<kind>`` filtered by author."""
    response = requests.get(
        f"https://huggingface.co/api/{kind}",
        params={"author": username, **extra_params},
        headers=headers,
        timeout=_HUB_API_TIMEOUT,
    )
    # An error response carries a JSON object, not a list of items; fail here
    # instead of iterating over its keys further down.
    response.raise_for_status()
    return response.json()


def _sum_field(items, field):
    """Sum ``field`` over ``items``, counting a missing field as 0."""
    return sum(item.get(field, 0) for item in items)


def _top_item_summary(items, name_key, include_downloads=True):
    """Summarize the most-liked entry of ``items`` (placeholders when empty)."""
    top = max(items, key=lambda item: item.get("likes", 0), default=None) or {}
    summary = {
        "name": top.get(name_key, "None"),
        "likes": top.get("likes", 0),
    }
    if include_downloads:
        summary["downloads"] = top.get("downloads", 0)
    return summary


def get_user_stats(username):
    """Fetch a user's Hub activity and return the recap image URL.

    Queries the Hub API for the models, datasets and spaces authored by
    ``username``, aggregates download and like totals, picks the most-liked
    item of each kind and passes the result to ``create_image``.

    No creation-date filtering is applied: every item the API returns is
    counted (the model query is limited to the 100 most-downloaded models).

    Args:
        username: Hub user or organization name. Surrounding whitespace is
            ignored.

    Returns:
        The image URL string produced by ``create_image``.

    Raises:
        requests.HTTPError: If the Hub API answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    username = (username or "").strip()

    # Send the Authorization header only when a token is configured;
    # "Bearer None" would be submitted as an (invalid) credential.
    token = os.getenv("HF_TOKEN")
    headers = {"Authorization": f"Bearer {token}"} if token else {}

    models = _fetch_hub_items(
        "models", username, headers, full="False", limit=100, sort="downloads"
    )
    datasets = _fetch_hub_items("datasets", username, headers, full="True")
    spaces = _fetch_hub_items("spaces", username, headers, full="True")

    stats = {
        "Total Statistics": {
            "Model Downloads": _sum_field(models, "downloads"),
            "Model Likes": _sum_field(models, "likes"),
            "Dataset Downloads": _sum_field(datasets, "downloads"),
            "Dataset Likes": _sum_field(datasets, "likes"),
            "Space Likes": _sum_field(spaces, "likes"),
        },
        "Most Popular Items": {
            # Models expose their name under "modelId"; datasets and spaces
            # are read from "id".
            "Top Model": _top_item_summary(models, "modelId"),
            "Top Dataset": _top_item_summary(datasets, "id"),
            "Top Space": _top_item_summary(spaces, "id", include_downloads=False),
        },
    }

    # Generate image URL
    return create_image(stats, username)
def format_markdown(image_url):
    """Return Markdown that embeds ``image_url`` followed by the data-range note."""
    return (
        f"![Hugging Face Stats]({image_url}) \n\n"
        " * *Downloads are for the last 30 days, likes are for 2024*"
    )


# UI: a username box with a submit button, example usernames, and a Markdown
# area that displays the generated recap image.
with gr.Blocks(title="Hugging Face Community Stats") as demo:
    gr.Markdown("# Hugging Face Community Recap")
    gr.Markdown(
        "Enter a username to see their impact and top contributions across the Hugging Face Hub"
    )

    with gr.Row():
        username_input = gr.Textbox(
            label="Hub username",
            placeholder="Enter Hugging Face username...",
            scale=6,
            value="bartowski",
        )
        submit_btn = gr.Button("Get Stats", scale=6)

    with gr.Row():
        # Add example usernames
        gr.Examples(
            examples=[
                ["merve"],
                ["mlabonne"],
                ["bartowski"],
                ["huggingface"],
                ["cfahlgren1"],
                ["argilla"],
            ],
            inputs=username_input,
            label="Try these examples",
        )

    with gr.Row():
        with gr.Column():
            # Start with the pre-rendered image that matches the textbox's
            # default username so the page is not empty before the first lookup.
            stats_image = gr.Markdown(f"![Hugging Face Stats]({DEFAULT_IMAGE})")

    # Handle submission
    submit_btn.click(
        fn=lambda x: format_markdown(get_user_stats(x)),
        inputs=username_input,
        outputs=stats_image,
        api_name="get_stats",
    )

    # Also trigger on enter key
    username_input.submit(
        fn=lambda x: format_markdown(get_user_stats(x)),
        inputs=username_input,
        outputs=stats_image,
    )


if __name__ == "__main__":
    demo.launch()