import gradio as gr
import random
import os
import spaces
import torch
import time
import json
import numpy as np
from diffusers import BriaFiboPipeline
from diffusers.modular_pipelines import ModularPipeline

from optimization import optimize_pipeline_

# resolutions=[
#     "832 1248",
#     "896 1152",
#     "960 1088",
#     "1024 1024",
#     "1088 960",
#     "1152 896",
#     "1216 832",
#     "1280 800",
#     "1344 768",
# ]
# Upper bound for user-selectable / randomly drawn seeds (max signed 32-bit int).
MAX_SEED = np.iinfo(np.int32).max
# Precision used when loading the image-generation pipeline.
dtype = torch.bfloat16
# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# This script only runs inference, so autograd is switched off globally.
torch.set_grad_enabled(False)

# Pipeline that turns a free-text prompt into a structured JSON prompt.
vlm_pipe = ModularPipeline.from_pretrained(
    "briaai/FIBO-VLM-prompt-to-JSON",
    trust_remote_code=True,
).to(device)

# Pipeline that renders an image from a JSON prompt.
pipe = BriaFiboPipeline.from_pretrained(
    "briaai/FIBO",
    trust_remote_code=True,
    torch_dtype=dtype,
).to(device)
# Sample structured prompt in the JSON schema consumed by the image pipeline.
# The opening quotes must sit on the assignment line: a bare ``name =`` followed
# by a newline is a SyntaxError. The literal still begins with a newline, so
# the string content is exactly what the original layout intended.
test_prompt_json = """
{
  "short_description": "A surreal and whimsical scene featuring a man, a woman, and a dog posed against a tri-colored backdrop. The woman stands in front of the red section, wearing a t-shirt with a Yoda motif and a skirt with birds. The dog, dressed as a superdog, sits on a checkerboard chair in front of the white section, with a blue tennis ball in its mouth. The man, in a purple suit, stands in front of the gold section, holding a tree branch with a blue jay. The backdrop is divided into red, white, and gold sections, with a small metal grating in the top left and a tear in the gold section. A rustic framed oil painting of the pyramids hangs above the dog.",
  "objects": [
    {
      "description": "A woman standing in front of the red backdrop. She is wearing a beige t-shirt with a Yoda motif and a long skirt with birds on it. Her right hand is holding an axe.",
      "location": "Center-left",
      "relationship": "She is positioned in front of the red backdrop and to the left of the dog and man.",
      "relative_size": "Medium",
      "shape_and_color": "Humanoid shape, beige and multicolored clothing.",
      "appearance_details": "She has a long skirt with birds on it and is holding an axe.",
      "pose": "Standing upright with a slight tilt to the right.",
      "expression": "Neutral",
      "clothing": "She is wearing a beige t-shirt with a Yoda motif and a long skirt with birds on it.",
      "action": "Standing",
      "gender": "Female",
      "skin_tone_and_texture": "Fair, smooth."
    },
    {
      "description": "A dog dressed as a superdog, sitting on a checkerboard chair in front of the white backdrop. It has a blue tennis ball in its mouth.",
      "location": "Center",
      "relationship": "It is positioned in front of the white backdrop and between the woman and the man.",
      "relative_size": "Medium",
      "shape_and_color": "Canine shape, brown and white fur, blue tennis ball.",
      "appearance_details": "It is dressed as a superdog and has a blue tennis ball in its mouth.",
      "pose": "Sitting upright.",
      "expression": "Neutral",
      "clothing": "Superdog costume.",
      "action": "Sitting",
      "gender": "Male",
      "skin_tone_and_texture": "Brown and white fur, soft."
    },
    {
      "description": "A man standing in front of the gold backdrop. He is wearing a three piece purple suit and has spiky blue hair. His left hand is holding a tree branch with a blue jay on it.",
      "location": "Center-right",
      "relationship": "He is positioned in front of the gold backdrop and to the right of the woman and dog.",
      "relative_size": "Medium",
      "shape_and_color": "Humanoid shape, purple suit, blue hair.",
      "appearance_details": "He has spiky blue hair and is holding a tree branch with a blue jay on it.",
      "pose": "Standing upright with a slight tilt to the left.",
      "expression": "Neutral",
      "clothing": "He is wearing a three piece purple suit.",
      "action": "Standing",
      "gender": "Male",
      "skin_tone_and_texture": "Fair, smooth."
    },
    {
      "description": "A checkerboard armchair in yellow and brown.",
      "location": "Bottom-center",
      "relationship": "The dog is sitting on the chair.",
      "relative_size": "Small",
      "shape_and_color": "Chair shape, yellow and brown.",
      "texture": "Smooth. End of texture answer.",
      "appearance_details": "The chair is a checkerboard armchair in yellow and brown."
    },
    {
      "description": "A rustic framed oil painting of the pyramids.",
      "location": "Top-center",
      "relationship": "The painting is hanging above the dog.",
      "relative_size": "Small",
      "shape_and_color": "Rectangular shape, brown frame, yellow and brown pyramids.",
      "texture": "Rough. End of texture answer.",
      "appearance_details": "The painting is a rustic framed oil painting of the pyramids."
    }
  ],
  "background_setting": "The background is a tri-colored backdrop divided equally into red, white, and gold sections. There is a small rectangular metal grating in the top left corner and a subtle tear in the gold backdrop in the bottom right corner.",
  "lighting": {
    "conditions": "Studio lighting",
    "direction": "Front-lit",
    "shadows": "Soft shadows are present, indicating diffused lighting."
  },
  "aesthetics": {
    "composition": "The composition is centered, with the three figures arranged in a row. The backdrop is divided into thirds, creating a symmetrical balance.",
    "color_scheme": "The color scheme is triadic, with red, white, and gold dominating the backdrop, complemented by the various colors of the figures' clothing and accessories.",
    "mood_atmosphere": "The mood is whimsical and surreal, with a touch of humor due to the unusual costumes and props.",
    "preference_score": "high",
    "aesthetic_score": "high"
  },
  "photographic_characteristics": {
    "depth_of_field": "Deep",
    "focus": "Sharp focus on all subjects",
    "camera_angle": "Eye-level",
    "lens_focal_length": "Standard"
  },
  "style_medium": "Photograph",
  "text_render": [
    {
      "text": "Yoda",
      "location": "Center of the woman's t-shirt",
      "size": "Small",
      "color": "Beige",
      "font": "Cartoonish",
      "appearance_details": "The text is part of a graphic design on the t-shirt."
    }
  ],
  "context": "This is a surreal and whimsical portrait of a man, a woman, and a dog posed against a tri-colored backdrop. It could be an art piece or a promotional image for a quirky event or product.",
  "artistic_style": "Surreal Pop"
}
"""
# Run the project-local optimisation helper on the image pipeline at import
# time, feeding it the sample JSON prompt defined above.
# NOTE(review): presumably this warms up / compiles the pipeline before the
# first user request — confirm against optimization.py.
optimize_pipeline_(pipe, test_prompt_json)

def handle_json(text):
    """Return *text* unchanged if it is valid JSON, otherwise ``"Error"``.

    Args:
        text: Candidate JSON document (normally a ``str``).

    Returns:
        The original ``text`` when ``json.loads`` accepts it, or the literal
        string ``"Error"`` when it does not.
    """
    try:
        json.loads(text)
    except (TypeError, ValueError, RecursionError):
        # ValueError covers json.JSONDecodeError (malformed input), TypeError
        # covers non-string input such as None, RecursionError covers
        # pathologically nested documents. Anything else (KeyboardInterrupt,
        # SystemExit, ...) is deliberately allowed to propagate.
        return "Error"
    return text
            

@spaces.GPU(duration=100)
def infer(prompt,
          negative_prompt="",
          seed=42,
          randomize_seed=False,
          width=1024,
          height=1024,
          guidance_scale=5,
          num_inference_steps=50,
         ):
    """Generate an image from a free-text prompt.

    The prompt is first expanded into a structured JSON prompt by ``vlm_pipe``;
    that JSON prompt is then rendered by ``pipe``.

    Args:
        prompt: Free-text description supplied by the user.
        negative_prompt: Negative prompt forwarded to the image pipeline.
        seed: Seed for the image pipeline's random generator. Ignored when
            ``randomize_seed`` is true.
        randomize_seed: When true, draw a fresh seed in ``[0, MAX_SEED]``.
        width: Output width in pixels.
        height: Output height in pixels.
        guidance_scale: Guidance scale forwarded to the image pipeline.
        num_inference_steps: Number of denoising steps.

    Returns:
        A ``(image, json_prompt)`` tuple: the first generated image and the
        JSON prompt produced by the VLM pipeline.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # The seed must actually reach the pipeline, otherwise the "Seed" and
    # "Randomize seed" controls have no effect. A CPU generator keeps the
    # noise reproducible regardless of which device the pipeline runs on.
    # int() guards against the slider delivering the value as a float.
    generator = torch.Generator(device="cpu").manual_seed(int(seed))

    with torch.inference_mode():
        # 1. Turn the free-text prompt into a structured JSON prompt.
        output = vlm_pipe(prompt=prompt)
        json_prompt = output.values["json_prompt"]

        # 2. Render the image from the JSON prompt.
        image = pipe(
            prompt=json_prompt,
            num_inference_steps=num_inference_steps,
            negative_prompt=negative_prompt,
            width=width,
            height=height,
            guidance_scale=guidance_scale,
            generator=generator,
        ).images[0]

    return image, json_prompt

# Centre the app and cap its width.
css = """
#col-container{
    margin: 0 auto;
    max-width: 768px;
}
"""

# Gradio UI: a prompt box, the JSON prompt produced by the VLM, the generated
# image, and an accordion of advanced sampling settings.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        # Heading matches the model name ("briaai/FIBO").
        gr.Markdown("## FIBO")

        with gr.Group():
            with gr.Column():
                with gr.Row():
                    prompt_in = gr.Textbox(label="Prompt")
                    # Filled with the JSON prompt returned by `infer`.
                    prompt_in_json = gr.JSON(label="Json")

                submit_btn = gr.Button("Generate")
        result = gr.Image(label="output")
        with gr.Accordion("Advanced Settings", open=False):
            with gr.Row():
                seed = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=0,
                )

                randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

            with gr.Row():
                guidance_scale = gr.Slider(
                    label="guidance scale",
                    minimum=1.0,
                    maximum=10.0,
                    step=0.1,
                    value=5.0,
                )
                num_inference_steps = gr.Slider(
                    label="number of inference steps",
                    minimum=1,
                    maximum=60,
                    step=1,
                    value=50,
                )
                height = gr.Slider(
                    label="Height",
                    minimum=768,
                    maximum=1248,
                    step=32,
                    value=1024,
                )

                width = gr.Slider(
                    label="Width",
                    minimum=832,
                    maximum=1344,
                    step=64,
                    value=1024,
                )
            with gr.Row():
                # Both default to the JSON encoding of an empty string ('""').
                negative_prompt = gr.Textbox(label="negative prompt", value=json.dumps(''))
                negative_prompt_json = gr.JSON(label="json negative prompt", value=json.dumps(''))

    # Live JSON preview of the text boxes is currently disabled:
    # prompt_in.change(
    #     handle_json,
    #     inputs=prompt_in,
    #     outputs=prompt_in_json)
    #
    # negative_prompt.change(handle_json, inputs=negative_prompt, outputs=negative_prompt_json)

    # Input order must match the positional parameters of `infer`.
    submit_btn.click(
        fn=infer,
        inputs=[
            prompt_in,
            negative_prompt,
            seed,
            randomize_seed,
            width,
            height,
            guidance_scale,
            num_inference_steps,
        ],
        outputs=[
            result, prompt_in_json
        ]
    )
demo.queue().launch()