# Qwen-Image-Edit_Fast-Presets
#
# Running on Zero
#
# File size: 10,845 Bytes

import gradio as gr
import numpy as np
import random
import torch
import spaces
from PIL import Image
from diffusers import QwenImageEditPipeline
from diffusers.utils import is_xformers_available
import os
import base64
import json
from huggingface_hub import InferenceClient
import logging
#############################
os.environ.setdefault('GRADIO_ANALYTICS_ENABLED', 'False')
os.environ.setdefault('HF_HUB_DISABLE_TELEMETRY', '1')
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
#############################
def get_caption_language(prompt):
    """Return ``'zh'`` if *prompt* contains a CJK Unified Ideograph, else ``'en'``.

    Only the basic CJK Unified Ideographs block (U+4E00..U+9FFF) is checked;
    a single matching character is enough to classify the prompt as ``'zh'``.
    """
    cjk_first, cjk_last = '\u4e00', '\u9fff'
    contains_cjk = any(cjk_first <= ch <= cjk_last for ch in prompt)
    return 'zh' if contains_cjk else 'en'
def _extract_rewritten_text(reply_text):
    """Pull the rewritten instruction out of the model's reply.

    The system prompt asks the model to answer with a JSON object of the form
    ``{"Rewritten": "..."}`` (models often wrap it in a Markdown code fence).
    When such an object can be parsed, the value of its ``Rewritten`` key is
    returned; otherwise the raw reply is used. Either way the result is
    stripped and flattened to a single line.
    """
    text = reply_text.strip()
    # Slice from the first "{" to the last "}" so surrounding prose or
    # ```json fences do not break parsing.
    start, end = text.find("{"), text.rfind("}")
    if start != -1 and end > start:
        try:
            payload = json.loads(text[start:end + 1])
        except ValueError:  # json.JSONDecodeError subclasses ValueError
            payload = None
        if isinstance(payload, dict):
            for key, value in payload.items():
                # Accept any capitalisation of the key the prompt asks for.
                if key.lower() == "rewritten" and isinstance(value, str) and value.strip():
                    text = value
                    break
    return text.strip().replace("\n", " ")


def polish_prompt(original_prompt, system_prompt, hf_token):
    """
    Rewrites the prompt using a Hugging Face InferenceClient.
    Requires user-provided HF token for API access.

    Args:
        original_prompt: The user's edit instruction.
        system_prompt: System message describing how to rewrite the prompt.
        hf_token: Hugging Face token used as the API key.

    Returns:
        The rewritten, single-line instruction. The original prompt is
        returned unchanged when no token is supplied, when the API call
        fails, or when the model returns an empty reply.
    """
    if not hf_token or not hf_token.strip():
        gr.Warning("HF Token is required for prompt rewriting but was not provided!")
        return original_prompt
    client = InferenceClient(
        provider="cerebras",
        # Pasted tokens frequently carry stray whitespace that breaks auth.
        api_key=hf_token.strip(),
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": original_prompt},
    ]
    try:
        completion = client.chat.completions.create(
            model="Qwen/Qwen3-235B-A22B-Instruct-2507",
            messages=messages,
            max_tokens=512,
        )
        polished_prompt = _extract_rewritten_text(completion.choices[0].message.content)
    except Exception as e:
        # Best effort: any API/response problem falls back to the user's text.
        logger.warning("Error during Hugging Face API call: %s", e)
        gr.Warning("Failed to rewrite prompt. Using original.")
        return original_prompt
    # An empty rewrite would send a blank instruction to the image model.
    return polished_prompt or original_prompt
# System prompt for the prompt-rewriting LLM. infer() passes it to
# polish_prompt(), which sends it as the "system" chat message.
# The text asks the model to reply with a JSON object containing a single
# "Rewritten" key (see "Output Format" at the end of the prompt).
# NOTE(review): the section numbering jumps from "## 2" to "## 4"; left
# untouched here because this string is sent verbatim to the model.
SYSTEM_PROMPT_EDIT = '''
# Edit Instruction Rewriter
You are a professional edit instruction rewriter. Your task is to generate a precise, concise, and visually achievable instruction based on the user's intent and the input image.
## 1. General Principles
- Keep the rewritten instruction **concise** and clear.
- Avoid contradictions, vagueness, or unachievable instructions.
- Maintain the core logic of the original instruction; only enhance clarity and feasibility.
- Ensure new added elements or modifications align with the image's original context and art style.
## 2. Task Types
### Add, Delete, Replace:
- When the input is detailed, only refine grammar and clarity.
- For vague instructions, infer minimal but sufficient details.
- For replacement, use the format: `"Replace X with Y"`.
### Text Editing (e.g., text replacement):
- Enclose text content in quotes, e.g., `Replace "abc" with "xyz"`.
- Preserving the original structure and language—**do not translate** or alter style.
### Human Editing (e.g., change a person’s face/hair):
- Preserve core visual identity (gender, ethnic features).
- Describe expressions in subtle and natural terms.
- Maintain key clothing or styling details unless explicitly replaced.
### Style Transformation:
- If a style is specified, e.g., `Disco style`, rewrite it to encapsulate the essential visual traits.
- Use a fixed template for **coloring/restoration**:  
  `"Restore old photograph, remove scratches, reduce noise, enhance details, high resolution, realistic, natural skin tones, clear facial features, no distortion, vintage photo restoration"`  
  if applicable.
## 4. Output Format
Please provide the rewritten instruction in a clean `json` format as:
{
  "Rewritten": "..."
}
'''
# Model setup: these statements run once, at import time.
# Weights are requested in bfloat16; the pipeline is placed on the GPU when
# CUDA is visible and on the CPU otherwise.
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = QwenImageEditPipeline.from_pretrained("Qwen/Qwen-Image-Edit", torch_dtype=dtype).to(device)
# Load LoRA weights for acceleration
# (the weight file name indicates an 8-step Lightning checkpoint).
pipe.load_lora_weights(
    "lightx2v/Qwen-Image-Lightning", weight_name="Qwen-Image-Lightning-8steps-V1.1.safetensors"
)
# Fuse the loaded LoRA into the pipeline
# (presumably merges it into the base weights — confirm in the diffusers docs).
pipe.fuse_lora()
# Switch to xformers memory-efficient attention when the package is available.
if is_xformers_available():
    pipe.enable_xformers_memory_efficient_attention()
else:
    # NOTE(review): only availability is checked; no load is attempted on
    # this branch despite the "failed to load" wording.
    print("xformers not available or failed to load.")
def _resolve_prompt(original_prompt, rewrite_prompt, hf_token):
    """Decide which prompt to send to the pipeline and build the status text.

    Returns:
        A ``(prompt_to_use, prompt_info_markdown)`` tuple. ``prompt_to_use``
        is the rewritten prompt when rewriting succeeded and the original
        prompt in every other case.
    """
    if not rewrite_prompt:
        return original_prompt, (
            "## Original Prompt (No Rewrite)\n"
            "**User Input:**  \n"
            f"{original_prompt}"
        )

    # The token may be None (e.g. API callers), not just an empty string.
    if not (hf_token or "").strip():
        gr.Warning("HF Token is required for prompt rewriting but was not provided!")
        return original_prompt, (
            "## ⚠️ Prompt Rewriting Skipped (No HF Token)\n"
            "**Original Prompt:**  \n"
            f"{original_prompt}"
        )

    try:
        rewritten_prompt = polish_prompt(original_prompt, SYSTEM_PROMPT_EDIT, hf_token)
    except Exception as e:
        gr.Warning(f"Prompt rewriting failed: {str(e)}")
        return original_prompt, (
            "## ❌ Prompt Rewrite Failed\n"
            "**Original Prompt:**  \n"
            f"{original_prompt}\n"
            "**Error:**  \n"
            f"{str(e)}"
        )

    # polish_prompt() handles API errors itself and hands back the original
    # prompt in that case, so an unchanged prompt must not be reported as a
    # successful rewrite.
    if rewritten_prompt == original_prompt:
        return original_prompt, (
            "## ⚠️ Prompt Rewrite Not Applied\n"
            "**Original Prompt:**  \n"
            f"{original_prompt}\n\n"
            "The rewriter returned the prompt unchanged (the rewrite request may have failed)."
        )

    return rewritten_prompt, (
        "## ✅ Prompt Rewrite Successful\n"
        "**Original Prompt:**  \n"
        f"{original_prompt}\n\n"
        "**Enhanced Prompt:**  \n"
        f"{rewritten_prompt}"
    )


@spaces.GPU(duration=60)
def infer(
    image,
    prompt,
    seed=42,
    randomize_seed=False,
    true_guidance_scale=1.0,
    num_inference_steps=8,
    rewrite_prompt=False,
    hf_token="",
    num_images_per_prompt=1,
    progress=gr.Progress(track_tqdm=True),
):
    """
    Edit ``image`` according to ``prompt`` and return the generated images.

    Requires user-provided HF token for prompt rewriting.

    Args:
        image: Input image handed to the pipeline.
        prompt: The user's edit instruction.
        seed: Seed for the random generator (ignored when ``randomize_seed``).
        randomize_seed: Draw a fresh seed in ``[0, MAX_SEED]`` when true.
        true_guidance_scale: Forwarded to the pipeline as ``true_cfg_scale``.
        num_inference_steps: Number of denoising steps.
        rewrite_prompt: Rewrite the prompt through ``polish_prompt`` first.
        hf_token: Hugging Face token used for prompt rewriting.
        num_images_per_prompt: Number of images to generate.
        progress: Gradio progress tracker (tracks tqdm output).

    Returns:
        ``(edited_images, seed, prompt_info)`` — the generated images, the
        seed actually used, and a Markdown summary of the prompt handling.

    Raises:
        gr.Error: If no input image was provided.
    """
    if image is None:
        # Fail early with a readable message instead of an error raised from
        # inside the pipeline.
        raise gr.Error("Please upload an input image before editing.")

    negative_prompt = " "
    rewritten_prompt, prompt_info = _resolve_prompt(prompt, rewrite_prompt, hf_token)

    # Generate images
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # UI sliders may deliver floats; the generator and the pipeline counts
    # need integers.
    seed = int(seed)
    generator = torch.Generator(device=device).manual_seed(seed)

    edited_images = pipe(
        image,
        prompt=rewritten_prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=int(num_inference_steps),
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=int(num_images_per_prompt),
    ).images

    return edited_images, seed, prompt_info

# Upper bound for seeds: the largest signed 32-bit integer.
MAX_SEED = np.iinfo(np.int32).max

# Sample edit instructions.
examples = [
    "Replace the cat with a friendly golden retriever. Make it look happier, and add more background details.",
    "Add text 'Qwen - AI for image editing' in Chinese at the bottom center with a small shadow.",
    "Change the style to 1970s vintage, add old photo effect, restore any scratches on the wall or window.",
    "Remove the blue sky and replace it with a dark night cityscape.",
    'Replace "Qwen" with "通义" in the Image. Ensure Chinese font is used for "通义" and position it to the top left with a light heading-style font.',
]

# Gradio UI: inputs and advanced settings on the left, results and the
# optional prompt-rewriter controls on the right.
with gr.Blocks() as demo:
    gr.Markdown("# Qwen-Image-Edit [FAST] with HF Prompt Enhancement")
    gr.Markdown("✨ **8-step lightning inferencing with lightx2v's LoRA.**")
    gr.Markdown("⚠️ **Prompt rewriting requires your own [Hugging Face token](https://huggingface.co/settings/tokens)**")
    gr.Markdown("🚧 **Work in progress, further improvements coming soon.**")

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(label="Input Image", type="pil")
            prompt = gr.Text(label="Edit Instruction", placeholder="e.g. Add a dog to the right side.")
            with gr.Accordion("Advanced Settings", open=False):
                seed = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=0
                )
                randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
                with gr.Row():
                    # Default matches infer()'s own default of 1.0: the fused
                    # 8-step Lightning LoRA is meant to run without
                    # classifier-free guidance, and values above 1.0 also add
                    # a second (negative-prompt) pass per step.
                    true_guidance_scale = gr.Slider(
                        label="True Guidance Scale",
                        minimum=1.0,
                        maximum=5.0,
                        step=0.1,
                        value=1.0
                    )
                    num_inference_steps = gr.Slider(
                        label="Inference Steps (Fast 8-step mode)",
                        minimum=4,
                        maximum=16,
                        step=1,
                        value=8
                    )
                    num_images_per_prompt = gr.Slider(
                        label="Images per Prompt",
                        minimum=1,
                        maximum=4,
                        step=1,
                        value=1
                    )
            run_button = gr.Button("Edit", variant="primary")

        with gr.Column():
            result = gr.Gallery(label="Output Images", show_label=False, columns=1)
            # Prompt summary panel; hidden until the first run is triggered.
            prompt_info = gr.Markdown("## Prompt Details", visible=False)

            with gr.Group():
                rewrite_toggle = gr.Checkbox(label="Use Prompt Rewriter (Requires HF Token)", value=False, interactive=True)
                hf_token_input = gr.Textbox(
                    label="Your Hugging Face Token",
                    type="password",
                    placeholder="hf_xxxxxxxxxxxxxxxx",
                    visible=False,
                    info="Required for prompt rewriting - get yours from [Hugging Face settings](https://huggingface.co/settings/tokens). API tokens are kept safe locally, but as good practice please make sure to double check the source code. Invalid or missing keys will revert to the original prompt entered."
                )

                def toggle_token_visibility(checked):
                    """Show the token field only while the rewriter is enabled."""
                    return gr.update(visible=checked)

                rewrite_toggle.change(
                    toggle_token_visibility,
                    inputs=[rewrite_toggle],
                    outputs=[hf_token_input]
                )

    # Run the edit from either the button or pressing Enter in the prompt box.
    # The input order must match infer()'s positional parameters.
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            input_image,
            prompt,
            seed,
            randomize_seed,
            true_guidance_scale,
            num_inference_steps,
            rewrite_toggle,
            hf_token_input,
            num_images_per_prompt
        ],
        outputs=[result, seed, prompt_info]
    )

    def set_prompt_visible():
        """Reveal the prompt summary panel."""
        return gr.update(visible=True)

    # Registered separately from infer and with queue=False so the panel is
    # revealed without going through the queue.
    run_button.click(
        fn=set_prompt_visible,
        inputs=None,
        outputs=[prompt_info],
        queue=False
    )
    prompt.submit(
        fn=set_prompt_visible,
        inputs=None,
        outputs=[prompt_info],
        queue=False
    )

# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()