Aduc-sdr-2_5s

Paused

App Files Files Community

Aduc-sdr-2_5s / app.py

euIaxs22

Update app.py

aa91947 verified about 2 months ago

raw

history blame

6.4 kB

	import os, random, tempfile
	import gradio as gr
	import torch
	import numpy as np
	from PIL import Image

	from diffusers import LTXConditionPipeline
	from diffusers.pipelines.ltx.pipeline_ltx_condition import LTXVideoCondition
	from diffusers.utils import export_to_video

	# LTX-Video model loaded through Diffusers.
	# The repository id can be overridden with the LTX_REPO environment variable.
	MODEL_REPO = os.getenv("LTX_REPO", "Lightricks/LTX-Video")

	# Basic parameters
	# FPS: frame rate used to turn a duration into a frame count and to export the video.
	FPS = 24
	# MAX_FRAMES: upper bound applied to the computed frame count (161 == 8 * 20 + 1).
	MAX_FRAMES = 161
	# MIN_DIM / MAX_DIM: bounds for the height and width values (sliders and suggested sizes).
	MIN_DIM = 256
	MAX_DIM = 1280

	# Use the GPU when CUDA is available, otherwise fall back to the CPU.
	device = "cuda" if torch.cuda.is_available() else "cpu"

	# Helpers de tipo
	def _to_int(x, d):
	if isinstance(x, (list, tuple)):
	x = x[0] if x else d
	try:
	return int(x)
	except Exception:
	return d

	def _to_float(x, d):
	if isinstance(x, (list, tuple)):
	x = x[0] if x else d
	try:
	return float(x)
	except Exception:
	return d

	def _to_bool(x, d=True):
	if isinstance(x, (list, tuple)):
	x = x[0] if x else d
	return bool(x)

	# Frame counts aligned to the form 8k + 1
	def _frames_from_secs(secs):
	    """Convert a duration in seconds into a frame count of the form ``8k + 1``.

	    The duration is parsed with ``_to_float`` (default 2.0), multiplied by
	    ``FPS`` and rounded, then snapped to the closest ``8k + 1`` value. The
	    result is capped at ``MAX_FRAMES`` and never drops below 9.
	    """
	    duration = _to_float(secs, 2.0)
	    raw_frames = int(round(duration * FPS))
	    if raw_frames < 9:
	        raw_frames = 9
	    # Number of 8-frame groups closest to the requested length
	    groups = round((raw_frames - 1) / 8.0)
	    aligned = groups * 8 + 1
	    capped = min(MAX_FRAMES, aligned)
	    return int(max(9, capped))

	def _pad32(v):
	return ((v - 1) // 32 + 1) * 32

	def _dims_for_image(path, target=768):
	    """Suggest a ``(height, width)`` pair for the image stored at *path*.

	    The shorter side is set to *target*; the longer side is scaled by the
	    image's aspect ratio and rounded to the nearest multiple of 32. Both
	    values are then clamped to ``[MIN_DIM, MAX_DIM]``.

	    Args:
	        path: Path of the image file to inspect.
	        target: Size assigned to the shorter side before clamping.

	    Returns:
	        Tuple ``(new_h, new_w)`` — note the height-first order.
	    """
	    # Only the size is needed; the context manager releases the file handle
	    # instead of leaving it open until garbage collection.
	    with Image.open(path) as im:
	        w, h = im.size
	    if w >= h:
	        new_h = target
	        new_w = int(round((w / max(1, h)) * new_h / 32) * 32)
	    else:
	        new_w = target
	        new_h = int(round((h / max(1, w)) * new_w / 32) * 32)
	    new_h = max(MIN_DIM, min(new_h, MAX_DIM))
	    new_w = max(MIN_DIM, min(new_w, MAX_DIM))
	    return new_h, new_w

	print(f"Carregando {MODEL_REPO} (LTXConditionPipeline)...")
	pipe = LTXConditionPipeline.from_pretrained(
	    MODEL_REPO,
	    torch_dtype=torch.bfloat16,  # simple and stable; optional FP8 can be added later
	)
	pipe.to(device)

	# Turn off dynamic shifting in the scheduler so it does not require `mu`.
	# Diffusers schedulers read their options from `scheduler.config`, so assigning
	# an attribute on the scheduler instance does not change its behaviour; the
	# scheduler is rebuilt from its config with the flag overridden instead.
	if hasattr(pipe, "scheduler") and pipe.scheduler.config.get("use_dynamic_shifting", False):
	    pipe.scheduler = pipe.scheduler.__class__.from_config(
	        pipe.scheduler.config, use_dynamic_shifting=False
	    )

	# VAE tiling to reduce VRAM peaks
	if hasattr(pipe, "vae") and hasattr(pipe.vae, "enable_tiling"):
	    pipe.vae.enable_tiling()

	def handle_dims(image_path, cur_h, cur_w):
	    """Return Gradio updates for the height and width controls.

	    When *image_path* is set, the new values come from
	    ``_dims_for_image(image_path, 768)``. With no image, or when sizing
	    fails (the error is printed), the current values are sent back unchanged.

	    Returns:
	        Tuple ``(height_update, width_update)`` of ``gr.update`` objects.
	    """
	    if image_path:
	        try:
	            new_h, new_w = _dims_for_image(image_path, 768)
	            return gr.update(value=new_h), gr.update(value=new_w)
	        except Exception as e:
	            print(f"Erro ao ajustar dimensões: {e}")
	    # No image, or sizing failed: keep whatever the controls currently hold
	    return gr.update(value=cur_h), gr.update(value=cur_w)

	def generate_i2v(
	    prompt,
	    neg_prompt,
	    image_path,
	    height_ui,
	    width_ui,
	    duration_ui,
	    seed_ui,
	    randomize_seed,
	    guidance_ui,
	    denoise_ui,
	    image_noise_ui,
	    progress=gr.Progress(track_tqdm=True)
	):
	    """Generate a video from one conditioning image and save it as an MP4.

	    Args:
	        prompt: Positive text prompt passed to the pipeline.
	        neg_prompt: Negative text prompt passed to the pipeline.
	        image_path: Path of the conditioning image, used as frame 0.
	        height_ui: Requested height; clamped to ``[MIN_DIM, MAX_DIM]`` and
	            rounded up to a multiple of 32.
	        width_ui: Requested width; same treatment as *height_ui*.
	        duration_ui: Duration in seconds, converted by ``_frames_from_secs``.
	        seed_ui: Seed used when *randomize_seed* is false.
	        randomize_seed: When truthy, a fresh random seed replaces *seed_ui*.
	        guidance_ui: Parsed to a float but currently NOT forwarded to the
	            pipeline (the matching keyword argument is commented out).
	        denoise_ui: Parsed but currently not forwarded, as above.
	        image_noise_ui: Parsed but currently not forwarded, as above.
	        progress: Gradio progress tracker.

	    Returns:
	        Tuple ``(out_path, seed)``: path of the written MP4 and the seed used.

	    Raises:
	        gr.Error: If *image_path* is empty.
	    """
	    if not image_path:
	        raise gr.Error("Selecione uma imagem.")

	    # Clamp before aligning so an out-of-range request (e.g. 0 or a huge value
	    # coming from an API call) cannot reach the pipeline.
	    h = max(MIN_DIM, min(MAX_DIM, _to_int(height_ui, 512)))
	    w = max(MIN_DIM, min(MAX_DIM, _to_int(width_ui, 704)))
	    h_pad = _pad32(h)
	    w_pad = _pad32(w)
	    num_frames = _frames_from_secs(duration_ui)

	    # Stable quality parameters. They are parsed here so the commented-out
	    # keyword arguments in the pipeline call below can be re-enabled directly.
	    guidance_scale = _to_float(guidance_ui, 5.0)  # 4.0-6.0 work well
	    denoise_strength = _to_float(denoise_ui, 0.4)  # 0.3-0.5 preserves the image well
	    image_cond_noise_scale = _to_float(image_noise_ui, 0.0)  # 0.0 locks appearance; 0.01-0.03 relaxes it

	    seed = _to_int(seed_ui, 42)
	    if _to_bool(randomize_seed, True):
	        seed = random.randint(0, 2**32 - 1)

	    # Condition: the image is the first frame. convert() returns an in-memory
	    # copy, so the file handle can be closed right away.
	    with Image.open(image_path) as src:
	        img = src.convert("RGB")
	    cond = LTXVideoCondition(image=img, frame_index=0, strength=1.0)

	    gen = torch.Generator(device=device).manual_seed(seed)

	    progress(0.0, desc="Gerando vídeo...")
	    out = pipe(
	        conditions=[cond],
	        prompt=prompt,
	        negative_prompt=neg_prompt,
	        width=w_pad,
	        height=h_pad,
	        num_frames=num_frames,
	        num_inference_steps=30,  # simple and stable
	        #guidance_scale=guidance_scale,
	        #guidance_rescale=0.7, # helps stabilise CFG
	        #decode_timestep=0.05, # safe values for >=0.9.1
	        #decode_noise_scale=0.025,
	        #image_cond_noise_scale=image_cond_noise_scale,
	        #denoise_strength=denoise_strength,
	        generator=gen,
	        output_type="pil",
	    )
	    frames = out.frames[0]

	    # Each result goes into its own temporary directory; it is not removed here
	    # because the returned path must stay readable after this function returns.
	    tmp = tempfile.mkdtemp()
	    out_path = os.path.join(tmp, f"output_{random.randint(10000,99999)}.mp4")
	    progress(0.8, desc="Salvando vídeo")
	    export_to_video(frames, out_path, fps=FPS)
	    return out_path, int(seed)

	# Simple UI
	# NOTE(review): the nesting below was reconstructed from a copy of this file
	# whose indentation had been lost — confirm the layout against the running app.
	with gr.Blocks() as demo:
	    gr.Markdown("LTX I2V (Diffusers) simples com denoise e dynamic shifting desligado")
	    with gr.Row():
	        # Left column: inputs and generation controls
	        with gr.Column():
	            # The image value is handed to handle_dims and generate_i2v as image_path
	            img = gr.Image(label="Imagem", type="filepath")
	            prompt = gr.Textbox(label="Prompt", value="Subject moves gently; subtle camera push-in", lines=2)
	            neg = gr.Textbox(label="Negative", value="worst quality, jitter, blur, distortions", lines=2)
	            dur = gr.Slider(label="Duração (s)", minimum=0.5, maximum=8.0, step=0.1, value=2.0)
	            # Height / width sliders move in steps of 32 between MIN_DIM and MAX_DIM
	            with gr.Row():
	                h = gr.Slider(label="Altura", minimum=MIN_DIM, maximum=MAX_DIM, step=32, value=512)
	                w = gr.Slider(label="Largura", minimum=MIN_DIM, maximum=MAX_DIM, step=32, value=704)
	            # Advanced options, collapsed by default.
	            # guidance / denoise / image_noise are passed to generate_i2v, which
	            # currently parses them but does not forward them to the pipeline.
	            with gr.Accordion("Avançado", open=False):
	                seed = gr.Number(label="Seed", value=42, precision=0, minimum=0, maximum=2**32-1)
	                rand = gr.Checkbox(label="Randomize seed", value=True)
	                guidance = gr.Slider(label="Guidance scale", minimum=1.0, maximum=10.0, step=0.1, value=5.0)
	                denoise = gr.Slider(label="Denoise strength", minimum=0.0, maximum=1.0, step=0.05, value=0.4)
	                image_noise = gr.Slider(label="Image cond noise", minimum=0.0, maximum=0.2, step=0.005, value=0.0)
	            btn = gr.Button("Gerar", variant="primary")
	        # Right column: generated video
	        with gr.Column():
	            vid = gr.Video(label="Vídeo")

	    # On upload, handle_dims proposes new height/width values for the sliders
	    img.upload(handle_dims, [img, h, w], [h, w])
	    # generate_i2v returns (video path, seed used); the seed is written back to the seed field
	    btn.click(
	        generate_i2v,
	        [prompt, neg, img, h, w, dur, seed, rand, guidance, denoise, image_noise],
	        [vid, seed]
	    )

	if __name__ == "__main__":
	    # For MCP integration: install gradio[mcp] and add mcp_server=True if needed.
	    demo.queue().launch(debug=True, share=False)