# aduc_framework/managers/llama_scout_manager.py
#
# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
#
# Version 1.0.0 (Llama-4-Scout Multimodal Specialist)
#
# This manager implements a state-of-the-art multimodal specialist, capable of
# reasoning over images and sequences of video frames to provide complex
# analyses.
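#
# Typical usage elsewhere in the framework (a minimal sketch; assumes the
# module-level singleton at the bottom of this file initialized successfully
# and that the file paths exist):
#
#   from aduc_framework.managers.llama_scout_manager import llama_scout_manager_singleton
#
#   answer = llama_scout_manager_singleton.answer_on_image("frame.png", "What is shown?")
#   motion = llama_scout_manager_singleton.analyze_video_movement("clip.mp4")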
import gc
import logging

import imageio
import torch
import yaml
from PIL import Image
from transformers import AutoProcessor, AutoModelForImageTextToText

from ..tools.hardware_manager import hardware_manager

logger = logging.getLogger(__name__)


class LlamaScoutManager:
"""
Especialista Multimodal que utiliza o Llama-4-Scout para análise visual.
Mantém o modelo "quente" na GPU para performance máxima.
"""
MODEL_NAME = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
def __init__(self, device_id: str):
self.device = torch.device(device_id)
self.dtype = torch.bfloat16 if 'cuda' in self.device.type and torch.cuda.is_bf16_supported() else torch.float16
self.processor = None
self.model = None
self._initialize_and_warm_up_model()

    def _initialize_and_warm_up_model(self):
        """
        Downloads, loads, and moves the model to the GPU immediately at startup.
        """
        if self.model is not None:
            return
        try:
            logger.info(f"HOT INITIALIZATION: Loading MLLM '{self.MODEL_NAME}' onto GPU {self.device}...")
            self.processor = AutoProcessor.from_pretrained(self.MODEL_NAME)
            # An image-text-to-text head is needed so the pixel_values produced by
            # the processor are accepted; a plain causal-LM class would reject
            # image inputs.
            self.model = AutoModelForImageTextToText.from_pretrained(
                self.MODEL_NAME,
                torch_dtype=self.dtype,
                trust_remote_code=True  # Needed for recently released model code
            ).to(self.device)
            logger.info(f"MLLM '{self.MODEL_NAME}' is hot and ready on GPU {self.device}.")
        except Exception as e:
            logger.error(f"CRITICAL failure while loading the MLLM: {e}", exc_info=True)
            self.model = None

    def _cleanup_gpu_cache(self):
        """Frees the VRAM cache after an inference pass."""
        if self.device.type == 'cuda':
            gc.collect()
            torch.cuda.empty_cache()

    def answer_on_image(self, image_path: str, question: str) -> str:
        """Answers a question about a single image."""
        if self.model is None:
            return "Error: Multimodal model not initialized. Check logs."
        try:
            image = Image.open(image_path).convert("RGB")
            prompt = f"<|user|>\n<|image_1|>\n{question}<|end|>\n<|assistant|>\n"
            inputs = self.processor(text=prompt, images=image, return_tensors="pt").to(self.device, self.dtype)
            with torch.no_grad():
                generated_ids = self.model.generate(**inputs, max_new_tokens=200, do_sample=False)
            # Decode only the newly generated tokens; splitting the full decode on
            # "<|assistant|>" breaks when skip_special_tokens strips that marker.
            new_tokens = generated_ids[:, inputs["input_ids"].shape[1]:]
            clean_response = self.processor.batch_decode(new_tokens, skip_special_tokens=True)[0].strip()
            logger.info(f"Question: '{question}' | Image: '{image_path}' -> Answer: '{clean_response}'")
            return clean_response
        except Exception as e:
            logger.error(f"Error processing MLLM for image {image_path}: {e}", exc_info=True)
            return f"Error analyzing the image: {e}"
        finally:
            self._cleanup_gpu_cache()

    def analyze_video_movement(self, video_path: str) -> str:
        """
        Analyzes a video by extracting frames and asking the MLLM to describe the movement.
        """
        if self.model is None:
            return "Error: Multimodal model not initialized. Check logs."
        logger.info(f"Starting MOVEMENT ANALYSIS for video '{video_path}'...")
        try:
            with imageio.get_reader(video_path, 'ffmpeg') as reader:
                meta_data = reader.get_meta_data()
                # 'nframes' can be float('inf') for some containers; fall back to
                # an explicit count in that case.
                total_frames = meta_data.get('nframes')
                if not isinstance(total_frames, int):
                    total_frames = reader.count_frames()
                if total_frames < 8:
                    return "Video is too short for movement analysis."
                indices_to_sample = list(range(0, total_frames, 8))
                frames = [Image.fromarray(reader.get_data(i)) for i in indices_to_sample]
            # Build the multimodal prompt with one image placeholder per sampled frame
            prompt_text = (
                "<|user|>\n"
                + "".join([f"<|image_{i+1}|>\n" for i in range(len(frames))])
                + "You are a film analyst. The images above are sequential frames from a video, sampled at regular intervals. "
                "Describe the movement, action, and narrative that unfolds across this sequence. Focus on what happens between the frames.<|end|>\n"
                "<|assistant|>\n"
            )
            inputs = self.processor(text=prompt_text, images=frames, return_tensors="pt").to(self.device, self.dtype)
            with torch.no_grad():
                generated_ids = self.model.generate(**inputs, max_new_tokens=400, do_sample=False)
            # Decode only the newly generated tokens (see answer_on_image).
            new_tokens = generated_ids[:, inputs["input_ids"].shape[1]:]
            analysis = self.processor.batch_decode(new_tokens, skip_special_tokens=True)[0].strip()
            logger.info(f"Video '{video_path}' analysis complete. Result: '{analysis[:100]}...'")
            return analysis
        except Exception as e:
            logger.error(f"Error analyzing video movement for {video_path}: {e}", exc_info=True)
            return f"Error during video movement analysis: {e}"
        finally:
            self._cleanup_gpu_cache()

# --- Singleton Instantiation ---
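# The bootstrap below expects a config.yaml shaped roughly like this
# (a minimal sketch showing only the keys read here):
#
#   specialists:
#     llama_scout:
#       gpus_required: 1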
try:
    with open("config.yaml", 'r') as f:
        config = yaml.safe_load(f)
    # The config section is named after the new model
    gpus_required = config['specialists'].get('llama_scout', {}).get('gpus_required', 1)
    device_ids = hardware_manager.allocate_gpus('LlamaScout', gpus_required)
    llama_scout_manager_singleton = LlamaScoutManager(device_id=device_ids[0])
except Exception as e:
    logger.critical(f"Could not initialize LlamaScoutManager: {e}. Using a placeholder.", exc_info=True)

    class LlamaScoutPlaceholder:
        def answer_on_image(self, *args, **kwargs):
            return "Error: LlamaScout Specialist not initialized."

        def analyze_video_movement(self, *args, **kwargs):
            return "Error: LlamaScout Specialist not initialized."

    llama_scout_manager_singleton = LlamaScoutPlaceholder()
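

if __name__ == "__main__":
    # Minimal smoke test (a sketch): the file paths below are hypothetical
    # placeholders. Run as a module so the relative imports resolve, e.g.:
    #   python -m aduc_framework.managers.llama_scout_manager
    # If initialization failed, the singleton is the error-returning
    # placeholder defined above.
    print(llama_scout_manager_singleton.answer_on_image(
        "sample_frame.png", "Describe this image in one sentence."
    ))
    print(llama_scout_manager_singleton.analyze_video_movement("sample_clip.mp4"))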