# aduc_framework/managers/llama_scout_manager.py
#
# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
#
# Version 1.0.0 (Llama-4-Scout Multimodal Specialist)
#
# This manager implements a state-of-the-art multimodal specialist, capable of
# reasoning over images and sequences of video frames to provide
# complex analyses.

import gc
import logging

import imageio
import torch
import yaml
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM

from ..tools.hardware_manager import hardware_manager

logger = logging.getLogger(__name__)
class LlamaScoutManager:
    """
    Multimodal specialist that uses Llama-4-Scout for visual analysis.
    Keeps the model "hot" on the GPU for maximum performance.
    """
    MODEL_NAME = "meta-llama/Llama-4-Scout-17B-16E-Instruct"

    def __init__(self, device_id: str):
        self.device = torch.device(device_id)
        # Prefer bfloat16 on GPUs that support it; otherwise fall back to float16.
        self.dtype = torch.bfloat16 if 'cuda' in self.device.type and torch.cuda.is_bf16_supported() else torch.float16
        self.processor = None
        self.model = None
        self._initialize_and_warm_up_model()
    def _initialize_and_warm_up_model(self):
        """
        Downloads, loads, and moves the model to the GPU immediately at startup.
        """
        if self.model is not None:
            return
        try:
            logger.info(f"HOT INITIALIZATION: Loading MLLM '{self.MODEL_NAME}' onto GPU {self.device}...")
            self.processor = AutoProcessor.from_pretrained(self.MODEL_NAME)
            self.model = AutoModelForCausalLM.from_pretrained(
                self.MODEL_NAME,
                torch_dtype=self.dtype,
                trust_remote_code=True  # Required for recently released models
            ).to(self.device)
            logger.info(f"MLLM '{self.MODEL_NAME}' is 'hot' and ready on GPU {self.device}.")
        except Exception as e:
            logger.error(f"CRITICAL failure while loading the MLLM: {e}", exc_info=True)
            self.model = None
    def _cleanup_gpu_cache(self):
        """Clears the VRAM cache after an inference pass."""
        if self.device.type == 'cuda':
            gc.collect()
            torch.cuda.empty_cache()
    def answer_on_image(self, image_path: str, question: str) -> str:
        """Answers a question based on a single image."""
        if self.model is None:
            return "Error: Multimodal model not initialized. Check logs."
        try:
            image = Image.open(image_path).convert("RGB")
            prompt = f"<|user|>\n<|image_1|>\n{question}<|end|>\n<|assistant|>\n"
            inputs = self.processor(text=prompt, images=image, return_tensors="pt").to(self.device, self.dtype)
            with torch.no_grad():
                generated_ids = self.model.generate(**inputs, max_new_tokens=200, do_sample=False)
            # Decode only the newly generated tokens; splitting the full decoded text on
            # "<|assistant|>" is unreliable because skip_special_tokens may strip the marker.
            new_tokens = generated_ids[:, inputs["input_ids"].shape[1]:]
            clean_response = self.processor.batch_decode(new_tokens, skip_special_tokens=True)[0].strip()
            logger.info(f"Question: '{question}' | Image: '{image_path}' -> Answer: '{clean_response}'")
            return clean_response
        except Exception as e:
            logger.error(f"Error processing MLLM for image {image_path}: {e}", exc_info=True)
            return f"Error analyzing the image: {e}"
        finally:
            self._cleanup_gpu_cache()
    def analyze_video_movement(self, video_path: str) -> str:
        """
        Analyzes a video by extracting frames and asking the MLLM to describe the movement.
        """
        if self.model is None:
            return "Error: Multimodal model not initialized. Check logs."
        logger.info(f"Starting MOVEMENT ANALYSIS for video '{video_path}'...")
        try:
            frames = []
            with imageio.get_reader(video_path, 'ffmpeg') as reader:
                meta_data = reader.get_meta_data()
                total_frames = meta_data.get('nframes')
                # The ffmpeg backend often reports 'nframes' as inf or omits it; fall back to counting.
                if not isinstance(total_frames, int):
                    total_frames = reader.count_frames()
                if total_frames < 8:
                    return "Video is too short for movement analysis."
                # Sample one frame out of every eight across the whole clip.
                indices_to_sample = list(range(0, total_frames, 8))
                frames = [Image.fromarray(reader.get_data(i)) for i in indices_to_sample]

            # Build the multimodal prompt containing multiple images.
            prompt_text = (
                "<|user|>\n"
                + "".join([f"<|image_{i+1}|>\n" for i in range(len(frames))])
                + "You are a film analyst. The images above are sequential frames from a video, sampled at regular intervals. "
                  "Describe the movement, action, and narrative that unfolds across this sequence. Focus on what happens between the frames.<|end|>\n"
                  "<|assistant|>\n"
            )
            inputs = self.processor(text=prompt_text, images=frames, return_tensors="pt").to(self.device, self.dtype)
            with torch.no_grad():
                generated_ids = self.model.generate(**inputs, max_new_tokens=400, do_sample=False)
            # Decode only the newly generated tokens, as in answer_on_image.
            new_tokens = generated_ids[:, inputs["input_ids"].shape[1]:]
            analysis = self.processor.batch_decode(new_tokens, skip_special_tokens=True)[0].strip()
            logger.info(f"Video '{video_path}' analysis complete. Result: '{analysis[:100]}...'")
            return analysis
        except Exception as e:
            logger.error(f"Error analyzing video movement for {video_path}: {e}", exc_info=True)
            return f"Error during video movement analysis: {e}"
        finally:
            self._cleanup_gpu_cache()
# --- Singleton Instantiation ---
try:
    with open("config.yaml", 'r') as f:
        config = yaml.safe_load(f)
    # The config section was renamed to reflect the new model.
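    # Minimal sketch of the expected config.yaml layout; only the
    # 'specialists.llama_scout.gpus_required' key is actually read below,
    # any other keys shown here would be hypothetical:
    #
    #   specialists:
    #     llama_scout:
    #       gpus_required: 1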
    gpus_required = config['specialists'].get('llama_scout', {}).get('gpus_required', 1)
    device_ids = hardware_manager.allocate_gpus('LlamaScout', gpus_required)
    llama_scout_manager_singleton = LlamaScoutManager(device_id=device_ids[0])
except Exception as e:
    logger.critical(f"Could not initialize LlamaScoutManager: {e}. Using a placeholder.", exc_info=True)

    class LlamaScoutPlaceholder:
        def answer_on_image(self, *args, **kwargs):
            return "Error: LlamaScout Specialist not initialized."

        def analyze_video_movement(self, *args, **kwargs):
            return "Error: LlamaScout Specialist not initialized."

    llama_scout_manager_singleton = LlamaScoutPlaceholder()
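

# Minimal usage sketch, assuming the singleton initialized successfully; the
# image/video paths and the question below are hypothetical placeholders.
if __name__ == "__main__":
    # Single-image visual question answering.
    answer = llama_scout_manager_singleton.answer_on_image(
        "example_frame.png", "What is the main subject of this image?"
    )
    print(answer)

    # Movement analysis over frames sampled from a short clip.
    movement = llama_scout_manager_singleton.analyze_video_movement("example_clip.mp4")
    print(movement)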