abdull4h commited on
Commit
d43130f
·
verified ·
1 Parent(s): ae02789

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +104 -676
app.py CHANGED
@@ -1,136 +1,104 @@
1
  import os
2
  import re
3
- import gradio as gr
4
- from huggingface_hub import login
5
- import spaces
6
-
7
- # CRITICAL: Disable PyTorch compiler BEFORE importing torch
8
- os.environ["PYTORCH_NO_CUDA_MEMORY_CACHING"] = "1"
9
- os.environ["TORCH_COMPILE_DISABLE"] = "1"
10
- os.environ["TORCH_INDUCTOR_DISABLE"] = "1"
11
- os.environ["TORCHINDUCTOR_DISABLE_CUDAGRAPHS"] = "1"
12
- os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
13
- os.environ["TORCH_USE_CUDA_DSA"] = "0"
14
-
15
- # Now import torch and disable its compiler features
16
  import torch
17
- if hasattr(torch, "_dynamo"):
18
- if hasattr(torch._dynamo, "config"):
19
- torch._dynamo.config.suppress_errors = True
20
- if hasattr(torch._dynamo, "disable"):
21
- torch._dynamo.disable()
22
- print("Disabled torch._dynamo")
23
 
24
  # Model ID
25
  model_id = "CohereForAI/c4ai-command-r7b-arabic-02-2025"
26
 
27
- # Get token from environment and login
28
- hf_token = os.environ.get("HF_TOKEN")
29
- if hf_token:
30
- login(token=hf_token)
31
- print("Logged in with HF_TOKEN")
32
- else:
33
- print("No HF_TOKEN found. Please set the HF_TOKEN environment variable.")
34
-
35
- # Import transformers
36
- from transformers import AutoTokenizer, AutoModelForCausalLM
37
-
38
- # Simpler clean_response function
39
- def clean_response(text):
40
- # Remove website references
41
- text = re.sub(r'- موقع .*?\n', '', text)
42
-
43
- # Remove dates
44
- text = re.sub(r'\d+ [فبراير|مارس|أبريل|مايو|يونيو|يوليو|أغسطس|سبتمبر|أكتوبر|نوفمبر|ديسمبر]+ \d+ - \d+:\d+ [صباحا|مساء|ص|م]', '', text)
45
-
46
- # Remove repeated questions
47
- text = re.sub(r'(\?[^?]*){2,}', '?', text)
 
 
 
 
 
 
 
 
 
 
48
 
49
- # Remove excessive repetition (sentences that repeat)
50
- lines = text.split('،')
51
- unique_lines = []
52
- for line in lines:
53
- if line.strip() and line.strip() not in unique_lines:
54
- unique_lines.append(line.strip())
55
 
56
- return '.join(unique_lines)
57
 
58
- # Generate text with the Arabic model
59
- @spaces.GPU
60
- def generate_text(prompt, max_length=100, temperature=0.7, force_arabic=True):
61
- if not prompt.strip():
62
- return "Please enter a prompt."
63
-
64
- try:
65
- # Load tokenizer and model
66
- print("Loading tokenizer...")
67
- tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
68
-
69
- print("Loading model with compiler disabled...")
70
- model = AutoModelForCausalLM.from_pretrained(
71
- model_id,
72
- token=hf_token,
73
- torch_dtype=torch.float16,
74
- device_map="auto",
75
- use_cache=True,
76
- use_flash_attention_2=False,
77
- _attn_implementation="eager"
78
- )
79
-
80
- print(f"Model loaded successfully on {next(model.parameters()).device}")
81
 
82
- """
83
- # For Arabic-focused prompting, add a language instruction if needed
84
- if force_arabic and not any(arabic_word in prompt for arabic_word in ["العربية", "بالعربي", "باللغة العربية"]):
85
- # Add Arabic instruction only if prompt is already in Arabic
86
- if any('\u0600' <= c <= '\u06FF' for c in prompt):
87
- enhanced_prompt = prompt + " (أجب باللغة العربية)"
88
- print(f"Added Arabic language hint: {enhanced_prompt}")
89
- else:
90
- enhanced_prompt = prompt
91
- else:
92
- enhanced_prompt = prompt
93
- """
94
-
95
- # Replace with this line:
96
- enhanced_prompt = prompt
97
-
98
- # Create input for the model using proper tokenization with attention mask
99
- print(f"Generating response for: {enhanced_prompt[:50]}...")
100
-
101
- # Try to use a more direct approach
102
- encoding = tokenizer(prompt, return_tensors="pt")
103
- input_ids = encoding.input_ids.to(model.device)
104
- attention_mask = encoding.attention_mask.to(model.device)
105
-
106
- print(f"Input shape: {input_ids.shape}, Attention mask shape: {attention_mask.shape}")
107
-
108
- # Add repetition penalty
109
  with torch.inference_mode():
110
- output = model.generate(
111
- input_ids=input_ids,
112
- attention_mask=attention_mask,
113
- max_new_tokens=int(max_length),
114
- do_sample=True,
115
- temperature=0.7,
116
- repetition_penalty=1.2, # Add this parameter
117
- no_repeat_ngram_size=3, # And this one
 
 
 
118
  pad_token_id=tokenizer.eos_token_id
119
  )
120
 
121
- # Get only the generated part (exclude the prompt)
122
- input_length = input_ids.shape[1]
123
- generated_tokens = output[0][input_length:]
124
 
125
- # Decode just the generated part
126
- generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)
127
- print(f"Generated text (after input): {generated_text[:100]}...")
128
 
129
- # Clean any remaining special tokens
130
- cleaned_response = clean_response(generated_text)
131
- print(f"Final cleaned response: {cleaned_response[:100]}...")
132
 
133
- return cleaned_response
134
 
135
  except Exception as e:
136
  import traceback
@@ -138,564 +106,24 @@ def generate_text(prompt, max_length=100, temperature=0.7, force_arabic=True):
138
  print(f"Error generating text: {str(e)}\n{tb}")
139
  return f"Error generating text: {str(e)}"
140
 
141
- # Keep the existing code, but replace the custom_css with the following:
142
-
143
- custom_css = """
144
- /* Enhanced Color Scheme and UI for Arabic Language Model */
145
- :root {
146
- --primary-color: #1F4287; /* Deep Royal Blue */
147
- --secondary-color: #278EA5; /* Teal Blue */
148
- --background-color: #F9FAFC; /* Soft Light Gray */
149
- --text-color: #333942; /* Dark Slate */
150
- --accent-color: #21BF73; /* Vibrant Green */
151
- --highlight-color: #FF6B6B; /* Coral Red */
152
- --header-gradient: linear-gradient(135deg, #1F4287 0%, #278EA5 100%);
153
- --card-shadow: 0 10px 30px rgba(31, 66, 135, 0.12);
154
- --input-bg: #F2F5F9; /* Light Blue-Gray for inputs */
155
- --border-radius: 16px; /* Consistent border radius */
156
- }
157
-
158
- /* Base Styles */
159
- .gradio-container {
160
- background: var(--background-color);
161
- color: var(--text-color);
162
- font-family: 'Cairo', 'Noto Sans Arabic', 'Helvetica Neue', 'Arial', sans-serif;
163
- max-width: 1200px;
164
- margin: 0 auto;
165
- padding: 20px;
166
- }
167
-
168
- /* Typography */
169
- .gradio-container h1 {
170
- color: var(--primary-color);
171
- font-size: 2.5rem;
172
- text-align: center;
173
- margin-bottom: 0.5rem;
174
- font-weight: 800;
175
- }
176
-
177
- .gradio-container h2 {
178
- color: var(--secondary-color);
179
- font-size: 1.5rem;
180
- text-align: center;
181
- margin-bottom: 2rem;
182
- font-weight: 600;
183
- }
184
-
185
- .gradio-container h3 {
186
- color: var(--secondary-color);
187
- font-size: 1.25rem;
188
- margin-top: 1.5rem;
189
- margin-bottom: 1rem;
190
- font-weight: 600;
191
- }
192
-
193
- /* Card-style Blocks */
194
- .gradio-container .block {
195
- background-color: white;
196
- border-radius: var(--border-radius);
197
- box-shadow: var(--card-shadow);
198
- border: none;
199
- padding: 30px;
200
- margin: 24px 0;
201
- transition: all 0.3s ease;
202
- }
203
-
204
- .gradio-container .block:hover {
205
- box-shadow: 0 15px 40px rgba(31, 66, 135, 0.18);
206
- transform: translateY(-5px);
207
- }
208
-
209
- /* Header Style */
210
- .gradio-container .header {
211
- background: var(--header-gradient);
212
- color: white;
213
- padding: 30px;
214
- border-radius: var(--border-radius);
215
- margin-bottom: 30px;
216
- text-align: center;
217
- position: relative;
218
- overflow: hidden;
219
- }
220
-
221
- .gradio-container .header::before {
222
- content: '';
223
- position: absolute;
224
- top: 0;
225
- left: 0;
226
- right: 0;
227
- bottom: 0;
228
- background: url('data:image/svg+xml;utf8,<svg xmlns="http://www.w3.org/2000/svg" width="100" height="100" viewBox="0 0 100 100"><text x="50%" y="50%" font-size="80" text-anchor="middle" dominant-baseline="middle" font-family="Arial" fill="rgba(255,255,255,0.05)">ذ</text></svg>') repeat;
229
- opacity: 0.1;
230
- }
231
-
232
- .gradio-container .header h1,
233
- .gradio-container .header h2 {
234
- color: white;
235
- text-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
236
- }
237
-
238
- /* Input and Output Containers */
239
- .gradio-container .input-container,
240
- .gradio-container .output-container {
241
- background-color: white;
242
- border-radius: var(--border-radius);
243
- box-shadow: var(--card-shadow);
244
- padding: 25px;
245
- margin-bottom: 25px;
246
- transition: all 0.3s ease;
247
- }
248
-
249
- .gradio-container .input-container:hover,
250
- .gradio-container .output-container:hover {
251
- box-shadow: 0 15px 40px rgba(31, 66, 135, 0.15);
252
- }
253
-
254
- .gradio-container .block-title {
255
- color: var(--primary-color);
256
- font-weight: bold;
257
- text-align: center;
258
- margin-bottom: 20px;
259
- font-size: 1.5rem;
260
- position: relative;
261
- padding-bottom: 10px;
262
- }
263
-
264
- .gradio-container .block-title::after {
265
- content: '';
266
- position: absolute;
267
- bottom: 0;
268
- left: 50%;
269
- transform: translateX(-50%);
270
- width: 60px;
271
- height: 3px;
272
- background: var(--accent-color);
273
- border-radius: 3px;
274
- }
275
-
276
- /* Textareas and Inputs */
277
- .gradio-container textarea,
278
- .gradio-container input[type="text"] {
279
- background-color: var(--input-bg);
280
- border: 2px solid transparent;
281
- border-radius: calc(var(--border-radius) - 4px);
282
- color: var(--text-color);
283
- direction: rtl;
284
- padding: 15px;
285
- transition: all 0.3s ease;
286
- font-size: 1.05rem;
287
- line-height: 1.6;
288
- resize: vertical;
289
- }
290
-
291
- .gradio-container textarea::placeholder,
292
- .gradio-container input[type="text"]::placeholder {
293
- color: #9EA7B3;
294
- }
295
-
296
- .gradio-container textarea:focus,
297
- .gradio-container input[type="text"]:focus {
298
- border-color: var(--accent-color);
299
- box-shadow: 0 0 0 3px rgba(33, 191, 115, 0.2);
300
- outline: none;
301
- }
302
-
303
- /* Labels */
304
- .gradio-container label {
305
- color: var(--primary-color);
306
- font-weight: 600;
307
- margin-bottom: 8px;
308
- display: block;
309
- font-size: 1.05rem;
310
- }
311
-
312
- /* Buttons */
313
- .gradio-container .primary {
314
- background: linear-gradient(135deg, var(--secondary-color) 0%, var(--accent-color) 100%) !important;
315
- color: white !important;
316
- border-radius: calc(var(--border-radius) - 4px);
317
- transition: all 0.3s ease;
318
- font-weight: bold;
319
- padding: 12px 24px !important;
320
- border: none !important;
321
- font-size: 1.1rem;
322
- box-shadow: 0 4px 15px rgba(33, 191, 115, 0.3);
323
- text-align: center;
324
- }
325
-
326
- .gradio-container .primary:hover {
327
- transform: translateY(-3px);
328
- box-shadow: 0 8px 20px rgba(33, 191, 115, 0.4);
329
- }
330
-
331
- .gradio-container .primary:active {
332
- transform: translateY(-1px);
333
- }
334
-
335
- .gradio-container .secondary {
336
- background-color: #EDF2F7;
337
- color: var(--primary-color);
338
- border-radius: calc(var(--border-radius) - 4px);
339
- transition: all 0.3s ease;
340
- font-weight: 600;
341
- padding: 12px 24px !important;
342
- border: 1px solid #D9E2EC !important;
343
- font-size: 1.1rem;
344
- }
345
-
346
- .gradio-container .secondary:hover {
347
- background-color: #E2E8F0;
348
- transform: translateY(-2px);
349
- box-shadow: 0 4px 10px rgba(31, 66, 135, 0.1);
350
- }
351
-
352
- /* Example Buttons Styling */
353
- .gradio-container button:not(.primary):not(.secondary) {
354
- background-color: white;
355
- color: var(--secondary-color);
356
- border: 1px solid var(--secondary-color);
357
- border-radius: 30px;
358
- padding: 8px 16px;
359
- margin: 5px;
360
- transition: all 0.3s ease;
361
- font-size: 0.95rem;
362
- }
363
-
364
- .gradio-container button:not(.primary):not(.secondary):hover {
365
- background-color: var(--secondary-color);
366
- color: white;
367
- transform: scale(1.05);
368
- box-shadow: 0 4px 12px rgba(39, 142, 165, 0.25);
369
- }
370
-
371
- /* Accordion Styling */
372
- .gradio-container .accordion {
373
- border: 1px solid #E2E8F0;
374
- border-radius: var(--border-radius);
375
- overflow: hidden;
376
- margin: 20px 0;
377
- }
378
-
379
- .gradio-container .accordion-title {
380
- background-color: #EDF2F7;
381
- color: var(--primary-color);
382
- padding: 12px 20px;
383
- font-weight: bold;
384
- cursor: pointer;
385
- border-radius: calc(var(--border-radius) - 4px);
386
- transition: all 0.3s ease;
387
- display: flex;
388
- align-items: center;
389
- justify-content: space-between;
390
- }
391
-
392
- .gradio-container .accordion-title:hover {
393
- background-color: #E2E8F0;
394
- }
395
-
396
- .gradio-container .accordion-title::after {
397
- content: '▼';
398
- font-size: 12px;
399
- margin-left: 10px;
400
- transition: transform 0.3s ease;
401
- }
402
-
403
- .gradio-container .accordion-title.open::after {
404
- transform: rotate(180deg);
405
- }
406
-
407
- .gradio-container .accordion-content {
408
- padding: 15px 20px;
409
- background-color: white;
410
- }
411
-
412
- /* Sliders */
413
- .gradio-container input[type="range"] {
414
- -webkit-appearance: none;
415
- width: 100%;
416
- height: 8px;
417
- border-radius: 5px;
418
- background: #E2E8F0;
419
- outline: none;
420
- margin: 15px 0;
421
- }
422
-
423
- .gradio-container input[type="range"]::-webkit-slider-thumb {
424
- -webkit-appearance: none;
425
- appearance: none;
426
- width: 20px;
427
- height: 20px;
428
- border-radius: 50%;
429
- background: var(--accent-color);
430
- cursor: pointer;
431
- box-shadow: 0 2px 8px rgba(33, 191, 115, 0.4);
432
- }
433
-
434
- .gradio-container input[type="range"]::-moz-range-thumb {
435
- width: 20px;
436
- height: 20px;
437
- border-radius: 50%;
438
- background: var(--accent-color);
439
- cursor: pointer;
440
- box-shadow: 0 2px 8px rgba(33, 191, 115, 0.4);
441
- }
442
-
443
- /* Checkboxes */
444
- .gradio-container input[type="checkbox"] {
445
- -webkit-appearance: none;
446
- appearance: none;
447
- width: 20px;
448
- height: 20px;
449
- border: 2px solid var(--secondary-color);
450
- border-radius: 5px;
451
- outline: none;
452
- cursor: pointer;
453
- margin-right: 10px;
454
- vertical-align: middle;
455
- position: relative;
456
- }
457
-
458
- .gradio-container input[type="checkbox"]:checked {
459
- background-color: var(--accent-color);
460
- border-color: var(--accent-color);
461
- }
462
-
463
- .gradio-container input[type="checkbox"]:checked::after {
464
- content: '✓';
465
- color: white;
466
- position: absolute;
467
- top: 50%;
468
- left: 50%;
469
- transform: translate(-50%, -50%);
470
- font-size: 14px;
471
- font-weight: bold;
472
- }
473
-
474
- /* Status and Processing Indicators */
475
- .gradio-container .status-message {
476
- color: var(--highlight-color);
477
- font-weight: bold;
478
- text-align: center;
479
- margin: 15px 0;
480
- padding: 10px;
481
- border-radius: calc(var(--border-radius) - 8px);
482
- background-color: rgba(255, 107, 107, 0.1);
483
- border-left: 3px solid var(--highlight-color);
484
- }
485
-
486
- /* Loading Animation */
487
- @keyframes pulse {
488
- 0% { opacity: 0.6; }
489
- 50% { opacity: 1; }
490
- 100% { opacity: 0.6; }
491
- }
492
-
493
- .gradio-container .loading {
494
- animation: pulse 1.5s infinite;
495
- display: inline-block;
496
- padding-left: 8px;
497
- }
498
-
499
- /* Responsive Design */
500
- @media (max-width: 768px) {
501
- .gradio-container {
502
- padding: 10px;
503
- }
504
-
505
- .gradio-container .block {
506
- padding: 20px;
507
- }
508
-
509
- .gradio-container h1 {
510
- font-size: 2rem;
511
- }
512
-
513
- .gradio-container h2 {
514
- font-size: 1.25rem;
515
- }
516
-
517
- .gradio-container .primary,
518
- .gradio-container .secondary {
519
- padding: 10px 18px !important;
520
- font-size: 1rem;
521
- }
522
- }
523
-
524
- @media (max-width: 480px) {
525
- .gradio-container h1 {
526
- font-size: 1.75rem;
527
- }
528
-
529
- .gradio-container h2 {
530
- font-size: 1.1rem;
531
- }
532
-
533
- .gradio-container .block {
534
- padding: 15px;
535
- }
536
- }
537
-
538
- /* RTL Support - Important for Arabic */
539
- [dir="rtl"] .gradio-container,
540
- .rtl {
541
- text-align: right;
542
- }
543
-
544
- [dir="rtl"] .gradio-container .accordion-title::after,
545
- .rtl .gradio-container .accordion-title::after {
546
- margin-left: 0;
547
- margin-right: 10px;
548
- }
549
-
550
- /* Dark Mode Support (Optional) */
551
- @media (prefers-color-scheme: dark) {
552
- :root {
553
- --primary-color: #4D96FF;
554
- --secondary-color: #38B6FF;
555
- --background-color: #1A1A2E;
556
- --text-color: #E6E6E6;
557
- --accent-color: #38E54D;
558
- --highlight-color: #FF6B6B;
559
- --input-bg: #242442;
560
- --header-gradient: linear-gradient(135deg, #4D96FF 0%, #38B6FF 100%);
561
- --card-shadow: 0 10px 30px rgba(0, 0, 0, 0.3);
562
- }
563
-
564
- .gradio-container {
565
- background: var(--background-color);
566
- }
567
-
568
- .gradio-container .block,
569
- .gradio-container .input-container,
570
- .gradio-container .output-container {
571
- background-color: #242442;
572
- }
573
-
574
- .gradio-container .secondary {
575
- background-color: #333355;
576
- border-color: #444466 !important;
577
- }
578
-
579
- .gradio-container .secondary:hover {
580
- background-color: #3D3D60;
581
- }
582
-
583
- .gradio-container textarea,
584
- .gradio-container input[type="text"] {
585
- background-color: #333355;
586
- color: var(--text-color);
587
- }
588
-
589
- .gradio-container textarea::placeholder,
590
- .gradio-container input[type="text"]::placeholder {
591
- color: #8D8DAA;
592
- }
593
-
594
- .gradio-container .accordion-title {
595
- background-color: #333355;
596
- }
597
-
598
- .gradio-container .accordion-title:hover {
599
- background-color: #3D3D60;
600
- }
601
-
602
- .gradio-container input[type="range"] {
603
- background: #333355;
604
- }
605
- }
606
- """
607
-
608
- # Updated Gradio interface with enhanced design
609
- with gr.Blocks(title="Cohere Arabic Model Demo", css=custom_css) as demo:
610
- # Main title and description
611
- gr.Markdown("""
612
- # 🌟 نموذج Cohere للغة العربية
613
- ## Command R7B Arabic Language Model
614
-
615
- نموذج ذكاء اصطناعي متقدم للتوليد النصي باللغة العربية
616
- """)
617
-
618
- # Main interface container
619
- with gr.Row():
620
- # Input Column
621
- with gr.Column(scale=1):
622
- # Prompt Input
623
- prompt = gr.Textbox(
624
- label="النص الإدخال | Input Prompt",
625
- placeholder="أدخل نصك باللغة العربية هنا...",
626
- lines=5
627
- )
628
-
629
- # Example Prompts Section
630
- gr.Markdown("### أمثلة سريعة | Quick Examples")
631
- with gr.Row():
632
- example_prompts = [
633
- "مرحبا، كيف حالك؟",
634
- "اكتب قصة قصيرة عن قطة",
635
- "اشرح مفهوم الذكاء الاصطناعي",
636
- "قانون الجاذبية للأطفال",
637
- ]
638
- for example in example_prompts:
639
- example_btn = gr.Button(example)
640
- example_btn.click(fn=lambda x=example: x, inputs=[], outputs=[prompt])
641
-
642
- # Advanced Settings Accordion
643
- with gr.Accordion("الإعدادات المتقدمة | Advanced Settings", open=False):
644
- max_tokens = gr.Slider(
645
- minimum=10, maximum=500, value=100,
646
- step=10, label="الحد الأقصى للرموز | Max Tokens"
647
- )
648
- temperature = gr.Slider(
649
- minimum=0.1, maximum=1.0, value=0.7,
650
- step=0.1, label="درجة الحرارة | Temperature"
651
- )
652
- force_arabic = gr.Checkbox(
653
- label="تشجيع الاستجابات بالعربية | Encourage Arabic Responses",
654
- value=True
655
- )
656
-
657
- # Generate and Clear Buttons
658
- with gr.Row():
659
- generate_btn = gr.Button("توليد النص | Generate", variant="primary")
660
- clear_btn = gr.Button("مسح | Clear", variant="secondary")
661
-
662
- # Output Column
663
- with gr.Column(scale=1):
664
- output = gr.Textbox(
665
- label="النص المولد | Generated Text",
666
- lines=10,
667
- interactive=False
668
- )
669
-
670
- # Status Markdown for additional information
671
- status = gr.Markdown("جاهز للتوليد | Ready to generate")
672
-
673
- # Event Handlers
674
- def on_generate(prompt, max_tokens, temperature, force_arabic):
675
- # Update status to indicate generation is in progress
676
- status_update = "جارٍ التوليد... قد يستغرق حتى دقيقتين | Generating... This may take up to 2 minutes."
677
-
678
- # Call the generation function
679
- result = generate_text(prompt, max_tokens, temperature, force_arabic)
680
-
681
- return result, "اكتمل التوليد | Generation complete!"
682
-
683
- # Connect buttons to their functions
684
- generate_btn.click(
685
- fn=on_generate,
686
- inputs=[prompt, max_tokens, temperature, force_arabic],
687
- outputs=[output, status]
688
- )
689
-
690
- # Clear button functionality
691
- clear_btn.click(
692
- fn=lambda: ("", "تم المسح | Cleared"),
693
- inputs=[],
694
- outputs=[prompt, output, status]
695
- )
696
-
697
- # Launch the Gradio app
698
- demo.launch(
699
- share=True, # Enable sharing if needed
700
- debug=True # Enable debug mode
701
- )
 
1
  import os
2
  import re
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import torch
4
+ from transformers import AutoTokenizer, AutoModelForCausalLM
5
+
6
+ # Global variables for model and tokenizer to prevent reloading
7
+ global_model = None
8
+ global_tokenizer = None
 
9
 
10
  # Model ID
11
  model_id = "CohereForAI/c4ai-command-r7b-arabic-02-2025"
12
 
13
def load_models():
    """Return the (tokenizer, model) pair, loading and caching them on first use.

    Subsequent calls reuse the module-level cache so the weights are only
    pulled from the Hub once per process.

    Raises:
        ValueError: if the HF_TOKEN environment variable is not set.
    """
    global global_model, global_tokenizer

    # Fast path: a previous call already populated the cache.
    if global_tokenizer is not None and global_model is not None:
        return global_tokenizer, global_model

    # Authentication token comes from the environment.
    token = os.environ.get("HF_TOKEN")
    if not token:
        raise ValueError("No HF_TOKEN found. Please set the HF_TOKEN environment variable.")

    print("Loading tokenizer...")
    tok = AutoTokenizer.from_pretrained(model_id, token=token)

    print("Loading model...")
    mdl = AutoModelForCausalLM.from_pretrained(
        model_id,
        token=token,
        torch_dtype=torch.float16,
        device_map="auto",
        # Cap usage on device 0 -- assumes a single-GPU host; TODO confirm.
        max_memory={0: "14GB"},
        use_cache=True,
        # Eager attention implementation chosen for stability.
        _attn_implementation="eager",
    )

    # Populate the cache for the next caller.
    global_tokenizer, global_model = tok, mdl
    return tok, mdl
50
 
51
def format_prompt(prompt):
    """Wrap *prompt* in the instruction template the Command model answers best with.

    The template is: an Arabic instruction to answer precisely and directly,
    a blank line, the question, a blank line, then an answer cue.
    """
    header = "الإجابة على الأسئلة بدقة ومباشرة ودون التطرق للمواضيع الأخرى غير المتعلقة بالسؤال."
    return f"{header}\n\nالسؤال: {prompt}\n\nالإجابة:"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
def generate_text(prompt, max_new_tokens=500):
    """Generate an Arabic answer for *prompt* with the cached Command model.

    Args:
        prompt: the user's question, ideally in Arabic.
        max_new_tokens: upper bound on the number of generated tokens.

    Returns:
        The cleaned generated text, or an error message string if anything
        in the pipeline fails (this function never raises to the caller).
    """
    # Guard against empty input instead of spending GPU time on it.
    if not prompt.strip():
        return "Please enter a prompt."

    try:
        # Get or lazily load the cached model and tokenizer.
        tokenizer, model = load_models()

        # Wrap the raw question in the instruction template.
        formatted_prompt = format_prompt(prompt)
        print(f"Formatted prompt: {formatted_prompt[:100]}...")

        # Tokenize with an explicit attention mask; no truncation so the
        # full context window is available.
        inputs = tokenizer(
            formatted_prompt,
            return_tensors="pt",
            padding=True,
            truncation=False
        ).to(model.device)

        # Sampling parameters tuned for factual, low-repetition answers.
        with torch.inference_mode():
            outputs = model.generate(
                input_ids=inputs.input_ids,
                attention_mask=inputs.attention_mask,
                max_new_tokens=max_new_tokens,
                temperature=0.3,        # low temperature -> more deterministic
                top_p=0.9,
                repetition_penalty=1.2, # penalize verbatim repetition
                no_repeat_ngram_size=3, # forbid repeated 3-grams
                do_sample=True,
                num_return_sequences=1,
                pad_token_id=tokenizer.eos_token_id
            )

        # Keep only the tokens generated after the prompt.
        prompt_length = inputs.input_ids.shape[1]
        generated_ids = outputs[0][prompt_length:]

        # Decode and post-process the continuation.
        generated_text = tokenizer.decode(generated_ids, skip_special_tokens=True)
        return clean_response(generated_text)

    except Exception as e:
        import traceback
        # Bug fix: `tb` was printed below but never assigned, which turned
        # every generation error into a NameError inside the handler.
        tb = traceback.format_exc()
        print(f"Error generating text: {str(e)}\n{tb}")
        return f"Error generating text: {str(e)}"
108
 
109
def clean_response(text):
    """Normalize generated text.

    Strips tag-like tokens, collapses whitespace, and removes immediately
    repeated four-word phrases.
    """
    # Drop anything that looks like an HTML/special-token tag, e.g. "<pad>".
    without_tags = re.sub(r'<.*?>', '', text)

    # Collapse every run of whitespace (including newlines) to one space.
    squeezed = re.sub(r'\s+', ' ', without_tags).strip()

    # Collapse a 4-word phrase that immediately repeats down to a single
    # occurrence (group 1 is the phrase, group 2 its repetitions).
    return re.sub(r'(\b\w+\b\s+\b\w+\b\s+\b\w+\b\s+\b\w+\b\s+)(\1)+', r'\1', squeezed)
122
+
123
# Example usage
if __name__ == "__main__":
    # Smoke test: ask who wrote the famous "on the measure of the resolute" poem.
    poem_question = 'من كتب قصيدة "على قدر أهل العزم تأتي العزائم"؟'
    answer = generate_text(poem_question)
    print("\nQuestion:", poem_question)
    print("\nResponse:", answer)