import gradio as gr
import os
import tempfile
from pathlib import Path
import requests
import base64
import re
from typing import Tuple
import markdown
from dotenv import load_dotenv
from openai import OpenAI

# Pull settings from a local .env file into the process environment
load_dotenv()

# Endpoint and token used by the text-extraction API call; both default to
# an empty string when the corresponding variable is not set
API_URL = os.environ.get("API_URL", "")
API_TOKEN = os.environ.get("API_TOKEN", "")


class Doc2PageConverter:
    def __init__(self):
        """Set up the converter and, when a token is available, its ERNIE client.

        ``QIANFAN_TOKEN`` is read from the environment. With a token present
        an ``OpenAI`` client pointed at the Qianfan v2 endpoint is created;
        without one ``self.client`` stays ``None``.
        """
        token = os.getenv('QIANFAN_TOKEN')

        self.qianfan_token = token
        self.qianfan_model = "ernie-x1.1-preview"
        self.client = (
            OpenAI(base_url="https://qianfan.baidubce.com/v2", api_key=token)
            if token
            else None
        )
    

    def extract_text_with_api(self, file_path: str) -> str:
        """Extract text and structure using PP-StructureV3 API.

        The file is read, base64-encoded and POSTed as JSON to ``API_URL``
        with ``API_TOKEN`` in the ``Authorization`` header. For every entry
        in ``result.layoutParsingResults`` the image references are
        rewritten, then all pages are joined into one markdown string.

        Args:
            file_path: Path of the document. A ``.pdf`` suffix is sent as
                ``fileType`` 0, anything else as ``fileType`` 1 (image).

        Returns:
            The concatenated markdown, or ``""`` when the file cannot be
            read or the response cannot be processed (the error is printed).

        Raises:
            ValueError: If ``API_URL`` or ``API_TOKEN`` is not configured.
            RuntimeError: If the HTTP request fails.
        """
        # Validate configuration before entering the try block: the broad
        # handler below would otherwise swallow this error and the caller
        # would only see an empty extraction result.
        if not API_URL or not API_TOKEN:
            raise ValueError(
                "API_URL and API_TOKEN must be configured in .env file")

        try:
            # Determine file type: 0 = PDF, 1 = image
            file_type = 0 if Path(file_path).suffix.lower() == ".pdf" else 1

            # Read the file and encode it to base64 for the JSON body
            with open(file_path, "rb") as f:
                file_data = base64.b64encode(f.read()).decode("ascii")

            # Prepare API request
            headers = {
                "Authorization": f"token {API_TOKEN}",
                "Content-Type": "application/json",
            }

            # Use default settings for simplicity
            payload = {
                "file": file_data,
                "fileType": file_type,
                "useFormulaRecognition": True,
                "useChartRecognition": False,
                "useDocOrientationClassify": False,
                "useDocUnwarping": False,
                "useTextlineOrientation": False,
                "useSealRecognition": True,
                "useRegionDetection": True,
                "useTableRecognition": True,
                "layoutThreshold": 0.5,
                "layoutNms": True,
                "layoutUnclipRatio": 1.0,
                "textDetLimitType": "min",
                # was misspelled "textTetLimitSideLen"
                "textDetLimitSideLen": 736,
                "textDetThresh": 0.30,
                "textDetBoxThresh": 0.60,
                "textDetUnclipRatio": 1.5,
                "textRecScoreThresh": 0.00,
                "sealDetLimitType": "min",
                "sealDetLimitSideLen": 736,
                "sealDetThresh": 0.20,
                "sealDetBoxThresh": 0.60,
                "sealDetUnclipRatio": 0.5,
                "sealRecScoreThresh": 0.00,
                "useOcrResultsWithTableCells": True,
                "useE2eWiredTableRecModel": False,
                "useE2eWirelessTableRecModel": False,
                "useWiredTableCellsTransToHtml": False,
                # was misspelled "useWirelessWableCellsTransToHtml"
                "useWirelessTableCellsTransToHtml": False,
                "useTableOrientationClassify": True,
            }

            # Call API
            response = requests.post(
                API_URL,
                json=payload,
                headers=headers,
                timeout=300,  # 5 minutes timeout
            )
            response.raise_for_status()
            result = response.json()

            # Process API response
            layout_results = result.get("result", {}).get(
                "layoutParsingResults", [])

            markdown_list = []
            for res in layout_results:
                markdown_data = res["markdown"]

                # Embed images into markdown
                markdown_content = self.embed_images_into_markdown_text(
                    markdown_data["text"], markdown_data["images"]
                )

                # Work on a copy so the parsed response itself is not
                # modified; the remaining keys are kept for concatenation.
                markdown_with_content = markdown_data.copy()
                markdown_with_content["text"] = markdown_content
                markdown_list.append(markdown_with_content)

            # Concatenate all pages
            return self.concatenate_markdown_pages(markdown_list)

        except requests.exceptions.RequestException as e:
            raise RuntimeError(f"API request failed: {str(e)}") from e
        except Exception as e:
            # Best effort: anything else is reported and treated as
            # "nothing extracted".
            print(f"Error in API extraction: {e}")
            return ""
    
    def embed_images_into_markdown_text(self, markdown_text, markdown_images):
        """Point every known ``<img src="...">`` at its replacement source.

        ``markdown_images`` maps the path that appears in the markdown to the
        value that should be used as ``src`` instead. Entries are applied one
        after another in mapping order; text without a matching tag is
        returned unchanged.
        """
        rewritten = markdown_text
        for original_src in markdown_images:
            old_tag_start = f'<img src="{original_src}"'
            new_tag_start = f'<img src="{markdown_images[original_src]}"'
            rewritten = rewritten.replace(old_tag_start, new_tag_start)
        return rewritten

    def concatenate_markdown_pages(self, markdown_list):
        """Concatenate markdown pages into a single document.

        Args:
            markdown_list: Iterable of per-page dicts with a ``"text"`` key
                and optional boolean ``"isStart"`` / ``"isEnd"`` keys that
                say whether the page begins / ends on a paragraph boundary.
                A missing flag is treated as ``True``.

        Returns:
            The joined markdown. A page that continues the previous page's
            paragraph is appended with a single space, or with nothing when
            either side of the seam is a CJK ideograph. Every other page is
            separated by a blank line. The result carries no leading
            separator.
        """
        cjk_char = re.compile(r"[\u4e00-\u9fff]")

        pieces = []
        last_char = ""  # last character emitted so far ("" = nothing yet)
        previous_page_ended_paragraph = True

        for page in markdown_list:
            text = page["text"]
            starts_paragraph = page.get("isStart", True)
            ends_paragraph = page.get("isEnd", True)

            if not last_char:
                # Nothing emitted yet: do not open the document with a
                # separator.
                separator = ""
            elif not starts_paragraph and not previous_page_ended_paragraph:
                # The paragraph runs across the page break. Only the two
                # characters at the seam decide whether a space is needed.
                seam_is_cjk = bool(
                    cjk_char.match(last_char) or cjk_char.match(text[:1])
                )
                separator = "" if seam_is_cjk else " "
            else:
                separator = "\n\n"

            piece = separator + text
            if piece:
                pieces.append(piece)
                last_char = piece[-1]
            previous_page_ended_paragraph = ends_paragraph

        return "".join(pieces)
    
    def markdown_to_html_with_ernie(self, markdown_text: str) -> str:
        """Convert markdown to a complete HTML page using the ERNIE API.

        Args:
            markdown_text: Markdown source to embed in the prompt.

        Returns:
            The HTML returned by the model with surrounding whitespace and
            any wrapping markdown code fence removed. When no client is
            configured, the request fails, or the model returns no content,
            the result of ``basic_markdown_to_html`` is returned instead, so
            the return value is never empty because of an empty reply.
        """
        if not self.client:
            # Fallback to basic markdown conversion if no API client
            return self.basic_markdown_to_html(markdown_text)

        try:
            prompt = f"""Please convert the following markdown text into a modern, clean HTML page. Use contemporary typography with the Inter font family and clean design principles. Make it visually appealing with proper CSS styling, responsive design, and excellent readability.

Design requirements:
- Use Inter font from Google Fonts
- Clean, modern spacing and typography  
- Subtle shadows and rounded corners
- Good color contrast and hierarchy
- Responsive design that works on all devices
- Include proper HTML structure with head, body, and semantic elements

Important: Add a footer at the bottom with "Powered by PaddleOCR and ERNIE" where PaddleOCR links to https://github.com/PaddlePaddle/PaddleOCR and ERNIE links to https://huggingface.co/BAIDU. Style it with modern, subtle styling.

Markdown content:
{markdown_text}

IMPORTANT: Return ONLY the raw HTML code starting with <!DOCTYPE html> and ending with </html>. Do NOT wrap it in markdown code blocks or add any explanations. I need the pure HTML content that can be directly saved as an .html file."""

            response = self.client.chat.completions.create(
                model=self.qianfan_model,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=64000,
            )

            # The content may be None; treat that like an empty reply.
            # Strip first: a fence preceded or followed by whitespace or a
            # newline would otherwise not be recognised below.
            html_content = (response.choices[0].message.content or "").strip()

            # Clean up markdown code block markers if present
            for opening_fence in ('```html', '```'):
                if html_content.startswith(opening_fence):
                    html_content = html_content[len(opening_fence):]
                    break
            if html_content.endswith('```'):
                html_content = html_content[:-3]

            # Strip whitespace left between the fences and the document
            html_content = html_content.strip()

            if html_content:
                return html_content
            print("Error calling ERNIE API: empty response")

        except Exception as e:
            print(f"Error calling ERNIE API: {e}")

        # Reached after a failed request or an empty reply
        return self.basic_markdown_to_html(markdown_text)
    
    def basic_markdown_to_html(self, markdown_text: str) -> str:
        """Fallback markdown to HTML conversion.

        Renders ``markdown_text`` with ``markdown.markdown`` and places the
        result inside a fixed HTML page template (inline stylesheet,
        ``.container`` wrapper and a "Powered by" footer).

        Args:
            markdown_text: Markdown source to render.

        Returns:
            The complete HTML document as a string.
        """
        html = markdown.markdown(markdown_text)
        
        # Wrap in a complete HTML document with styling.
        # The template is an f-string: CSS braces are doubled ({{ }}) so that
        # only {html} is substituted.
        complete_html = f"""
        <!DOCTYPE html>
        <html lang="en">
        <head>
            <meta charset="UTF-8">
            <meta name="viewport" content="width=device-width, initial-scale=1.0">
            <title>Converted Document</title>
            <style>
                /* Modern, clean typography */
                @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
                
                * {{
                    margin: 0;
                    padding: 0;
                    box-sizing: border-box;
                }}
                
                body {{
                    font-family: 'Inter', system-ui, -apple-system, sans-serif;
                    font-weight: 400;
                    line-height: 1.7;
                    color: #1a1a1a;
                    max-width: 850px;
                    margin: 0 auto;
                    padding: 32px 24px;
                    background: #fafafa;
                    font-size: 16px;
                }}
                
                .container {{
                    background: #ffffff;
                    padding: 48px;
                    border-radius: 12px;
                    box-shadow: 0 1px 3px rgba(0,0,0,0.08), 0 4px 24px rgba(0,0,0,0.04);
                    border: 1px solid rgba(0,0,0,0.06);
                }}
                
                /* Typography hierarchy */
                h1, h2, h3, h4, h5, h6 {{
                    font-weight: 600;
                    color: #0f0f0f;
                    margin: 32px 0 16px 0;
                    letter-spacing: -0.02em;
                }}
                
                h1 {{
                    font-size: 2.25rem;
                    font-weight: 700;
                    margin-top: 0;
                    margin-bottom: 24px;
                    border-bottom: 2px solid #e5e7eb;
                    padding-bottom: 16px;
                }}
                
                h2 {{
                    font-size: 1.75rem;
                    margin-top: 48px;
                }}
                
                h3 {{
                    font-size: 1.375rem;
                    margin-top: 40px;
                }}
                
                h4 {{
                    font-size: 1.125rem;
                }}
                
                p {{
                    margin-bottom: 20px;
                    color: #374151;
                    line-height: 1.75;
                }}
                
                /* Code styling */
                code {{
                    font-family: 'SF Mono', Consolas, 'Liberation Mono', monospace;
                    background-color: #f3f4f6;
                    color: #1f2937;
                    padding: 3px 6px;
                    border-radius: 4px;
                    font-size: 0.875rem;
                    font-weight: 500;
                }}
                
                pre {{
                    background-color: #f8fafc;
                    border: 1px solid #e5e7eb;
                    padding: 20px;
                    border-radius: 8px;
                    overflow-x: auto;
                    margin: 24px 0;
                    font-size: 0.875rem;
                    line-height: 1.6;
                }}
                
                pre code {{
                    background: none;
                    padding: 0;
                    border-radius: 0;
                }}
                
                /* Blockquotes */
                blockquote {{
                    border-left: 4px solid #6366f1;
                    padding-left: 20px;
                    margin: 24px 0;
                    font-style: normal;
                    color: #4b5563;
                    background-color: #f8fafc;
                    padding: 16px 20px;
                    border-radius: 0 8px 8px 0;
                }}
                
                /* Images */
                img {{
                    max-width: 100%;
                    height: auto;
                    border-radius: 8px;
                    margin: 20px 0;
                    box-shadow: 0 4px 12px rgba(0,0,0,0.1);
                }}
                
                /* Tables */
                table {{
                    border-collapse: collapse;
                    width: 100%;
                    margin: 24px 0;
                    background: #ffffff;
                    border-radius: 8px;
                    overflow: hidden;
                    box-shadow: 0 1px 3px rgba(0,0,0,0.1);
                }}
                
                th, td {{
                    padding: 16px;
                    text-align: left;
                    border-bottom: 1px solid #e5e7eb;
                }}
                
                th {{
                    background-color: #f9fafb;
                    font-weight: 600;
                    color: #374151;
                    font-size: 0.875rem;
                    text-transform: uppercase;
                    letter-spacing: 0.05em;
                }}
                
                tr:last-child td {{
                    border-bottom: none;
                }}
                
                /* Lists */
                ul, ol {{
                    margin: 16px 0 20px 24px;
                    color: #374151;
                }}
                
                li {{
                    margin-bottom: 8px;
                    line-height: 1.6;
                }}
                
                /* Links */
                a {{
                    color: #6366f1;
                    text-decoration: none;
                    font-weight: 500;
                }}
                
                a:hover {{
                    color: #4f46e5;
                    text-decoration: underline;
                }}
                /* Footer */
                .footer {{
                    margin-top: 64px;
                    padding-top: 24px;
                    border-top: 1px solid #e5e7eb;
                    text-align: center;
                    font-size: 14px;
                    color: #6b7280;
                    font-weight: 400;
                }}
                
                .footer a {{
                    color: #6366f1;
                    font-weight: 500;
                    text-decoration: none;
                }}
                
                .footer a:hover {{
                    color: #4f46e5;
                    text-decoration: underline;
                }}
            </style>
        </head>
        <body>
            <div class="container">
                {html}
                <div class="footer">
                    Powered by <a href="https://github.com/PaddlePaddle/PaddleOCR" target="_blank">PaddleOCR</a> and
                    <a href="https://huggingface.co/BAIDU" target="_blank">ERNIE</a>
                </div>
            </div>
        </body>
        </html>
        """
        return complete_html
    
    def process_document(self, file_path: str) -> Tuple[str, str]:
        """Run the full pipeline for one file: extraction, then HTML rendering.

        Returns ``(markdown, html)`` on success. On any failure the first
        element carries an error or warning message and the second element
        is an empty string.
        """
        accepted_suffixes = ('.pdf', '.png', '.jpg', '.jpeg', '.bmp', '.tiff')

        try:
            suffix = Path(file_path).suffix.lower()

            # Anything that is neither a PDF nor a listed image type is rejected
            if suffix not in accepted_suffixes:
                unsupported_message = ("Error: Unsupported file format. "
                                       "Please upload PDF or image files.")
                return unsupported_message, ""

            # Extraction via the PP-StructureV3 API
            extracted_markdown = self.extract_text_with_api(file_path)
            if not extracted_markdown.strip():
                empty_message = ("Warning: No text content extracted "
                                 "from the document.")
                return empty_message, ""

            # ERNIE rendering, or the basic fallback
            rendered_html = self.markdown_to_html_with_ernie(extracted_markdown)
            return extracted_markdown, rendered_html

        except Exception as exc:
            return f"Error processing document: {str(exc)}", ""

# Initialize the module-level converter used by process_upload().
# Construction reads QIANFAN_TOKEN and, when it is set, creates the ERNIE client.
converter = Doc2PageConverter()

def process_upload(file):
    """Process an uploaded file and return status, markdown and HTML.

    Args:
        file: ``None`` when nothing was uploaded; otherwise either a
            filesystem path (``str`` / ``os.PathLike``) or an upload object
            that exposes the path as its ``name`` attribute.

    Returns:
        A ``(status, markdown, html)`` tuple. On success ``status`` is a
        confirmation message. On failure ``status`` carries the error or
        warning text and the other two elements are empty strings.
    """
    if file is None:
        return "Please upload a file.", "", ""

    try:
        # Path-like values are used as they are; any other upload object is
        # expected to carry its location in ``.name``.
        if isinstance(file, (str, os.PathLike)):
            file_path = os.fspath(file)
        else:
            file_path = file.name

        # Process the document
        markdown_result, html_result = converter.process_document(file_path)

        if html_result:
            return "Document processed successfully!", markdown_result, html_result
        # No HTML means failure: markdown_result holds the message
        return markdown_result, "", ""

    except Exception as e:
        return f"Error: {str(e)}", "", ""

def save_html_file(html_content, filename="converted_page"):
    """Save HTML content to a temporary ``.html`` file for download.

    Args:
        html_content: HTML text to write. Empty or ``None`` means there is
            nothing to save.
        filename: Prefix for the temporary file name; an underscore and a
            random part are appended after it.

    Returns:
        The path of the written file, or ``None`` when ``html_content`` is
        empty. The file is not removed automatically.
    """
    if not html_content:
        return None

    # UTF-8 is set explicitly so the bytes do not depend on the platform's
    # default encoding. delete=False keeps the file on disk after it is
    # closed so that it can still be served for download.
    with tempfile.NamedTemporaryFile(
        mode='w',
        suffix='.html',
        prefix=f"{filename}_",
        delete=False,
        encoding='utf-8',
    ) as temp_file:
        temp_file.write(html_content)
        return temp_file.name

# Create custom theme for a clean, modern look.
# Starts from Gradio's default theme with blue/gray hues and the Inter / SF Mono
# font stacks, then overrides individual background, border and primary-button
# colors through .set().
custom_theme = gr.themes.Default(
    primary_hue="blue",
    secondary_hue="gray",
    neutral_hue="gray",
    font=("Inter", "system-ui", "sans-serif"),
    font_mono=("SF Mono", "Consolas", "monospace")
).set(
    body_background_fill="#fafafa",
    background_fill_primary="#ffffff",
    background_fill_secondary="#f8f9fa",
    border_color_primary="#e5e7eb",
    button_primary_background_fill="#6366f1",
    button_primary_background_fill_hover="#4f46e5",
    button_primary_text_color="#ffffff",
)

# Create Gradio interface
with gr.Blocks(
    title="Doc2Page - Simple Document Converter", 
    theme=custom_theme,
    css="""
    .gradio-container {
        max-width: 1200px !important;
        margin: auto;
        padding: 32px 16px;
    }
    
    /* Enhanced button styling */
    .gr-button {
        font-weight: 500;
        border-radius: 10px;
        font-size: 14px;
        transition: all 0.2s ease;
        box-shadow: 0 2px 4px rgba(99, 102, 241, 0.1);
    }
    
    .gr-button:hover {
        transform: translateY(-1px);
        box-shadow: 0 4px 8px rgba(99, 102, 241, 0.2);
    }
    
    /* Input styling */
    .gr-textbox, .gr-file {
        border-radius: 10px;
        font-family: 'Inter', system-ui, sans-serif;
        border: 1px solid #e5e7eb;
        transition: border-color 0.2s ease;
    }
    
    .gr-textbox:focus, .gr-file:focus {
        border-color: #6366f1;
        box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.1);
    }
    
    /* Typography */
    h1 {
        font-weight: 700;
        color: #1a1a1a;
        margin-bottom: 8px;
        font-size: 2.5rem;
    }
    
    .app-description {
        color: #6b7280;
        font-size: 18px;
        margin-bottom: 40px;
        font-weight: 400;
    }
    
    /* Tab styling */
    .gr-tab {
        border-radius: 8px 8px 0 0;
        font-weight: 500;
    }
    
    /* Card-like sections */
    .gr-column {
        background: rgba(255, 255, 255, 0.5);
        border-radius: 12px;
        padding: 16px;
        margin: 8px;
    }
    
    /* Status styling */
    .gr-textbox[data-testid*="status"] {
        background-color: #f8fafc;
        border: 1px solid #e2e8f0;
    }
    
    /* Download section styling */
    .download-section {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        border-radius: 12px;
        padding: 20px;
        color: white;
        margin-top: 20px;
    }
    """
) as app:
    
    # Header: application title followed by a one-line tagline.
    # The tagline's "app-description" class is styled in the css string
    # passed to gr.Blocks above.
    gr.Markdown(
        "# Doc2Page",
        elem_classes="main-title"
    )
    gr.Markdown(
        "🥃 Transform your documents into beautiful webpages!",
        elem_classes="app-description"
    )
    
    # Main interface: upload controls in the left column (scale 1), results
    # tabs in the right column (scale 2).
    with gr.Row():
        with gr.Column(scale=1, min_width=350):
            with gr.Group():
                gr.Markdown("### 📄 Upload Document")
                # The accepted suffixes mirror the ones checked in
                # Doc2PageConverter.process_document.
                file_input = gr.File(
                    label="Choose your file",
                    file_types=[".pdf", ".png", ".jpg", ".jpeg", ".bmp", ".tiff"],
                    file_count="single",
                    height=140
                )
                
                # Triggers process_and_update through the click handler
                # registered further down.
                process_btn = gr.Button(
                    "✨ Convert to Webpage", 
                    variant="primary", 
                    size="lg",
                    scale=1
                )
                
                # Read-only status line; receives the first value returned
                # by the click handler.
                status_output = gr.Textbox(
                    label="Status",
                    placeholder="Ready to convert your document...",
                    interactive=False,
                    lines=3,
                    max_lines=3
                )
        
        with gr.Column(scale=2):
            gr.Markdown("### 📋 Results")
            with gr.Tabs():
                # Rendered HTML; shows a placeholder until a conversion ran
                with gr.TabItem("❤️ Preview", id="preview"):
                    html_preview = gr.HTML(
                        label="",
                        value="<div style='padding: 40px; text-align: center; color: #6b7280;'>Your converted webpage will appear here</div>",
                    )
                
                # Extracted markdown, read-only, with a copy button
                with gr.TabItem("📝 Markdown Source", id="markdown"):
                    markdown_output = gr.Textbox(
                        label="",
                        placeholder="Extracted markdown content will appear here...",
                        lines=22,
                        interactive=False,
                        show_copy_button=True
                    )
                
                # Generated HTML source, read-only
                with gr.TabItem("🌐 HTML Source", id="html"):
                    html_output = gr.Code(
                        label="",
                        language="html",
                        lines=22,
                        interactive=False
                    )
    
    # Success & Download section.
    # Hidden initially; the click handler makes the row visible once HTML
    # has been generated.
    with gr.Row(visible=False) as download_section:
        with gr.Column():
            # Static success banner (inline-styled HTML inside Markdown)
            gr.Markdown("""
            <div style="background: linear-gradient(135deg, #10b981, #059669); border-radius: 12px; padding: 20px; color: white; text-align: center; margin: 20px 0;">
                <h3 style="margin: 0 0 8px 0; color: white;">✅ Conversion Successful!</h3>
                <p style="margin: 0; opacity: 0.9;">Your document has been converted to a beautiful webpage</p>
            </div>
            """)
            
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### 📥 Download Your Webpage")
                    # Despite its name this is a File component; it receives
                    # the path of the saved HTML file from the click handler.
                    download_btn = gr.File(
                        label="HTML File",
                        visible=True
                    )
                
                with gr.Column(scale=1):
                    gr.Markdown("### 🚀 Quick Deploy Guide")
                    gr.Markdown("""
                    1. **GitHub Pages**: Upload as `index.html` to your repo
                    2. **Netlify**: Drag & drop the file to netlify.app
                    3. **Vercel**: Use their simple file deployment
                    4. **Local**: Double-click to open in browser
                    """, elem_classes="deploy-guide")
    
    # Event handlers
    def process_and_update(file):
        """Convert the uploaded file and build the values for all outputs.

        Args:
            file: Value of the file input; ``None`` when nothing was
                uploaded. Either a plain path or an object exposing the
                path as its ``name`` attribute.

        Returns:
            A 6-tuple: status text, markdown source, HTML source, preview
            HTML, path of the downloadable file (``None`` when no HTML was
            produced) and a visibility update for the download section.
        """
        status, markdown_content, html_content = process_upload(file)

        # Create download file if HTML was generated
        download_file = None
        show_download = False

        if html_content:
            # getattr keeps this working when the upload arrives as a plain
            # path string instead of an object with a ``name`` attribute.
            source_name = getattr(file, "name", file)
            filename = Path(source_name).stem if file else "converted_page"
            download_file = save_html_file(html_content, filename)
            show_download = True

        # Preview content with better styling when no content
        # NOTE(review): html_content is a complete HTML document with its own
        # <style> block; confirm that rendering it inline does not let its
        # CSS affect the rest of the app.
        preview_content = html_content if html_content else """
        <div style='padding: 60px 20px; text-align: center; color: #6b7280; 
                    background: #f9fafb; border-radius: 8px; border: 2px dashed #d1d5db;'>
            <h3 style='color: #9ca3af; margin: 0;'>No preview available</h3>
            <p style='margin: 8px 0 0 0;'>Convert a document to see the preview</p>
        </div>
        """

        return (
            status,  # status_output
            markdown_content,  # markdown_output
            html_content,  # html_output
            preview_content,  # html_preview
            download_file,  # download_btn
            gr.update(visible=show_download)  # download_section
        )
    
    # Wire the convert button to the handler. The order of `outputs` must
    # match the order of the tuple returned by process_and_update.
    process_btn.click(
        fn=process_and_update,
        inputs=[file_input],
        outputs=[
            status_output,
            markdown_output,
            html_output,
            html_preview,
            download_btn,
            download_section
        ]
    )
    
    # Footer: static attribution line with links to PaddleOCR and ERNIE,
    # written as inline-styled HTML inside a Markdown component.
    gr.Markdown(
        """
        <div style="text-align: center; padding: 20px 0; margin-top: 40px; border-top: 1px solid #e5e7eb; color: #6b7280; font-size: 14px;">
            Powered by <a href="https://github.com/PaddlePaddle/PaddleOCR" target="_blank" style="color: #6366f1; text-decoration: none;">PaddleOCR</a> 
            for text extraction and <a href="https://huggingface.co/BAIDU" target="_blank" style="color: #6366f1; text-decoration: none;">ERNIE</a> 
            for HTML generation
        </div>
        """,
        elem_id="footer"
    )
    
    # Tips section: collapsed-by-default accordion with static usage notes
    # (supported file types, OCR quality hints, GitHub Pages deployment).
    with gr.Accordion("💡 Tips for Best Results", open=False):
        gr.Markdown("""
        **File Types:** PDF, PNG, JPG, JPEG, BMP, TIFF
        
        **For Best OCR Results:**
        - Use high-resolution, clear images
        - Ensure good contrast between text and background  
        - Avoid skewed or rotated documents
        - PDFs generally produce the best results
        
        **🚀 Deploy to GitHub Pages:**
        1. Create a new GitHub repository or use an existing one
        2. Download the generated HTML file from above
        3. Upload it to your repository as `index.html`
        4. Go to repository Settings → Pages
        5. Select "Deploy from a branch" → Choose "main" branch
        6. Your page will be live at `https://yourusername.github.io/yourrepository`
        
        **💡 Pro Tips:**
        - Enable custom domains in GitHub Pages settings
        - Use GitHub Actions for automated deployments
        - Consider using Jekyll themes for enhanced styling
        """)


# Launch the app only when this file is executed as a script, not on import
if __name__ == "__main__":
    app.launch()