from flask import Blueprint, request, jsonify, render_template
import requests
from app import mysql
from app.config import Config
import uuid
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse
import re
from flask_cors import CORS
from google.oauth2 import service_account
from googleapiclient.discovery import build
import json
from dateutil import parser
from dateutil.relativedelta import relativedelta
import logging
import os
from datetime import datetime, timedelta
import pytz
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import MySQLdb.cursors
from app.routes.auth import require_basic_auth
from app.utils.company_settings import get_company_smtp_settings
from werkzeug.utils import secure_filename
import time
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
import faiss
import numpy as np
from mysql.connector import Error
from mysql.connector.pooling import MySQLConnectionPool
from mysql.connector import pooling
import threading
from contextlib import contextmanager
from app.utils.db_manager import db_manager


# Blueprint that carries the HTTP API routes defined in this module.
api_bp = Blueprint('api', __name__)

# Enable CORS with credentials support for every /api/* route on the blueprint.
# NOTE(review): "origins": "*" together with credentials is normally refused by
# browsers; the after_request hook in this module echoes the request Origin
# instead. Confirm that allowing any origin with credentials is intended.
# NOTE(review): "allow_credentials" does not look like a Flask-CORS option
# ("supports_credentials" is the documented one) - confirm it has any effect.
CORS(api_bp, resources={
    r"/api/*": {
        "origins": "*",
        "methods": ["GET", "POST", "OPTIONS"],
        "allow_headers": ["Content-Type", "X-Response-Type", "Authorization"],
        "expose_headers": ["Content-Type", "X-Response-Type"],
        "supports_credentials": True,
        "allow_credentials": True
    }
})

# Add CORS headers to all responses
@api_bp.after_request
def add_cors_headers(response):
    """Stamp the CORS response headers onto every response of this blueprint.

    The request's Origin header, when present, is echoed back as the allowed
    origin; the credentials, methods and headers values are fixed.

    NOTE(review): echoing an arbitrary Origin while allowing credentials lets
    any site make credentialed calls - confirm this is the intended policy.

    Args:
        response: The outgoing Flask response.

    Returns:
        The same response object with the headers set.
    """
    cors_headers = {}

    request_origin = request.headers.get('Origin')
    if request_origin:
        cors_headers['Access-Control-Allow-Origin'] = request_origin

    cors_headers['Access-Control-Allow-Credentials'] = 'true'
    cors_headers['Access-Control-Allow-Methods'] = 'GET, POST, OPTIONS'
    cors_headers['Access-Control-Allow-Headers'] = 'Content-Type, X-Response-Type, Authorization'

    for header_name, header_value in cors_headers.items():
        response.headers[header_name] = header_value
    return response

@api_bp.route('/api/<company_id>/chat', methods=['OPTIONS'])
def handle_options(company_id):
    """Answer the OPTIONS (preflight) request for the chat endpoint.

    Args:
        company_id: Captured by the route pattern; not used.

    Returns:
        An empty body with HTTP status 204.
    """
    empty_body, no_content_status = '', 204
    return empty_body, no_content_status

# Path to the Google service-account credentials file. It is relative, so it
# is resolved against the process working directory.
SERVICE_ACCOUNT_FILE = 'config/gpt-ronnie-9ad6512012b3.json'

# Fail fast at import time when the credentials file is missing.
if not os.path.exists(SERVICE_ACCOUNT_FILE):
    raise FileNotFoundError(f"Service account file not found at {SERVICE_ACCOUNT_FILE}")

# OAuth scope requested for the Google Calendar API.
SCOPES = ['https://www.googleapis.com/auth/calendar']

# Set up logging.
# NOTE(review): basicConfig at import time sets the root logger to DEBUG for
# the whole process - confirm this is wanted outside development.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Upload location and accepted file extensions (presumably for voice-message
# uploads handled elsewhere in this module - verify against the upload route).
UPLOAD_FOLDER = 'uploads/voice_messages'
ALLOWED_EXTENSIONS = {'mp3'}

# Create the upload folder if it doesn't exist
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

def allowed_file(filename):
    """Return True when filename's extension is listed in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot, compared case-insensitively.
    A name without any dot is rejected.
    """
    _, dot, extension = filename.rpartition('.')
    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS

def get_website_content(url):
    """Fetch a page and collect the same-domain links found on it.

    Args:
        url: Address of the page to crawl. A leading '@' is dropped and
            'https://' is prepended when no http(s) scheme is present.

    Returns:
        A list of ``{'url': absolute_url, 'text': link_text}`` dicts, one per
        distinct same-domain URL that has non-empty link text (text is
        whitespace-normalised and truncated to 255 characters). An empty list
        is returned when the page cannot be fetched (including HTTP error
        statuses) or parsed.
    """
    try:
        if url.startswith('@'):
            url = url[1:]

        if not url.startswith(('http://', 'https://')):
            url = 'https://' + url

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        response = requests.get(url, headers=headers, timeout=15)
        # Without this an error page (404, 500, ...) would be crawled as if it
        # were the site and its links returned to the caller.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        links = []
        seen_urls = set()
        base_domain = urlparse(url).netloc

        for a in soup.find_all('a', href=True):
            href = a['href']
            if href.startswith(('#', 'javascript:', 'mailto:', 'tel:')):
                continue

            full_url = urljoin(url, href)
            if urlparse(full_url).netloc != base_domain or full_url in seen_urls:
                continue

            # Collapse runs of whitespace in the visible link text.
            text = re.sub(r'\s+', ' ', a.get_text()).strip()
            if not text:
                # Do not mark the URL as seen: an image-only anchor must not
                # hide a later anchor to the same page that does have text.
                continue

            seen_urls.add(full_url)
            link_data = {
                'url': full_url,
                'text': text[:255]
            }
            links.append(link_data)
            print(f"Found link: {link_data['text']} -> {link_data['url']}")

        print(f"Total links found: {len(links)}")
        return links

    except Exception as e:
        # Best-effort crawl: any failure is reported and yields no links.
        print(f"Error crawling website: {str(e)}")
        return []

def find_relevant_urls(website_url, query, company_id):
    """Return the stored website pages of a company that match a query.

    When the company has no rows in ``website_pages`` yet, the website is
    crawled once with ``get_website_content`` and the links are stored. The
    search then matches either a fixed set of contact-page patterns (when the
    query mentions "contact") or the first word of the query against the page
    title and URL.

    Args:
        website_url: Site to crawl when nothing is cached for the company.
        query: Free-text user query.
        company_id: Company whose pages are searched.

    Returns:
        The ``(url, page_title)`` rows that match, or an empty list when the
        query has no words or any error occurs.
    """
    cursor = None
    try:
        cursor = mysql.connection.cursor()
        print(f"Searching URLs for company {company_id} with query: {query}")  # Debug log

        # First check if we have cached URLs
        cursor.execute("""
            SELECT url, page_title 
            FROM website_pages 
            WHERE company_id = %s
        """, (company_id,))

        cached_pages = cursor.fetchall()
        print(f"Found {len(cached_pages) if cached_pages else 0} cached pages")

        if not cached_pages:
            print(f"No cached pages found, crawling website: {website_url}")
            links = get_website_content(website_url)
            print(f"Crawled {len(links)} links from website")

            # Store the links in database; one bad row must not stop the rest.
            for link in links:
                try:
                    cursor.execute("""
                        INSERT INTO website_pages (id, company_id, url, page_title, page_content)
                        VALUES (%s, %s, %s, %s, %s)
                    """, (str(uuid.uuid4()), company_id, link['url'], link['text'], ''))
                except Exception as e:
                    print(f"Error storing URL {link['url']}: {str(e)}")

            mysql.connection.commit()

        # Search for relevant URLs based on query
        lowered_query = query.lower()
        search_terms = lowered_query.split()

        if 'contact' in lowered_query:
            # The LIKE patterns are passed as parameters: with a parameterised
            # statement the driver %-formats the SQL text, so a literal
            # '%contact%' inside it is read as a format code and the call fails.
            cursor.execute("""
                SELECT url, page_title 
                FROM website_pages 
                WHERE company_id = %s 
                AND (
                    LOWER(page_title) LIKE %s 
                    OR LOWER(url) LIKE %s
                    OR LOWER(page_title) LIKE %s
                    OR LOWER(url) LIKE %s
                )
            """, (company_id, '%contact%', '%contact%', '%reach%', '%reach-us%'))
        elif search_terms:
            first_term_pattern = f"%{search_terms[0]}%"
            cursor.execute("""
                SELECT url, page_title 
                FROM website_pages 
                WHERE company_id = %s 
                AND (LOWER(page_title) LIKE %s OR LOWER(url) LIKE %s)
            """, (company_id, first_term_pattern, first_term_pattern))
        else:
            # Empty or whitespace-only query: nothing to search for.
            print("Found 0 relevant URLs")  # Debug log
            return []

        relevant_urls = cursor.fetchall()
        print(f"Found {len(relevant_urls)} relevant URLs")  # Debug log
        return relevant_urls

    except Exception as e:
        print(f"Error in find_relevant_urls: {str(e)}")
        return []
    finally:
        if cursor is not None:
            try:
                cursor.close()
            except Exception as e:
                print(f"Error closing cursor in find_relevant_urls: {str(e)}")

def create_meeting_prompt(company_name):
    """Return the fixed instructions the assistant follows to book a meeting.

    Args:
        company_name: Accepted for interface compatibility; the returned text
            does not depend on it.

    Returns:
        The multi-line prompt describing the email -> date -> time ->
        confirmation flow.
    """
    # No placeholders are interpolated, so a plain string literal is enough.
    meeting_instructions = """When handling meeting scheduling requests, strictly follow this process:

    1. When user mentions scheduling/booking a meeting:
       Response: "I'll help you schedule a meeting. First, please provide your email address."

    2. After receiving email:
       Response: "Thank you. What date would you like to schedule the meeting for? You can use YYYY-MM-DD format or say something like 'tomorrow' or 'next Monday'."

    3. After receiving date:
       Response: "Got it. What time would you prefer? Please use 24-hour format (HH:MM), for example: 14:30"

    4. After receiving time:
       Show confirmation message:
       "Please review these meeting details:
       - Email: [collected_email]
       - Date: [formatted_date]
       - Time: [formatted_time]
       
       Type 'confirm' to schedule the meeting or 'cancel' to start over."


    Important:
    - Always validate email format
    - Convert natural language dates to YYYY-MM-DD format
    - Ensure time is in HH:MM format
    - Only proceed to scheduling after explicit confirmation
    - Store meeting details in database after confirmation
    """
    return meeting_instructions
def parse_natural_date(date_str):
    """Convert an ISO or natural-language date into ISO and readable strings.

    Accepted inputs are 'YYYY-MM-DD', 'today', 'tomorrow', 'next <weekday>',
    and anything dateutil can parse in fuzzy mode. Relative dates are computed
    from the current UTC time.

    Args:
        date_str: The date text supplied by the user.

    Returns:
        ``{'iso_date': 'YYYY-MM-DD', 'readable_date': 'Weekday, Month DD, YYYY'}``
        or None when the text cannot be interpreted.
    """
    # Local import: the module only imports datetime and timedelta at the top.
    from datetime import timezone

    try:
        date_str = date_str.strip()

        # First check if it's already in YYYY-MM-DD format
        if re.match(r'^\d{4}-\d{2}-\d{2}$', date_str):
            parsed_date = datetime.strptime(date_str, '%Y-%m-%d')
            return {
                'iso_date': date_str,
                'readable_date': parsed_date.strftime('%A, %B %d, %Y')
            }

        # Get current date in UTC
        now = datetime.now(timezone.utc)
        date_str = date_str.lower()

        weekdays = {
            'monday': 0,
            'tuesday': 1,
            'wednesday': 2,
            'thursday': 3,
            'friday': 4,
            'saturday': 5,
            'sunday': 6
        }

        if date_str == 'today':
            parsed_date = now
        elif date_str == 'tomorrow':
            parsed_date = now + timedelta(days=1)
        elif date_str.startswith('next ') and date_str[5:] in weekdays:
            # "next <today's weekday>" means a full week ahead, never today.
            days_ahead = (weekdays[date_str[5:]] - now.weekday()) % 7 or 7
            parsed_date = now + timedelta(days=days_ahead)
        else:
            # Try to parse other formats using dateutil
            parsed_date = parser.parse(date_str, fuzzy=True)
            # dateutil returns a naive datetime unless the text names a zone.
            # Make it UTC-aware BEFORE comparing with the aware `now`;
            # comparing naive with aware raises TypeError, which used to make
            # every free-form date come back as None.
            if parsed_date.tzinfo is None:
                parsed_date = parsed_date.replace(tzinfo=timezone.utc)
            # If the parsed date is in the past, push it one week forward
            if parsed_date < now and 'next' not in date_str:
                parsed_date += relativedelta(weeks=1)

        # Format the date
        return {
            'iso_date': parsed_date.strftime('%Y-%m-%d'),
            'readable_date': parsed_date.strftime('%A, %B %d, %Y')
        }

    except Exception as e:
        print(f"Error parsing date '{date_str}': {str(e)}")
        return None

# Embedding model shared by this module's FAISS vector-store code.
# It is instantiated once, at import time.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

def clean_text_content(text):
    """Normalise document text before it is chunked and embedded.

    The text is round-tripped through UTF-8 with replacement, NUL and
    U+FFFD characters are removed, and every run of whitespace is collapsed
    to a single space.

    Args:
        text: Raw document text; falsy values are treated as empty.

    Returns:
        The cleaned text, or "" when the input is empty, the cleaned result is
        shorter than 10 characters, or cleaning raises.
    """
    if not text:
        return ""

    try:
        # Anything that cannot be encoded as UTF-8 is substituted, not raised.
        utf8_safe = text.encode('utf-8', errors='replace').decode('utf-8')

        # Drop NUL bytes and Unicode replacement characters.
        for unwanted in ('\x00', '\ufffd'):
            utf8_safe = utf8_safe.replace(unwanted, '')

        # Collapse whitespace runs and trim the ends.
        collapsed = re.sub(r'\s+', ' ', utf8_safe).strip()

        # Anything under 10 characters is treated as having no usable content.
        if len(collapsed) < 10:
            logger.warning(f"Text too short after cleaning: {len(collapsed)} chars")
            return ""

        return collapsed
    except Exception as e:
        logger.warning(f"Error cleaning text content: {str(e)}")
        return ""

def create_or_get_vector_store(company_id, query_executor):
    """
    Load the company's FAISS vector store from the database, or build it.

    A stored, base64-encoded store is deserialised and returned when present.
    Otherwise every row of ``company_documents`` for the company is cleaned,
    split into chunks and embedded, and the resulting store is saved back to
    ``company_vector_stores`` (a failed save is logged and the in-memory store
    is still returned).

    Args:
        company_id: Company whose documents are indexed.
        query_executor: Callable ``(sql, params, fetch=True)`` that runs a
            statement and returns the fetched rows (``execute_query`` in this
            module is passed by the chat route).

    Returns:
        The FAISS vector store, or None when the company has no usable
        documents or any step fails.
    """
    # Local import: base64 is not among this module's top-level imports.
    import base64

    logger.info(f"Creating/retrieving vector store for company_id: {company_id}")
    try:
        # Check if vector store exists in database
        result = query_executor("""
            SELECT vector_store 
            FROM company_vector_stores 
            WHERE company_id = %s
        """, (company_id,))

        if result and result[0] and result[0][0]:
            logger.info(f"Found existing vector store for company_id: {company_id}. Deserializing...")
            try:
                # Decode base64 back to bytes
                serialized_bytes = base64.b64decode(result[0][0])

                # SECURITY NOTE: FAISS deserialisation is pickle-based, so
                # allow_dangerous_deserialization=True trusts whatever is
                # stored in company_vector_stores. Only this function should
                # ever write that column.
                vector_store = FAISS.deserialize_from_bytes(
                    embeddings=embeddings,
                    serialized=serialized_bytes,
                    allow_dangerous_deserialization=True
                )
                logger.info(f"Vector store deserialized successfully for company_id: {company_id}.")
                return vector_store
            except Exception as e:
                logger.warning(f"Failed to deserialize existing vector store: {str(e)}. Creating new one.")
                # Delete corrupted vector store
                query_executor("""
                    DELETE FROM company_vector_stores 
                    WHERE company_id = %s
                """, (company_id,), fetch=False)
        else:
            logger.info(f"No existing vector store found for company_id: {company_id}. Creating new one.")

        # Fetch and process documents with better debugging
        documents = query_executor("""
            SELECT id, file_content, created_at, file_name, file_type
            FROM company_documents 
            WHERE company_id = %s
        """, (company_id,))

        if not documents:
            logger.warning(f"No documents found for company_id: {company_id}")
            return None

        logger.info(f"Found {len(documents)} documents to process")

        texts = []
        metadatas = []
        processed_docs = 0
        skipped_docs = 0

        # The splitter does not depend on the document, so build it once.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1200,
            chunk_overlap=200,
            length_function=len,
            separators=["\n\n", "\n", ". ", "! ", "? ", " ", ""]
        )

        for doc_id, content, created_at, file_name, file_type in documents:
            try:
                logger.info(f"Processing document: {file_name} (Type: {file_type}, Size: {len(content) if content else 0} chars)")

                # Check if content exists
                if not content or len(content.strip()) < 10:
                    logger.warning(f"Skipping document {file_name} - no content or too short")
                    skipped_docs += 1
                    continue

                cleaned_content = clean_text_content(content)
                if not cleaned_content:
                    logger.warning(f"Skipping document {file_name} - no content after cleaning")
                    skipped_docs += 1
                    continue

                logger.info(f"Document {file_name} cleaned content length: {len(cleaned_content)} chars")

                chunks = text_splitter.split_text(cleaned_content)

                logger.info(f"Document {file_name} split into {len(chunks)} chunks")

                # created_at may be a datetime or already a string.
                created_at_text = created_at.isoformat() if hasattr(created_at, 'isoformat') else str(created_at)

                for i, chunk in enumerate(chunks):
                    cleaned_chunk = chunk.strip()
                    if len(cleaned_chunk) > 20:  # Minimum chunk length
                        texts.append(cleaned_chunk)
                        metadatas.append({
                            "doc_id": doc_id,
                            "file_name": file_name,
                            "file_type": file_type,
                            "chunk_index": i,
                            "created_at": created_at_text
                        })
                        logger.debug(f"Added chunk {i} from {file_name}: {len(cleaned_chunk)} chars")
                    else:
                        logger.debug(f"Skipped chunk {i} from {file_name} - too short: {len(cleaned_chunk)} chars")

                processed_docs += 1

            except Exception as e:
                # One bad document must not prevent indexing the others.
                logger.error(f"Error processing document {file_name} (ID: {doc_id}): {str(e)}")
                skipped_docs += 1
                continue

        logger.info(f"Document processing complete: {processed_docs} processed, {skipped_docs} skipped")
        logger.info(f"Total chunks created: {len(texts)}")

        if not texts:
            logger.warning(f"No text chunks generated for company_id: {company_id}")
            return None

        # Create vector store with better error handling
        try:
            vector_store = FAISS.from_texts(
                texts=texts,
                embedding=embeddings,
                metadatas=metadatas
            )
            logger.info(f"Vector store created successfully with {len(texts)} chunks")
        except Exception as e:
            logger.error(f"Error creating vector store: {str(e)}")
            return None

        # Serialize and encode for database storage
        try:
            serialized = vector_store.serialize_to_bytes()
            serialized_b64 = base64.b64encode(serialized).decode('utf-8')

            # Save to database
            query_executor("""
                INSERT INTO company_vector_stores (company_id, vector_store)
                VALUES (%s, %s)
                ON DUPLICATE KEY UPDATE vector_store = %s
            """, (company_id, serialized_b64, serialized_b64), fetch=False)

            logger.info(f"Vector store saved successfully for company_id: {company_id}")

        except Exception as e:
            logger.error(f"Error saving vector store: {str(e)}")
            # Return in-memory vector store even if save fails
            logger.warning("Vector store created in memory but not saved to database")

        return vector_store

    except Exception as e:
        logger.error(f"Critical error in create_or_get_vector_store: {str(e)}", exc_info=True)
        return None

# Retry policy constants.
# NOTE(review): neither constant is referenced in the part of this module
# reviewed here - confirm they are still used before relying on them.
MAX_RETRIES = 3
RETRY_DELAY = 1  # seconds

# Thread-local storage for database connections
thread_local = threading.local()


@contextmanager
def get_db_connection():
    """Yield this thread's database connection, committing on success.

    The connection is obtained from ``db_manager`` on first use and then
    cached on ``thread_local`` for later calls from the same thread.

    On a clean exit of the ``with`` body the connection is committed, so
    writes issued through it are persisted (``execute_query`` documents that
    this context manager performs the commit). On any exception the cached
    connection is closed and discarded, so the next call starts with a fresh
    one, and the exception is re-raised.

    Yields:
        The cached connection object.
    """
    try:
        if not hasattr(thread_local, "connection"):
            thread_local.connection = db_manager.get_connection()
        connection = thread_local.connection
        yield connection
        # Reached only when the with-body finished without raising.
        connection.commit()
    except Exception:
        stale = getattr(thread_local, "connection", None)
        if stale is not None:
            del thread_local.connection
            try:
                stale.close()
            except Exception:
                # Best effort: the connection is being thrown away anyway and
                # the original error below is the one worth reporting.
                pass
        raise

def execute_query(query, params=None, fetch=True):
    """
    Run one SQL statement on the connection yielded by get_db_connection.

    Args:
        query: SQL text with driver-style placeholders.
        params: Values for the placeholders; None is treated as no parameters.
        fetch: When true the fetched rows are returned; pass False for
            statements such as INSERT or UPDATE that produce no rows.

    Returns:
        ``cursor.fetchall()`` when ``fetch`` is true, otherwise None.

    Raises:
        Any exception raised while connecting or executing; it is logged and
        re-raised so a failure is never mistaken for an empty result.
    """
    try:
        with get_db_connection() as conn, conn.cursor() as cursor:
            cursor.execute(query, params or ())
            # No commit is issued here; persisting writes is left to
            # get_db_connection or to the connection's autocommit setting.
            # NOTE(review): confirm that one of them actually commits.
            return cursor.fetchall() if fetch else None
    except Exception as e:
        logger.error(f"Failed to execute query: {e}", exc_info=True)
        raise

# Upper bound, in seconds, for one Groq completion request. Without a timeout
# a stalled upstream would block the worker indefinitely.
_GROQ_TIMEOUT_SECONDS = 60


def _retrieve_document_context(vector_store, user_message, company_id):
    """Return the stored document excerpts most similar to the user message.

    Up to 15 chunks are retrieved; those scoring below 2.5 (lower is better)
    are kept and the best 8 are joined. When none pass the threshold, the top
    3 retrieved chunks are used instead. Returns "" when there is no vector
    store, nothing is retrieved, or the search fails.
    """
    if not vector_store:
        logger.warning(f"No vector store for company_id: {company_id}")
        return ""

    logger.info(f"Vector store available for company_id: {company_id}. Searching for: '{user_message}'")
    try:
        docs = vector_store.similarity_search_with_score(user_message, k=15)
        logger.info(f"Found {len(docs)} similar documents")

        relevant_docs = []
        for doc, score in docs:
            logger.info(f"Chunk score: {score:.3f} (File: {doc.metadata.get('file_name', 'Unknown')})")
            if score < 2.5:
                relevant_docs.append((doc.page_content, score))
            else:
                logger.info(f"Filtered out chunk with score: {score:.3f}")

        if relevant_docs:
            # Sort by score (lower is better) and keep the best 8 chunks.
            relevant_docs.sort(key=lambda item: item[1])
            doc_content = "\n---\n".join(content for content, _ in relevant_docs[:8])
            logger.info(f"Retrieved {len(relevant_docs)} relevant chunks. Content length: {len(doc_content)}")
            return doc_content

        if docs:
            # Fallback: use the top 3 chunks even though their scores are high.
            fallback_chunks = []
            for doc, score in docs[:3]:
                fallback_chunks.append(doc.page_content)
                logger.info(f"Using fallback chunk with score: {score:.3f}")
            logger.info(f"Using {len(fallback_chunks)} fallback chunks")
            return "\n---\n".join(fallback_chunks)

        logger.warning("No documents found in vector store")
        return ""

    except Exception as e:
        # Retrieval is optional: the chat continues on instructions alone.
        logger.error(f"RAG search error: {str(e)}", exc_info=True)
        return ""


def _build_system_prompt(company, doc_content, settings, available_urls):
    """Assemble the system prompt sent to the language model.

    Args:
        company: Row ``(name, instructions, website, welcome_message)``.
        doc_content: Retrieved document excerpts, or "".
        settings: Row ``(available_days, start_time, end_time)`` or None.
        available_urls: Rows ``(url, page_title)`` of the company's pages.
    """
    # Format settings for the system message
    if settings:
        available_days = settings[0].split(',') if settings[0] else []
        start_time = settings[1]
        end_time = settings[2]

        settings_info = f"""
            Meeting Availability:
            - Available Days: {', '.join(available_days)}
            - Time Slot: {start_time} to {end_time}
            """
    else:
        settings_info = "No meeting settings configured yet."

    # Format URLs; fall back to the main website when no pages are stored.
    if available_urls:
        url_context = "Available pages:\n" + "".join(
            f"- {title}: {url}\n" for url, title in available_urls
        )
    else:
        url_context = f"Main website: {company[2]}\n"

    company_info = ""
    if company[1]:  # instructions
        company_info += f"General Information:\n{company[1]}\n\n"

    if doc_content:
        company_info += f"Additional Information from Documents:\n{doc_content}\n\n"

    return f"""You are a customer service representative for {company[0]}. 
            Keep your responses brief and professional.

            Company Information:
            {company_info}

            Website Information:
            {url_context}

            {settings_info}

            Meeting Scheduling Instructions:
            {create_meeting_prompt(company[0])}

            Guidelines:
            1. STRICT SCOPE: You can ONLY answer questions related to {company[0]}'s services, products, policies, procedures, and information provided in the company documents and instructions above.
            2. RESTRICTED RESPONSES: For ANY question not directly related to {company[0]}'s business, services, or uploaded documents, respond ONLY with: "For queries not covered here, please visit [HELPDESK_LINK] or contact Helpdesk. Our expert advisor will answer you."
            3. NO GENERAL KNOWLEDGE: Do not provide information about external topics like YouTube, Google, other companies, general knowledge, news, weather, or anything unrelated to {company[0]}.
            4. DOCUMENT-BASED ONLY: Use ONLY the information provided in the company instructions and documents to answer customer queries. Do not use any external knowledge.
            5. COMPANY-SPECIFIC QUERIES: Only respond to questions about:
               - {company[0]}'s services and products
               - Company policies and procedures
               - Information contained in uploaded documents
               - Meeting scheduling with {company[0]}
               - General greetings and how you can help
            6. FALLBACK RULE: If a question is not about {company[0]}'s business or not covered in the provided documents, immediately respond with the helpdesk message.
            7. NO CREATIVE RESPONSES: Do not make up information or provide general advice not related to {company[0]}.
            8. MEETING SCHEDULING: For meeting requests, strictly follow the meeting scheduling process using only the company's available slots.
            9. GREETINGS: If customer starts with hello or similar greetings, respond briefly and ask how you can help with {company[0]}'s services.
            10. WEBSITE QUERIES: For website-related queries, only use the URLs provided in the company information above.
            11. IMPORTANT: Only include the [ACTION]{{...}}[/ACTION] tag for scheduling a meeting when the user explicitly asks to schedule a meeting AND provides all necessary information (email, desired date, and desired time).
            12. CRITICAL: When you have relevant information from the documents about {company[0]}, use it to provide a helpful answer. Do not default to the fallback response unless you truly have no relevant information about {company[0]}.
            13. IMPORTANT: Always respond with plain text only. Do not use HTML tags, anchor tags, or any markup in your responses.
            14. When referring to helpdesk, use [HELPDESK_LINK] as a placeholder that will be converted to a clickable link.
            15. REMEMBER: You are {company[0]}'s AI assistant. Stay strictly within the scope of {company[0]}'s business and uploaded documents.
             # IMPORTANT: If a user asks about a person (e.g., "Is Amrit Bahee working with this company?") or requests information about any individual, ONLY answer if that person's name is explicitly mentioned in the company documents or instructions as being associated with the company. DO NOT create or assume information about any person not found in the provided documents. If the requested name is not found, respond with the helpdesk fallback message.
             # If a user asks about "our company" or uses phrases like "your company", "this company", or anything that seems to refer to the company in general, always interpret it as a question about the company whose widget is being used (i.e., {company[0]}). Do not ask the user to clarify which company; always assume they are referring to {company[0]} and answer accordingly using only the provided company information and documents.
             """


def _store_chat_message(company_id, role, content, session_id, is_voice):
    """Insert one chat message row for the given session."""
    execute_query("""
        INSERT INTO chat_messages (
            id, company_id, role, content, 
            session_id, is_voice_input
        )
        VALUES (%s, %s, %s, %s, %s, %s)
    """, (
        str(uuid.uuid4()), company_id, role,
        content, session_id, is_voice
    ), fetch=False)


@api_bp.route('/api/<company_id>/chat', methods=['POST'])
def company_chat(company_id):
    """Answer one chat message for a company's assistant.

    Expects a JSON body with ``message`` (required, non-empty string) and
    optional ``session_id`` and ``is_voice``. The message is stored, the
    session history plus a company-specific system prompt is sent to the Groq
    chat-completions API, and the reply is stored and returned.

    Returns:
        200 with ``{"success": True, "response", "session_id", "should_speak"}``;
        400 when the body is not JSON or ``message`` is missing;
        404 when the company does not exist;
        500 when the model call or anything else fails.
    """
    try:
        # silent=True yields None for a missing or malformed JSON body, so the
        # client gets a 400 below rather than an exception turned into a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({"success": False, "error": "Request must be JSON"}), 400

        user_message = data.get('message')
        if not isinstance(user_message, str) or not user_message.strip():
            return jsonify({"success": False, "error": "Missing required field: message"}), 400

        # `or` also covers an explicit "session_id": null from the client.
        session_id = data.get('session_id') or str(uuid.uuid4())
        is_voice = data.get('is_voice', False)

        # Get company basic information
        company_data = execute_query("""
            SELECT name, instructions, website, welcome_message
            FROM companies 
            WHERE id = %s
        """, (company_id,))

        if not company_data:
            logger.warning(f"Company not found in 'companies' table for company_id: {company_id}")
            return jsonify({"success": False, "error": "Company not found"}), 404

        company = company_data[0]

        # Retrieve document context (RAG)
        vector_store = create_or_get_vector_store(company_id, execute_query)
        doc_content = _retrieve_document_context(vector_store, user_message, company_id)

        logger.info(f"Final doc_content length: {len(doc_content)}")
        if doc_content:
            logger.info(f"Document content preview: {doc_content[:300]}...")
        else:
            logger.info("No document content found - will rely on company instructions only")

        # Get company settings
        settings = execute_query("""
            SELECT available_days, start_time, end_time 
            FROM company_settings 
            WHERE company_id = %s
        """, (company_id,))
        settings = settings[0] if settings else None

        # Get website URLs
        available_urls = execute_query("""
            SELECT url, page_title 
            FROM website_pages 
            WHERE company_id = %s
        """, (company_id,))

        system_content = _build_system_prompt(company, doc_content, settings, available_urls)

        logger.info(f"Complete system message length: {len(system_content)}")
        logger.info(f"System message preview: {system_content[:500]}...")

        # Store the user message first so it is part of the history read below.
        _store_chat_message(company_id, 'user', user_message, session_id, is_voice)

        # Get chat history
        chat_history = execute_query("""
            SELECT role, content 
            FROM chat_messages 
            WHERE company_id = %s AND session_id = %s 
            ORDER BY created_at ASC
        """, (company_id, session_id))

        # Construct messages array
        messages = [{"role": "system", "content": system_content}]
        messages.extend({"role": role, "content": content} for role, content in chat_history)

        # Call Groq API
        response = requests.post(
            Config.GROQ_API_URL,
            headers={
                "Authorization": f"Bearer {Config.GROQ_API_KEY}",
                "Content-Type": "application/json"
            },
            json={
                "model": "meta-llama/llama-4-scout-17b-16e-instruct",
                "messages": messages,
                "temperature": 1,
                "max_tokens": 1000,
                "top_p": 1,
                "stream": False
            },
            timeout=_GROQ_TIMEOUT_SECONDS
        )

        if response.status_code != 200:
            logger.error(f"Groq API returned status {response.status_code}")
            return jsonify({"success": False, "error": "Failed to get AI response"}), 500

        ai_response = response.json()["choices"][0]["message"]["content"]

        # Store AI response
        _store_chat_message(company_id, 'assistant', ai_response, session_id, is_voice)

        return jsonify({
            "success": True,
            "response": ai_response,
            "session_id": session_id,
            "should_speak": is_voice
        })

    except Exception as e:
        logger.error(f"Error in company_chat: {str(e)}", exc_info=True)
        # NOTE(review): str(e) can expose internal details (SQL errors, hosts)
        # to the caller; consider returning a generic message instead.
        return jsonify({"success": False, "error": str(e)}), 500

def _timedelta_to_hhmm(td):
    """Render a ``timedelta`` time-of-day value as an ``HH:MM`` string."""
    total_seconds = int(td.total_seconds())
    hours, remainder = divmod(total_seconds, 3600)
    minutes, _ = divmod(remainder, 60)
    return f"{hours:02}:{minutes:02}"


@api_bp.route('/api/<company_id>/schedule-meeting', methods=['POST'])
def schedule_meeting(company_id, meeting_data=None):
    """Validate a meeting request, store it and email a confirmation.

    Args:
        company_id: Company identifier taken from the URL.
        meeting_data: Optional mapping with the keys 'email', 'date' and
            'time'. When it is omitted or empty, the JSON body of the current
            request is used instead.

    Returns:
        A JSON response on success, or a ``(response, status)`` tuple with
        status 400 for invalid input / unavailable slots and 500 for
        database or unexpected errors.
    """
    try:
        if not meeting_data:
            if not request.is_json:
                return jsonify({"success": False, "error": "Request must be JSON"}), 400

            meeting_data = request.get_json()
            if not meeting_data:
                return jsonify({"success": False, "error": "No JSON data received"}), 400

        required_fields = ['email', 'date', 'time']

        for field in required_fields:
            if field not in meeting_data:
                return jsonify({"success": False, "error": f"Missing required field: {field}"}), 400

        cursor = mysql.connection.cursor()
        try:
            cursor.execute("""
                SELECT available_days, start_time, end_time
                FROM company_settings
                WHERE company_id = %s
            """, (company_id,))
            settings = cursor.fetchone()

            if not settings:
                return jsonify({"success": False, "error": "Company has not configured meeting availability settings"}), 400

            try:
                # parse_natural_date returns a mapping whose 'iso_date' entry
                # is expected to be a YYYY-MM-DD string.
                date_result = parse_natural_date(meeting_data['date'])
                if not date_result:
                    return jsonify({"success": False, "error": f"Could not parse date: {meeting_data['date']}"}), 400

                meeting_date = date_result['iso_date']
                meeting_time = meeting_data['time']
                if not isinstance(meeting_date, str):
                    logger.error(f"meeting_date is not a string: {type(meeting_date)}")
                    return jsonify({"success": False, "error": "Invalid date format"}), 400

                try:
                    meeting_datetime = datetime.strptime(f"{meeting_date} {meeting_time}", "%Y-%m-%d %H:%M")
                    logger.debug(f"Successfully created datetime object: {meeting_datetime}")
                except Exception as e:
                    logger.error(f"Error creating datetime object: {str(e)}")
                    return jsonify({"success": False, "error": f"Invalid date/time format: {str(e)}"}), 400

                day_name = meeting_datetime.strftime("%A")

                # Trim each entry so a stored value such as "Monday, Tuesday"
                # still matches the weekday name.
                available_days = (
                    [day.strip() for day in settings[0].split(',') if day.strip()]
                    if settings[0] else []
                )

                try:
                    # The configured times may arrive as timedelta values;
                    # normalise them to HH:MM strings before parsing.
                    start_time_str = _timedelta_to_hhmm(settings[1]) if isinstance(settings[1], timedelta) else settings[1]
                    end_time_str = _timedelta_to_hhmm(settings[2]) if isinstance(settings[2], timedelta) else settings[2]

                    start_time = datetime.strptime(start_time_str, "%H:%M").time()
                    end_time = datetime.strptime(end_time_str, "%H:%M").time()
                    meeting_time_obj = datetime.strptime(meeting_time, "%H:%M").time()
                except Exception as e:
                    logger.error(f"Error parsing time objects: {str(e)}")
                    return jsonify({"success": False, "error": f"Invalid time format: {str(e)}"}), 400

                if day_name not in available_days:
                    return jsonify({"success": False, "error": f"Meetings are not available on {day_name}. Available days are: {', '.join(available_days)}"}), 400

                if not (start_time <= meeting_time_obj <= end_time):
                    return jsonify({"success": False, "error": f"Meeting time must be between {start_time_str} and {end_time_str}"}), 400

            except Exception as e:
                logger.error(f"Error parsing datetime: {str(e)}")
                return jsonify({"success": False, "error": f"Invalid date/time format: {str(e)}"}), 400

            try:
                cursor.execute("""
                    INSERT INTO meetings (id, company_id, customer_email, meeting_date, meeting_time)
                    VALUES (%s, %s, %s, %s, %s)
                """, (
                    # Stringified like every other id insert in this module.
                    str(uuid.uuid4()),
                    company_id,
                    meeting_data['email'],
                    meeting_date,
                    meeting_time
                ))
                mysql.connection.commit()
            except Exception as e:
                logger.error(f"Database error: {str(e)}")
                return jsonify({"success": False, "error": f"Database error: {str(e)}"}), 500

            email_subject = "Meeting Scheduled Confirmation"

            # One lookup serves both the display name and the contact address.
            cursor.execute("SELECT name, email FROM companies WHERE id = %s", (company_id,))
            company_row = cursor.fetchone()
            company_name = company_row[0] if company_row else "Company"
            company_email = company_row[1] if company_row else "info@mykaptaan.com"

            send_email(
                to_email=meeting_data['email'],
                subject=email_subject,
                date=meeting_date,
                time=meeting_time,
                company_name=company_name,
                company_email=company_email,
                customer_email=meeting_data['email'],
                company_id=company_id
            )

            return jsonify({"success": True, "message": f"Meeting scheduled on {meeting_date} at {meeting_time}"})
        finally:
            # Runs on every return path above, so the cursor is never leaked.
            cursor.close()

    except Exception as e:
        logger.error(f"Error scheduling meeting: {str(e)}")
        return jsonify({"success": False, "error": str(e)}), 500
    
@api_bp.route('/api/<company_id>/chat-history', methods=['GET'])
def get_chat_history(company_id):
    """Return the messages of one chat session in chronological order.

    The session is selected with the ``session_id`` query parameter; a
    missing value yields a 400 response. Each history entry carries the
    message role, its content and an ISO-8601 timestamp (``None`` when the
    row has no ``created_at``). Any failure is reported as a 500 response.
    """
    try:
        session_id = request.args.get('session_id')
        if not session_id:
            return jsonify({"success": False, "error": "Session ID is required"}), 400

        cursor = mysql.connection.cursor()
        try:
            cursor.execute("""
                SELECT role, content, created_at
                FROM chat_messages
                WHERE company_id = %s AND session_id = %s
                ORDER BY created_at ASC
            """, (company_id, session_id))
            messages = cursor.fetchall()
        finally:
            # Release the cursor even when the query raises.
            cursor.close()

        history = [
            {
                "role": role,
                "content": content,
                "timestamp": created_at.isoformat() if created_at else None
            }
            for role, content, created_at in messages
        ]

        return jsonify({
            "success": True,
            "history": history
        })

    except Exception as e:
        print(f"Error fetching chat history: {str(e)}")
        return jsonify({
            "success": False,
            "error": str(e)
        }), 500

@api_bp.route('/api/<company_id>/refresh-urls', methods=['POST'])
def refresh_company_urls(company_id):
    """Replace a company's stored website pages with a fresh crawl.

    Expects a JSON object with a ``websiteUrl`` string. The URL is saved on
    the company, the existing ``website_pages`` rows are deleted, and every
    link returned by ``get_website_content`` is inserted. All changes are
    committed together at the end.

    Returns:
        A JSON response listing the crawled URLs, a 400 response when the
        URL is missing or not a string, or a 500 response on failure.
    """
    try:
        # silent=True turns a missing or non-JSON body into None instead of
        # an exception, so the client gets a 400 rather than a 500.
        payload = request.get_json(silent=True)
        website_url = payload.get('websiteUrl') if isinstance(payload, dict) else None
        if not website_url or not isinstance(website_url, str):
            return jsonify({"success": False, "error": "Website URL is required"}), 400

        cursor = mysql.connection.cursor()
        try:
            # NOTE(review): the value is stored exactly as received, i.e.
            # before a leading '@' is stripped below - confirm this is the
            # intended stored form.
            cursor.execute("UPDATE companies SET website = %s WHERE id = %s", (website_url, company_id))

            if website_url.startswith('@'):
                website_url = website_url[1:]

            print(f"Refreshing URLs for website: {website_url}")  # Debug log

            # Clear existing pages
            cursor.execute("DELETE FROM website_pages WHERE company_id = %s", (company_id,))

            # Crawl and store new links
            links = get_website_content(website_url)
            stored_count = 0

            for link in links:
                # Best effort: one bad link must not abort the whole refresh.
                try:
                    cursor.execute("""
                        INSERT INTO website_pages (id, company_id, url, page_title)
                        VALUES (%s, %s, %s, %s)
                    """, (str(uuid.uuid4()), company_id, link['url'], link['text']))
                    stored_count += 1
                except Exception as e:
                    print(f"Error storing URL: {str(e)}")

            mysql.connection.commit()
        finally:
            # Release the cursor on success and on every failure path.
            cursor.close()

        print(f"Successfully stored {stored_count} URLs")  # Debug log

        return jsonify({
            "success": True,
            "message": f"Stored {stored_count} URLs",
            "urls": [link['url'] for link in links]
        })

    except Exception as e:
        print(f"Error refreshing URLs: {str(e)}")
        return jsonify({"success": False, "error": str(e)}), 500


def extract_meeting_info(message):
    """Pick the first email address, date and time out of a chat message.

    The date must look like ``YYYY-MM-DD`` and the time like ``HH:MM``.

    Returns:
        An ``(email, date, time)`` tuple; each element is the matched text
        or ``None`` when nothing in the message matches.
    """
    def first_hit(pattern):
        # Leftmost occurrence of the pattern, or None when it is absent.
        found = re.search(pattern, message)
        return found.group(0) if found else None

    return (
        first_hit(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'),
        first_hit(r'\d{4}-\d{2}-\d{2}'),
        first_hit(r'\d{2}:\d{2}'),
    )

def send_email(to_email, subject, date, time, company_name, company_email, customer_email, company_id):
    """Send the HTML meeting confirmation through the company's SMTP account.

    Args:
        to_email: Recipient address.
        subject: Subject line of the message.
        date: Meeting date as ``YYYY-MM-DD``.
        time: Meeting time as ``HH:MM``.
        company_name: Passed to the email template.
        company_email: Passed to the email template.
        customer_email: Passed to the email template.
        company_id: Company whose SMTP settings are looked up.

    Returns:
        None. When the company has no SMTP settings, the problem is reported
        and no message is sent.

    Raises:
        ValueError: If ``date``/``time`` do not match the expected formats.
        smtplib.SMTPException, OSError: If connecting, logging in or sending
            fails.
    """
    smtp_settings = get_company_smtp_settings(company_id)

    if not smtp_settings:
        # Without settings there is no server to talk to. Previously the
        # code fell through and failed with a TypeError on the None value.
        print(f"No SMTP settings found for company {company_id}")
        return

    start_date, end_date = format_google_calendar_date(date, time)
    html_content = render_template(
        'email_template/meeting.html',
        date=date,
        time=time,
        company_name=company_name,
        start_date=start_date,
        end_date=end_date,
        company_email=company_email,
        customer_email=customer_email
    )

    msg = MIMEMultipart()
    msg['From'] = smtp_settings[6]  # used as the sender address
    msg['To'] = to_email
    msg['Subject'] = subject

    # Attach the HTML content
    msg.attach(MIMEText(html_content, 'html'))

    # Settings positions 1/2 are used as host/port and 3/4 as the login
    # credentials; the connection is implicit-TLS (SMTP_SSL).
    with smtplib.SMTP_SSL(smtp_settings[1], smtp_settings[2], timeout=20) as server:
        server.login(smtp_settings[3], smtp_settings[4])
        server.send_message(msg)



def format_google_calendar_date(date_str, time_str):
    """Build the start/end stamps of a one-hour calendar event.

    ``date_str`` is expected as ``YYYY-MM-DD`` and ``time_str`` as ``HH:MM``.
    Both results use the compact ``YYYYMMDDTHHMMSSZ`` layout; the clock time
    is copied through unchanged (no timezone conversion is applied).

    Returns:
        A ``(start, end)`` tuple of strings, with ``end`` one hour after
        ``start``.
    """
    stamp_layout = "%Y%m%dT%H%M%SZ"
    begins = datetime.strptime("{} {}".format(date_str, time_str), "%Y-%m-%d %H:%M")
    finishes = begins + timedelta(hours=1)  # Assuming 1-hour meeting
    return begins.strftime(stamp_layout), finishes.strftime(stamp_layout)

@api_bp.route('/api/<company_id>/widget-settings')
def get_widget_settings(company_id):
    """Return the chat-widget appearance and voice settings for a company.

    When the company has a ``company_settings`` row, that row is returned
    as fetched (widget_color, voice_speed, idle_reminder_seconds, ai_voice,
    in that order). Otherwise a dict of default values is returned.

    NOTE(review): the two branches therefore produce differently shaped
    ``settings`` payloads, and the defaults carry no ``ai_voice`` entry -
    confirm that the widget client handles both.
    """
    cursor = mysql.connection.cursor()
    try:
        cursor.execute("""
            SELECT widget_color, voice_speed, idle_reminder_seconds,ai_voice
            FROM company_settings
            WHERE company_id = %s
        """, (company_id,))
        settings = cursor.fetchone()
    finally:
        # Close the cursor even when the query raises.
        cursor.close()

    return jsonify({
        "success": True,
        "settings": settings if settings else {
            "widget_color": "#F42941",
            "voice_speed": "1.0",
            "idle_reminder_seconds": 5
        }
    })

@api_bp.route('/api/<company_id>/fetch-chats', methods=['GET'])
@require_basic_auth
def api_chat_history(company_id):
    """Return every chat session of a company together with its messages.

    Sessions are ordered by their most recent message, newest first. Each
    session entry contains:

    * ``session_id``
    * ``first_message`` - content of the chronologically first message with
      role ``user`` (``None`` when the session has no user message)
    * ``last_message_time`` and ``message_count``
    * ``messages`` - all messages of the session, oldest first

    Any failure is reported as a 500 response.
    """
    try:
        cursor = mysql.connection.cursor()
        try:
            # Per-session summary. first_message is derived from the ordered
            # messages below, because MIN(content) in SQL would pick the
            # lexicographically smallest text rather than the first one sent.
            cursor.execute("""
                SELECT
                    cm.session_id,
                    MAX(cm.created_at) as last_message_time,
                    COUNT(*) as message_count
                FROM chat_messages cm
                WHERE cm.company_id = %s
                GROUP BY cm.session_id
                ORDER BY last_message_time DESC
            """, (company_id,))

            chat_sessions = cursor.fetchall()
            formatted_sessions = []

            # One follow-up query per session to load its messages.
            for session_id, last_message_time, message_count in chat_sessions:
                cursor.execute("""
                    SELECT role, content, created_at
                    FROM chat_messages
                    WHERE company_id = %s AND session_id = %s
                    ORDER BY created_at ASC
                """, (company_id, session_id))
                messages = cursor.fetchall()
                formatted_messages = [
                    {
                        'role': msg[0],
                        'content': msg[1],
                        'created_at': msg[2].isoformat() if hasattr(msg[2], 'isoformat') else str(msg[2])
                    }
                    for msg in messages
                ]
                first_user_message = next(
                    (m['content'] for m in formatted_messages if m['role'] == 'user'),
                    None
                )
                formatted_sessions.append({
                    'session_id': session_id,
                    'first_message': first_user_message,
                    'last_message_time': last_message_time.isoformat() if hasattr(last_message_time, 'isoformat') else str(last_message_time),
                    'message_count': message_count,
                    'messages': formatted_messages
                })
        finally:
            # Release the cursor on success and on failure.
            cursor.close()

        return jsonify({
            "success": True,
            "chat_sessions": formatted_sessions
        })

    except Exception as e:
        return jsonify({
            "success": False,
            "error": str(e)
        }), 500
    
@api_bp.route('/api/<company_id>/fetch-meetings', methods=['GET'])
@require_basic_auth
def api_meetings(company_id):
    """Return all meetings of a company, most recent date first.

    Each entry holds the meeting ``id``, ``customer_email``, ``meeting_date``
    (ISO formatted when the value supports it) and ``meeting_time`` as a
    string. Any failure is reported as a 500 response.
    """
    try:
        cursor = mysql.connection.cursor()
        try:
            # meeting_time breaks ties so that meetings on the same date
            # come back in a stable order.
            cursor.execute("""
                SELECT
                    id,
                    customer_email,
                    meeting_date,
                    meeting_time
                FROM meetings
                WHERE company_id = %s
                ORDER BY meeting_date DESC, meeting_time DESC
            """, (company_id,))
            meetings = cursor.fetchall()
        finally:
            # Release the cursor even when the query raises.
            cursor.close()

        formatted_meetings = [
            {
                'id': m[0],
                'customer_email': m[1],
                'meeting_date': m[2].isoformat() if hasattr(m[2], 'isoformat') else str(m[2]),
                'meeting_time': str(m[3])
            }
            for m in meetings
        ]

        return jsonify({
            "success": True,
            "meetings": formatted_meetings
        })

    except Exception as e:
        return jsonify({"success": False, "error": str(e)}), 500

@api_bp.route('/api/<company_id>/store-voice-message', methods=['POST'])
def store_voice_message(company_id):
    """Save an uploaded voice recording and register it in the database.

    Expects a multipart form with an ``audio`` file plus the optional
    ``transcript`` and ``session_id`` fields. The file is written into
    ``UPLOAD_FOLDER`` and a ``voice_messages`` row pointing at it is
    inserted.

    Returns:
        A JSON success response, a 400 response when the audio part is
        missing or rejected by ``allowed_file``, or a 500 response on
        failure.
    """
    try:
        if 'audio' not in request.files:
            return jsonify({"success": False, "error": "No audio file provided"}), 400

        audio_file = request.files['audio']
        transcript = request.form.get('transcript')
        session_id = request.form.get('session_id')

        if not audio_file or not allowed_file(audio_file.filename):
            return jsonify({"success": False, "error": "Invalid audio file"}), 400

        # session_id comes straight from the client, so sanitise the name
        # before it becomes part of a filesystem path (prevents "../" escapes).
        filename = secure_filename(f"{company_id}_{session_id}_{int(time.time())}.mp3")
        filepath = os.path.join(UPLOAD_FOLDER, filename)

        # Save the audio file
        audio_file.save(filepath)

        try:
            cursor = mysql.connection.cursor()
            try:
                cursor.execute("""
                    INSERT INTO voice_messages (
                        id, company_id, session_id,
                        transcript, audio_file_path,
                        created_at
                    )
                    VALUES (%s, %s, %s, %s, %s, NOW())
                """, (
                    str(uuid.uuid4()),
                    company_id,
                    session_id,
                    transcript,
                    filepath
                ))
                mysql.connection.commit()
            finally:
                cursor.close()
        except Exception:
            # The row was not stored, so do not leave an unreferenced file
            # behind. Removal is best effort; the original error is re-raised.
            try:
                os.remove(filepath)
            except OSError:
                pass
            raise

        return jsonify({
            "success": True,
            "message": "Voice message stored successfully"
        })

    except Exception as e:
        print(f"Error storing voice message: {str(e)}")
        return jsonify({"success": False, "error": str(e)}), 500

def update_vector_store_with_new_documents(company_id, uploaded_documents):
    """Append newly uploaded documents to a company's stored vector store.

    Args:
        company_id: Company whose ``company_vector_stores`` row is updated.
        uploaded_documents: Iterable of mappings with the keys ``content``
            (text to index), ``doc_id`` and ``filename``.

    Returns:
        True when chunks were added and the store was saved, or when no
        store existed yet and ``create_or_get_vector_store`` produced one.
        False when no chunks could be generated or any error occurred.
    """
    try:
        # Get existing vector store
        result = execute_query("""
            SELECT vector_store
            FROM company_vector_stores
            WHERE company_id = %s
        """, (company_id,))

        if not (result and result[0] and result[0][0]):
            # No existing vector store, create new one
            logger.info(f"Creating new vector store for company {company_id}")
            vector_store = create_or_get_vector_store(company_id, execute_query)
            return vector_store is not None

        # SECURITY: this deserialization is pickle-based (hence the explicit
        # allow_dangerous_deserialization opt-in). It is only safe while the
        # stored bytes are written exclusively by this application.
        vector_store = FAISS.deserialize_from_bytes(
            embeddings=embeddings,
            serialized=result[0][0],
            allow_dangerous_deserialization=True
        )

        # The splitter configuration is the same for every document, so it
        # is built once instead of once per document.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=150,
            length_function=len,
            separators=["\n\n", "\n", ". ", " ", ""]
        )

        all_chunks = []
        all_metadatas = []

        for doc in uploaded_documents:
            chunks = text_splitter.split_text(doc['content'])

            if not chunks:
                logger.warning(f"No chunks generated from {doc['filename']}")
                continue

            # One timestamp per document keeps all of its chunks consistent.
            created_at = datetime.now().isoformat()
            all_chunks.extend(chunks)
            all_metadatas.extend(
                {
                    "doc_id": doc['doc_id'],
                    "file_name": doc['filename'],
                    "chunk_index": chunk_index,
                    "created_at": created_at
                }
                for chunk_index, _ in enumerate(chunks)
            )

            logger.info(f"Processed {len(chunks)} chunks from {doc['filename']}")

        if not all_chunks:
            logger.warning("No chunks generated from uploaded documents")
            return False

        # Add all new chunks to existing vector store
        vector_store.add_texts(
            texts=all_chunks,
            metadatas=all_metadatas
        )

        # Save updated vector store
        serialized = vector_store.serialize_to_bytes()
        execute_query("""
            UPDATE company_vector_stores
            SET vector_store = %s
            WHERE company_id = %s
        """, (serialized, company_id), fetch=False)

        logger.info(f"Added {len(all_chunks)} chunks from {len(uploaded_documents)} documents to vector store")
        return True

    except Exception as e:
        logger.error(f"Error updating vector store with new documents: {str(e)}")
        return False

def refresh_vector_store(company_id):
    """Discard a company's stored vector store and build a new one.

    The existing ``company_vector_stores`` row is deleted first, then
    ``create_or_get_vector_store`` is asked for a store for the company.

    Returns:
        True when a store was produced; False when none was produced or an
        exception was raised along the way (the error is logged).
    """
    try:
        # Remove the stored copy before asking for a store again.
        execute_query("""
            DELETE FROM company_vector_stores 
            WHERE company_id = %s
        """, (company_id,), fetch=False)

        rebuilt_store = create_or_get_vector_store(company_id, execute_query)

        if not rebuilt_store:
            logger.error(f"Failed to refresh vector store for company {company_id}")
            return False

        logger.info(f"Refreshed vector store for company {company_id}")
        return True

    except Exception as exc:
        logger.error(f"Error refreshing vector store: {str(exc)}")
        return False