# from chat_module import generate_response
from groq_module import chat_with_groq, groq_module_fun
# from helpers import extract_json
import os
import docx2txt
import time
import json
import PyPDF2
import base64
from io import BytesIO
from pdf2image import convert_from_path
from PIL import Image
import re
import subprocess
import textract
import pdfplumber
import pytesseract
from pdf2image import convert_from_path
from PIL import Image


def ocr_pdf_to_text(pdf_path):
    """Render each page of *pdf_path* to an image and OCR it with Tesseract.

    Args:
        pdf_path: Path to the PDF file to OCR.

    Returns:
        The concatenated text of all pages (stripped of surrounding
        whitespace), or "" if conversion or OCR fails for any reason.
    """
    try:
        print("Converting PDF to image(s)...")
        images = convert_from_path(pdf_path)

        print("Running OCR on image(s)...")
        # One OCR pass per page; join once instead of repeated string +=.
        page_texts = [pytesseract.image_to_string(img) for img in images]
        return "\n".join(page_texts).strip()
    except Exception as e:
        print(f"OCR failed: {e}")
        return ""


def convert_to_pdf(doc_path):
    """Convert *doc_path* to PDF with headless LibreOffice.

    The PDF is written into the same directory as the source document.
    Returns the expected path of the generated PDF; raises
    subprocess.CalledProcessError if the conversion command fails.
    """
    out_dir = os.path.dirname(doc_path)
    command = [
        'libreoffice', '--headless', '--convert-to', 'pdf', '--outdir', out_dir, doc_path
    ]
    subprocess.run(command, check=True)
    stem, _ext = os.path.splitext(os.path.basename(doc_path))
    return os.path.join(out_dir, stem + ".pdf")

def pdf_to_base64(pdf_path):
    """Render all pages of *pdf_path*, stack them vertically into a single
    JPEG image, and return that image as a Base64-encoded string."""
    pages = convert_from_path(pdf_path)

    # Canvas sized to fit the widest page and all pages stacked end-to-end.
    canvas_width = max(page.width for page in pages)
    canvas_height = sum(page.height for page in pages)
    canvas = Image.new("RGB", (canvas_width, canvas_height), "white")

    offset = 0
    for page in pages:
        canvas.paste(page, (0, offset))
        offset += page.height

    # Serialize the merged image to JPEG in memory, then Base64-encode it.
    stream = BytesIO()
    canvas.save(stream, format="JPEG")
    return base64.b64encode(stream.getvalue()).decode()


def pdf_to_text(pdf_file):
    """Extract plain text from *pdf_file* using pdfplumber.

    Whitespace runs on each page are collapsed to single spaces; pages
    are separated by newlines. Returns None when nothing could be
    extracted or an error occurred.
    """
    try:
        chunks = []
        with pdfplumber.open(pdf_file) as pdf:
            for page in pdf.pages:
                raw = page.extract_text()
                if raw:
                    # Normalize internal whitespace before accumulating.
                    chunks.append(re.sub(r'\s+', ' ', raw).strip())
        print(f"Text extracted from {pdf_file}")
        combined = "".join(chunk + "\n" for chunk in chunks)
        return combined if combined else None
    except Exception as e:
        print(f"Error: {e}")
        return None

# def doc_to_text(doc_path):
#     return textract.process(doc_path).decode('utf-8')

def doc_to_text(doc_path):
    """Best-effort text extraction from a Word document.

    Tries python-docx first; on failure falls back to decoding the raw
    file bytes as latin-1. Always returns a non-empty string
    ("No text found" when nothing could be read at all).
    """
    try:
        from docx import Document
        try:
            paragraphs = Document(doc_path).paragraphs
            joined = "\n".join(p.text for p in paragraphs)
            return joined or "No text found"
        except Exception as e:
            print(f"Error reading docx: {e}")
            # Fallback: decode raw bytes, ignoring anything unmappable.
            with open(doc_path, 'rb') as fh:
                raw = fh.read().decode('latin-1', errors='ignore')
            return raw or "No text found"
    except Exception as e:
        print(f"DOC processing warning: {str(e)}")
        return "No text found"

    
def docx_to_text(file_path):
    """Return the plain text content of a .docx file via docx2txt."""
    return docx2txt.process(file_path)


def save_json_file(json_data, pdf_file):
    """Serialize *json_data* to a .json file named after *pdf_file*.

    The output file shares the PDF's base path with a .json extension
    (e.g. "cv.pdf" -> "cv.json").

    Args:
        json_data: Any JSON-serializable object.
        pdf_file: Path whose base name determines the JSON file name.

    Returns:
        The JSON file name on success, or None when writing fails.
    """
    try:
        base_name = os.path.splitext(pdf_file)[0]
        json_file_name = f"{base_name}.json"

        # Explicit UTF-8 keeps output identical across platforms, and
        # ensure_ascii=False keeps non-ASCII resume text human-readable
        # instead of \uXXXX escapes.
        with open(json_file_name, 'w', encoding='utf-8') as json_file:
            json.dump(json_data, json_file, indent=4, ensure_ascii=False)

        print(f"JSON data successfully saved to {json_file_name}")
        return json_file_name

    except Exception as e:
        print(f"An error occurred while saving the JSON file: {e}")
        return None

def extract_json_from_txt(content):
    """Pull the outermost {...} JSON object out of an LLM response.

    Args:
        content: Either an already-parsed dict (returned unchanged) or a
            string containing a JSON object somewhere inside it.

    Returns:
        The parsed dict on success, or None on any failure (no braces,
        invalid JSON, unsupported input type).
    """
    try:
        # Check if content is already a dictionary
        if isinstance(content, dict):
            print("Content is already a JSON object.")
            return content

        # If content is a string, proceed with extraction
        elif isinstance(content, str):
            start_index = content.find("{")
            end_index = content.rfind("}") + 1

            # rfind returns -1 when "}" is absent, so end_index becomes 0
            # (never -1) -- test each sentinel with its correct value.
            if start_index == -1 or end_index == 0:
                raise ValueError("JSON-like structure not found in the response.")

            json_str = content[start_index:end_index]
            json_data = json.loads(json_str)

            print("JSON extraction and parsing successful.")
            return json_data

        else:
            raise TypeError("Unsupported content type. Expected a string or dictionary.")

    except Exception as e:
        print(f"An error occurred: {e}")
        return None


def check_resume_img(base_64_data, max_retries=3, retry_delay=5):
    """Ask the vision LLM whether the supplied base64 image is a resume.

    Retries up to *max_retries* times, sleeping *retry_delay* seconds
    between attempts, and re-raises the final error once retries are
    exhausted. Returns True/False on success.
    """
    prompt = f'''
    You are an expert in resume analysis. 
    Based on the following given img, determine if it is a resume or another type of document. 
    Please consider common features of resumes such as:
    - Personal details (name, contact information)
    - Work experience and job titles
    - Education history
    - Skills and certifications

    Here is the text to analyze:
    give response in json format only
    {{
        "resume": True/False,
    }}

    Please respond with just "yes" if it is a resume and just "no" if it is not.
    '''

    for attempt in range(1, max_retries + 1):
        try:
            print("Getting response")
            response = groq_module_fun(prompt, base_64_data)
            print(f"LLM Raw Response: {response}")

            parsed = json.loads(response)
            flag = parsed.get("resume")
            print(flag)
            print('File Saved')
            return bool(flag)

        except Exception as e:
            print(f"An error occurred: {e}")
            if attempt < max_retries:
                print(f"Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
            else:
                print("Max retries reached. Operation failed.")
                raise

    return None

def check_resume(transcription, max_retries=3, retry_delay=5):
    """Classify *transcription* as a resume (True) or not (False) via the LLM.

    A valid resume must contain a first name, an email, and a job title.
    Returns False on an unexpected LLM reply or once retries are
    exhausted.
    """
    prompt = f"""
You are an expert document classifier. Decide if the text below is a RESUME.

A VALID resume MUST contain ALL of the following:
- first name
- email
- job title

It MAY also contain:
- technologies or skills
- experience

=== TEXT START ===
{transcription}
=== TEXT END ===

Answer with ONLY one word: yes or no (no punctuation).
"""

    for attempt in range(1, max_retries + 1):
        try:
            print(" Asking LLM...")
            reply = chat_with_groq(prompt)
            print(f"==============LLM Raw Response================: {reply}")
            normalized = reply.strip().lower()
            print(f"LLM said: >>>{normalized}<<<")

            # Guard-style returns: first matching word wins.
            if re.search(r'\byes\b', normalized):
                return True
            if re.search(r'\bno\b', normalized):
                return False
            print(" Unexpected reply from LLM; defaulting to False")
            return False

        except Exception as e:
            print(f" Error occurred: {e}")
            if attempt < max_retries:
                print(f"Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
            else:
                print("Max retries reached.")
                return False

    return False


def resume_extract(transcription, max_retries=5, retry_delay=5):
    """Ask the LLM to pull structured candidate fields from *transcription*.

    Returns a dict guaranteed to contain all nine expected keys (missing
    keys are filled from a default skeleton). Retries on failure,
    sleeping *retry_delay* seconds between attempts, and re-raises the
    final error once *max_retries* is exhausted.
    """
    prompt = f'''
    The following is a transcription of a resume:
    {transcription}

    Based on this transcription, extract and return the following details of the candidate:
        - First Name
        - Last Name (This is the family name or surname. Ensure to extract this correctly.)
        - Job Title
        - Email
        - Technologies (A list of technologies the candidate has mentioned, such as programming languages or tools.)
        - Total Experience (in years)
        - Certifications
        - Bio (The bio should be extracted from the profile section if available.)
        - Projects (A list of projects the candidate has worked on.)

    Make sure **all** 9 fields are present in the response, even if some are empty.

    Return the result in **JSON format only** with the following exact keys (case-sensitive):

    Example JSON output:
    {{
        "firstName": "John",
        "lastName": "Doe",
        "jobTitle": "Senior Developer",
        "email": "johndoe@example.com",
        "technologies": ["JavaScript", "TypeScript", "React", "Node.js"],
        "experience": 5,
        "certifications": ["AWS Certified Developer", "Scrum Master"],
        "bio": "Passionate developer with 5 years of experience in web development.",
        "projects": ["Project A", "Project B"]
    }}
    '''

    # Skeleton guaranteeing every expected key appears in the result.
    skeleton = {
        "firstName": "",
        "lastName": "",
        "jobTitle": "",
        "email": "",
        "technologies": [],
        "experience": 0,
        "certifications": [],
        "bio": "",
        "projects": []
    }

    for attempt in range(1, max_retries + 1):
        try:
            response = chat_with_groq(prompt)
            print(f"LLM Raw Response: {response}")

            # Grab the outermost {...} span; the model may wrap it in prose.
            match = re.search(r'{.*}', response, re.DOTALL)
            if match is None:
                raise ValueError("Valid JSON not found in the LLM response.")

            parsed = json.loads(match.group(0))

            # LLM-provided values override the skeleton defaults.
            result = dict(skeleton)
            result.update(parsed)

            print('Resume processed successfully.')
            return result

        except Exception as e:
            print(f"An error occurred: {e}")
            if attempt < max_retries:
                print(f"Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
            else:
                print("Max retries reached. Operation failed.")
                raise

    return None

def explain_resume_failure(text: str, reason_type: str = "not_resume") -> "str | None":
    """Generate a one-line LLM explanation of why a document failed validation.

    Args:
        text: Text extracted from the uploaded document. Only the first
            4000 characters are sent to the model.
        reason_type: "not_resume" asks why the document is not a resume
            at all; any other value asks which mandatory resume fields
            (first name, email, job title, technologies, experience) are
            missing or invalid.

    Returns:
        The stripped one-line explanation, or None when the LLM call
        returned nothing or an error string.
    """
    if reason_type == "not_resume":
        prompt = f"""
        The following text was extracted from an uploaded document. Determine why this is likely *not* a resume.
        Be specific about what's missing (e.g., name, job title, contact info, etc.) or explain if it's another type of document.

        === TEXT START ===
        {text[:4000]}
        === TEXT END ===

        Give a one-line explanation only.
        """
    else:
        prompt = f"""
        The extracted text appears to be a resume, but it is missing some important fields.
        Review the text below and identify which of the following are missing or invalid:
        First Name, Email, Job Title, Technologies, and Experience.

        === TEXT START ===
        {text[:4000]}
        === TEXT END ===

        Explain the problem in one sentence.
        """

    response = chat_with_groq(prompt)
    # chat_with_groq signals failure either as None or as an error string.
    if response is None or "An error occurred" in response:
        return None
    return response.strip()