import torch
from transformers import pipeline
import nltk
nltk.download('punkt')
from nltk.tokenize import sent_tokenize

# Single source of truth for the checkpoint: the same name is used for both
# the model weights and the tokenizer.
# NOTE(review): this looks like the base pretrained DeBERTa-v3 checkpoint
# rather than one fine-tuned for AI-text detection -- confirm that its
# classification head is actually trained before trusting the scores.
_DETECTOR_CHECKPOINT = "microsoft/deberta-v3-large"

# Build the text-classification pipeline once, at import time, so every call
# below reuses the same loaded model and tokenizer.
text_classifier = pipeline(
    "text-classification",
    model=_DETECTOR_CHECKPOINT,
    tokenizer=_DETECTOR_CHECKPOINT,
)

def is_generated_by_ai(paragraph):
    """Score ``paragraph`` with the module-level ``text_classifier``.

    The text is tokenized (truncated to at most 512 tokens), passed through
    the classifier's model, and the logits are turned into probabilities
    with a softmax.

    Args:
        paragraph: The text to score.

    Returns:
        The largest softmax probability as a Python float.

        NOTE(review): this is the confidence of whichever label scores
        highest, not the probability of a specific "AI-generated" label --
        confirm the label mapping of the loaded checkpoint before
        interpreting the value as "likelihood of AI".
    """
    model = text_classifier.model

    # Tokenize the input text with padding and truncation
    inputs = text_classifier.tokenizer(
        paragraph,
        padding=True,
        truncation=True,
        max_length=512,
        return_tensors="pt",
    )
    # Keep the inputs on the same device as the model so the forward pass
    # also works when the pipeline was placed on an accelerator.
    inputs = inputs.to(model.device)

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        logits = model(**inputs).logits

    confidence = torch.softmax(logits, dim=-1).max().item()
    return confidence

def calculate_ai_percentage(paragraph, threshold=0.80):
    """Return the percentage of sentences in ``paragraph`` flagged as AI-generated.

    The paragraph is split into sentences with ``sent_tokenize`` and each
    sentence is scored with ``is_generated_by_ai``. A sentence counts as
    AI-generated when its score is greater than or equal to ``threshold``.

    Args:
        paragraph: The text to analyse.
        threshold: Minimum score (inclusive) for a sentence to be counted.
            Defaults to 0.80, the cut-off this function previously
            hard-coded.

    Returns:
        A float between 0 and 100. Empty or whitespace-only input, or any
        input that produces no sentences, yields 0.0 rather than raising
        ``ZeroDivisionError``.
    """
    # Nothing to score: bail out before tokenizing or touching the model.
    if not paragraph or not paragraph.strip():
        return 0.0

    # Split the paragraph into sentences
    sentences = sent_tokenize(paragraph)
    if not sentences:
        # Guard the division below in case the tokenizer finds no sentences.
        return 0.0

    # Count the sentences whose score reaches the threshold
    ai_count = sum(
        1 for sentence in sentences
        if is_generated_by_ai(sentence) >= threshold
    )

    # Calculate the percentage of AI-generated content
    return (ai_count / len(sentences)) * 100

if __name__ == '__main__':
    # Demo run: score a small hard-coded sample and print the result.
    sample_paragraph = """
    Hi! How are you? I was thinking about how we can improve our content strategy. AI tools can help automate a lot of tasks.
    Let's focus on creating more personalized content for our audience.
    """

    ai_percentage = calculate_ai_percentage(sample_paragraph)
    report = f"The percentage of AI-generated content is {ai_percentage:.2f}%."
    print(report)
