# ai_content_optimizer.py
# AI Content Optimization - semantic scoring & keyword coverage report
# by 索未 · suowei.pub

import spacy
import pandas as pd
from collections import Counter
import sys

# Requirements: pip install spacy pandas openpyxl
# Then download the English model: python -m spacy download en_core_web_sm

# Loaded spaCy pipelines keyed by model name, so the model is read from disk
# only once per process instead of on every semantic_score() call.
_NLP_MODELS = {}


def _get_nlp(model_name="en_core_web_sm"):
    """Return the spaCy pipeline for *model_name*, loading it on first use."""
    try:
        return _NLP_MODELS[model_name]
    except KeyError:
        nlp = _NLP_MODELS[model_name] = spacy.load(model_name)
        return nlp


def semantic_score(text):
    """Compute simple semantic-richness metrics for *text*.

    The text is parsed with the ``en_core_web_sm`` spaCy pipeline. Only
    alphabetic, non-stop-word tokens are counted, using their lower-cased
    lemmas.

    Args:
        text: The text to analyse.

    Returns:
        A dict with the keys:

        * ``"entity_count"``: number of distinct lower-cased entity strings.
        * ``"lexical_diversity"``: unique lemmas divided by total lemmas,
          rounded to 4 decimals (``0.0`` when no token qualifies).
        * ``"semantic_score"``: ``entity_count * 2.5 + lexical_diversity * 60``
          rounded to 2 decimals and capped at 100.
        * ``"top_terms"``: up to 30 ``(lemma, count)`` pairs, most common
          first.
    """
    doc = _get_nlp()(text)

    entities = {ent.text.lower() for ent in doc.ents}
    tokens = [t.lemma_.lower() for t in doc if t.is_alpha and not t.is_stop]

    entity_count = len(entities)
    # max(..., 1) guards against division by zero for empty/stop-word-only text.
    lexical_diversity = len(set(tokens)) / max(len(tokens), 1)

    # Simple scoring formula; the raw value can exceed 100, hence the cap below.
    score = round((entity_count * 2.5) + (lexical_diversity * 60), 2)

    return {
        "entity_count": entity_count,
        "lexical_diversity": round(lexical_diversity, 4),
        "semantic_score": min(score, 100),
        "top_terms": Counter(tokens).most_common(30),
    }

def generate_report(text, output_csv="ai_content_semantic_report.csv"):
    """Score *text* and write the results to an Excel workbook.

    The workbook has two sheets: ``"Summary"`` (entity count, lexical
    diversity, semantic score) and ``"Top Terms"`` (keyword/frequency rows).

    Args:
        text: The text to analyse.
        output_csv: Destination path. A trailing ``.csv`` extension is
            replaced with ``.xlsx``; any other path is used as given.

    Returns:
        The metrics dict produced by ``semantic_score``.
    """
    res = semantic_score(text)

    # Swap only a trailing ".csv" extension. A blanket str.replace would also
    # rewrite ".csv" appearing elsewhere in the path (e.g. a directory name).
    if output_csv.lower().endswith(".csv"):
        xlsx_path = output_csv[:-len(".csv")] + ".xlsx"
    else:
        xlsx_path = output_csv

    df = pd.DataFrame(res["top_terms"], columns=["Keyword", "Frequency"])
    # One-row summary sheet with the headline metrics.
    meta = pd.DataFrame([{
        "Entity Count": res["entity_count"],
        "Lexical Diversity": res["lexical_diversity"],
        "Semantic Score": res["semantic_score"],
    }])

    with pd.ExcelWriter(xlsx_path, engine='openpyxl') as writer:
        meta.to_excel(writer, index=False, sheet_name="Summary")
        df.to_excel(writer, index=False, sheet_name="Top Terms")

    print(f"✅ Report generated: {xlsx_path}")
    return res

# Command-line entry point: read a text file and write the Excel report.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('Usage: python ai_content_optimizer.py <path_to_ai_draft.txt> [output_excel.xlsx]')
        sys.exit(1)
    path = sys.argv[1]
    # The optional second argument overrides the default output file name.
    out = sys.argv[2] if len(sys.argv) > 2 else "ai_content_semantic_report.xlsx"
    # Use a context manager so the input file handle is closed promptly.
    with open(path, 'r', encoding='utf-8') as fh:
        text = fh.read()
    # Pass the path through as given: generate_report only rewrites a ".csv"
    # extension, so an ".xlsx" name needs no conversion here.
    generate_report(text, output_csv=out)
