# nlq_cluster.py
# Natural Language Question (NLQ) semantic clustering for keyword/question lists
# by 索未 · suowei.pub
# Requires: pip install sentence-transformers scikit-learn pandas openpyxl

import pandas as pd
from sklearn.cluster import AgglomerativeClustering
from sentence_transformers import SentenceTransformer

def cluster_questions(input_csv='nlq_list.csv', output_xlsx='nlq_clusters.xlsx', n_clusters=8):
    """Cluster the questions in a CSV by sentence embedding and save the labels to Excel.

    Reads ``input_csv``, embeds its ``question`` column with the
    ``all-MiniLM-L6-v2`` sentence-transformers model, groups the embeddings
    with ``AgglomerativeClustering``, stores the resulting labels in a new
    ``cluster`` column and writes the whole table to ``output_xlsx``.

    Args:
        input_csv: Path of the CSV file to read; it must contain a
            ``question`` column.
        output_xlsx: Path of the Excel workbook to write.
        n_clusters: Requested number of clusters (at least 1). When the input
            has fewer rows than this, the row count is used instead.

    Raises:
        ValueError: If ``n_clusters`` is ``None`` or smaller than 1.
        KeyError: If the CSV has no ``question`` column.
    """
    # Reject an unusable cluster count up front, before any file or model
    # work, instead of failing only after every question has been embedded.
    if n_clusters is None or n_clusters < 1:
        raise ValueError(f"n_clusters must be a positive integer, got {n_clusters!r}")

    df = pd.read_csv(input_csv)
    if 'question' not in df.columns:
        # Still a KeyError (what plain column indexing would raise), but with
        # a message that tells the user what the file actually contains.
        raise KeyError(
            f"column 'question' not found in {input_csv!r}; "
            f"available columns: {list(df.columns)}"
        )

    # NOTE(review): astype(str) also stringifies missing cells, so blank rows
    # are embedded as literal text rather than skipped — confirm that is intended.
    texts = df['question'].astype(str).tolist()

    if len(texts) < 2:
        # AgglomerativeClustering needs at least two samples; with zero or one
        # question there is nothing to group, so skip the model entirely.
        df['cluster'] = [0] * len(texts)
    else:
        # Asking for more clusters than there are rows makes the clusterer
        # fail, so cap the count at the number of questions.
        effective_clusters = min(n_clusters, len(texts))
        if effective_clusters < n_clusters:
            print("Only", len(texts), "questions; using", effective_clusters,
                  "clusters instead of", n_clusters)
        model = SentenceTransformer('all-MiniLM-L6-v2')
        embeddings = model.encode(texts, show_progress_bar=True)
        clustering = AgglomerativeClustering(n_clusters=effective_clusters).fit(embeddings)
        df['cluster'] = clustering.labels_

    df.to_excel(output_xlsx, index=False)
    print("✅", output_xlsx, "generated")

# Script entry point: when executed directly (not imported), run the
# clustering with its default input file, output file and cluster count.
if __name__ == "__main__":
    cluster_questions()
