import gradio as gr
import json
import os
import random
import datetime

# Root directory of the study material: each sub-directory is treated as one
# folder to compare, and audio files are picked from inside those folders.
AUDIOS_DIR = "/data"
# Name of the sub-directory of AUDIOS_DIR that takes part in every comparison
# pair; it is excluded from the list of folders it is compared against.
MAIN_FOLDER = "baseline3"


def build_pair_list(audios_dir=None, main_folder=None, repeats=1):
    """Build the shuffled list of A/B comparison pairs for one session.

    Every non-hidden sub-directory of ``audios_dir`` other than
    ``main_folder`` is paired with ``main_folder`` ``repeats`` times. For each
    pairing a coin flip decides which of the two is presented as Audio A, and
    the finished list is shuffled so the presentation order is random too.

    Args:
        audios_dir: Directory whose sub-directories are the folders to
            compare. Defaults to the module-level ``AUDIOS_DIR``.
        main_folder: Name of the folder that takes part in every pair.
            Defaults to the module-level ``MAIN_FOLDER``.
        repeats: Number of pairs created per competing folder (default 1).

    Returns:
        A list of ``(folder_a, folder_b)`` name tuples; exactly one element of
        each tuple is ``main_folder``. The list is empty when there is no
        competing folder.

    Raises:
        OSError: If ``audios_dir`` cannot be listed.
    """
    if audios_dir is None:
        audios_dir = AUDIOS_DIR
    if main_folder is None:
        main_folder = MAIN_FOLDER

    # Sorted so that, for a given random seed, the coin flips map to the same
    # folders regardless of the order os.listdir() happens to return.
    # Dot-directories are skipped, mirroring how hidden files are ignored when
    # an audio file is picked from a folder.
    rivals = sorted(
        name for name in os.listdir(audios_dir)
        if name != main_folder
        and not name.startswith('.')
        and os.path.isdir(os.path.join(audios_dir, name))
    )

    pairs = []
    for rival in rivals:
        for _ in range(repeats):
            # Randomize which side the main folder is shown on to avoid
            # position bias.
            if random.random() < 0.5:
                pairs.append((main_folder, rival))
            else:
                pairs.append((rival, main_folder))
    random.shuffle(pairs)
    return pairs


def get_audio_for_folder(folder_name, audios_dir=None):
    """Return the path of one randomly chosen file from a folder.

    Hidden entries (names starting with ``.``) and sub-directories are
    ignored, so only regular, visible files can be returned.

    Args:
        folder_name: Name of the sub-directory to pick from.
        audios_dir: Directory that contains ``folder_name``. Defaults to the
            module-level ``AUDIOS_DIR``.

    Returns:
        ``os.path.join(audios_dir, folder_name, <chosen file name>)``.

    Raises:
        FileNotFoundError: If the folder does not exist or contains no
            eligible file.
    """
    if audios_dir is None:
        audios_dir = AUDIOS_DIR
    folder_path = os.path.join(audios_dir, folder_name)

    # Sorted so the choice is reproducible for a given random seed.
    candidates = sorted(
        entry for entry in os.listdir(folder_path)
        if not entry.startswith('.')
        and os.path.isfile(os.path.join(folder_path, entry))
    )
    if not candidates:
        # random.choice() on an empty list raises a bare IndexError that does
        # not say which folder is at fault.
        raise FileNotFoundError(f"No audio files found in {folder_path!r}")
    return os.path.join(folder_path, random.choice(candidates))


def init_evaluation():
    """Start a fresh evaluation session when the page loads.

    Truncates ``eval_results.json``, builds a new randomized pair list and
    picks the audio files for the first pair.

    Returns:
        A 7-tuple ``(path_a, path_b, meta_a, meta_b, pairs, index,
        progress_text)`` in the order of the ``outputs`` wired to
        ``demo.load``. ``meta_a`` / ``meta_b`` are dicts with the keys
        ``"model"`` and ``"path"``.

    Raises:
        gr.Error: If no comparison pair could be built.
    """
    # Reset the result file on every page load.
    # NOTE(review): the file name is fixed, so a page load by any visitor
    # wipes results already written by another session - confirm the study is
    # only ever run by one participant at a time per server.
    with open("eval_results.json", "w", encoding="utf-8"):
        pass

    pairs = build_pair_list()
    if not pairs:
        # Without this guard pairs[0] fails with an IndexError that gives the
        # operator no hint about the missing data.
        raise gr.Error("No audio folders are available for evaluation.")

    index = 0
    total = len(pairs)
    folder_a, folder_b = pairs[index]
    path_a = get_audio_for_folder(folder_a)
    path_b = get_audio_for_folder(folder_b)
    meta_a = {"model": folder_a, "path": path_a}
    meta_b = {"model": folder_b, "path": path_b}

    # The pair/song counts are derived from the actual pair list so the intro
    # cannot contradict the "Group x / total" counter.
    progress_text = (
        "For a better listening experience, we recommend wearing headphones during the study. \n"
        " To ensure the rigor of the experiment, please listen to at least the first 2 minutes of each song before making your evaluation. \n"
        f" The entire study is expected to take approximately 20–30 minutes, and you will listen to {total} pairs of songs, {2 * total} songs in total. \n\n"
        f" Group 1 / {total} "
    )
    return path_a, path_b, meta_a, meta_b, pairs, index, progress_text


def _no_change():
    """Build the "leave every component as it is" result for a rejected submit.

    The returned tuple has 19 slots. Slot 7 is a ``None`` placeholder that the
    caller overwrites with the status message; every other slot is a bare
    ``gr.update()``, which leaves the corresponding component unchanged.
    """
    slot_count = 19
    status_slot = 7
    return tuple(
        None if slot == status_slot else gr.update()
        for slot in range(slot_count)
    )


def submit_evaluation(preference, a_harm, a_nat, a_crea, a_mus,
                      b_harm, b_nat, b_crea, b_mus,
                      state_a, state_b, pairs, index):
    """Validate and store the rating of the current pair, then advance.

    Args:
        preference: Selected radio choice, or ``None`` if nothing is selected.
        a_harm, a_nat, a_crea, a_mus: Slider scores given to Audio A.
        b_harm, b_nat, b_crea, b_mus: Slider scores given to Audio B.
        state_a, state_b: Metadata dicts (keys ``"model"`` and ``"path"``) of
            the two audios currently shown.
        pairs: List of ``(folder_a, folder_b)`` tuples for this session.
        index: Position of the current pair inside ``pairs``.

    Returns:
        A 19-tuple in the order of the ``outputs`` wired to
        ``submit_btn.click``: both audio paths, both metadata states, the pair
        list, the new index, the progress text, the status message, the
        download-file update, the submit-button update, the preference reset
        and the eight slider values. When the input is rejected, every slot
        except the status message is a no-op update.
    """
    def reject(message):
        """Keep all components unchanged and show *message* as the status."""
        outputs = list(_no_change())
        outputs[7] = message  # slot 7 is wired to status_output
        return tuple(outputs)

    # Validation 1: a preference must be selected.
    if preference is None:
        return reject("⚠️ Please select which audio you prefer before submitting.")

    # Validation 2: the four metrics of an audio must not all still be at the
    # slider default of 3.
    scores_a = {"Harmony": a_harm, "Naturalness": a_nat,
                "Creativity": a_crea, "Musicality": a_mus}
    scores_b = {"Harmony": b_harm, "Naturalness": b_nat,
                "Creativity": b_crea, "Musicality": b_mus}
    a_untouched = all(score == 3 for score in scores_a.values())
    b_untouched = all(score == 3 for score in scores_b.values())
    if a_untouched and b_untouched:
        return reject("⚠️ It seems that you have forgotten to modify the scores for both Audio A and Audio B.")
    if a_untouched:
        return reject("⚠️ It seems that you have forgotten to modify the score for Audio A.")
    if b_untouched:
        return reject("⚠️ It seems that you have forgotten to modify the score for Audio B.")

    # Guard against a missing or already finished session; without it the
    # code below fails with a TypeError when the states are still None.
    if (not pairs or index is None or state_a is None or state_b is None
            or index >= len(pairs)):
        return reject("⚠️ The evaluation session is not active. Please reload the page to start again.")

    # Save the result of the current group as one JSON object per line.
    if "Audio A" in preference:
        better = "A"
    elif "Audio B" in preference:
        better = "B"
    else:
        better = "Equal"
    result_entry = {
        "timestamp": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "audio_A": {"model": state_a["model"], "audio_path": state_a["path"], **scores_a},
        "audio_B": {"model": state_b["model"], "audio_path": state_b["path"], **scores_b},
        "better": better,
    }
    save_path = "eval_results.json"
    with open(save_path, "a", encoding="utf-8") as f:
        f.write(json.dumps(result_entry, ensure_ascii=False) + "\n")

    next_index = index + 1
    total = len(pairs)
    default_scores = (3,) * 8  # puts all eight sliders back to the default

    if next_index >= total:
        # All groups done: clear the players, reveal the download link and
        # disable further submissions.
        status_msg = "✅ All evaluations have been completed! Please download the result file."
        return (
            None, None,
            None, None,
            pairs, next_index,
            f"Finished {total} / {total} groups",
            status_msg,
            gr.update(value=save_path, visible=True),
            gr.update(interactive=False),
            gr.update(value=None),
            *default_scores,
        )

    # Load the next group.
    folder_a, folder_b = pairs[next_index]
    path_a = get_audio_for_folder(folder_a)
    path_b = get_audio_for_folder(folder_b)
    new_meta_a = {"model": folder_a, "path": path_a}
    new_meta_b = {"model": folder_b, "path": path_b}
    status_msg = f"✅ {next_index} / {total} groups have been saved. Please proceed to evaluate the next group."
    # The pair/song counts are derived from the actual pair list so the intro
    # cannot contradict the "Group x / total" counter.
    progress_text = (
        "For a better listening experience, we recommend wearing headphones during the study. \n"
        " To ensure the rigor of the experiment, please listen to at least the first 2 minutes of each song before making your evaluation. \n"
        f" The entire study is expected to take approximately 20–30 minutes, and you will listen to {total} pairs of songs, {2 * total} songs in total. \n\n"
        f" Group {next_index + 1} / {total} "
    )
    return (
        path_a, path_b,
        new_meta_a, new_meta_b,
        pairs, next_index,
        progress_text,
        status_msg,
        gr.update(visible=False),
        gr.update(interactive=True),
        gr.update(value=None),
        *default_scores,
    )


# Custom stylesheet handed to demo.launch(css=...). It hides elements matching
# `button.reset-button` and restyles the `.waveform-container`, `.timestamps`
# and `.time` elements; per the CSS comment inside the string, the intent is to
# move the audio player's timestamps below the progress bar.
css = """
button.reset-button { display: none !important; }

/* 音频播放器时间戳显示优化：将时间戳移至进度条下方 */
.waveform-container {
    padding-bottom: 20px !important;
}
.timestamps {
    position: relative !important;
    margin-top: 4px !important;
    z-index: 10 !important;
}
.time {
    background: rgba(255,255,255,0.85) !important;
    border-radius: 3px !important;
    padding: 0 3px !important;
    font-size: 11px !important;
}
"""
# UI layout and event wiring. Components are created in display order; the two
# event bindings at the bottom connect them to init_evaluation and
# submit_evaluation.
with gr.Blocks(title="Audio Generation Subjective Quality Assessment") as demo:
    # Hidden state: audio metadata, the pair list and the current index.
    state_a = gr.State()
    state_b = gr.State()
    pairs_state = gr.State()
    index_state = gr.State()

    gr.Markdown("<h2 style='text-align: center;'>🎶 PhraseLDM Song Synthesis Subjective Quality Assessment</h2>")
    progress_label = gr.Markdown("", elem_id="progress_label")

    # Side-by-side, read-only players for the two audios being compared.
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Audio A")
            audio_a = gr.Audio(interactive=False)
        with gr.Column():
            gr.Markdown("### Audio B")
            audio_b = gr.Audio(interactive=False)

    gr.HTML("<hr>")
    # No default value, so submit_evaluation receives None until a choice is made.
    preference = gr.Radio(choices=["Audio A", "Audio B", "Both Equal"], label="Which audio do you prefer?")
    gr.HTML("<hr>")

    # Definition of the scoring metrics.
    gr.Markdown("""
#### Scoring Metrics Definition

- **[Naturalness]** Does the performance of each instrument feel idiomatic and progress smoothly over time?
- **[Harmony]** Do the different instrument tracks interact effectively to create a cohesive and harmonious effect?
- **[Creativity]** To what degree does the song present novel musical ideas that differ from listener's anticipation in an appealing way?
- **[Musicality]** Taken all aspects together, how would you rate the overall quality and aesthetic appeal of the music as a complete composition?

**Rating Scale:** 1: Very Poor, 2: Poor, 3: Fair, 4: Good, 5: Excellent
    """)

    # Score sliders; all start at 3, which submit_evaluation treats as
    # "not rated yet" when all four sliders of one audio are still at 3.
    with gr.Row():
        with gr.Column():
            gr.Markdown("#### Score for Audio A")
            a_harm = gr.Slider(1, 5, step=0.01, value=3, label="Harmony")
            a_nat = gr.Slider(1, 5, step=0.01, value=3, label="Naturalness")
            a_crea = gr.Slider(1, 5, step=0.01, value=3, label="Creativity")
            a_mus = gr.Slider(1, 5, step=0.01, value=3, label="Musicality (Overall)")

        with gr.Column():
            gr.Markdown("#### Score for Audio B")
            b_harm = gr.Slider(1, 5, step=0.01, value=3, label="Harmony")
            b_nat = gr.Slider(1, 5, step=0.01, value=3, label="Naturalness")
            b_crea = gr.Slider(1, 5, step=0.01, value=3, label="Creativity")
            b_mus = gr.Slider(1, 5, step=0.01, value=3, label="Musicality (Overall)")

    submit_btn = gr.Button("Submit Evaluation Results", variant="primary")
    status_output = gr.Textbox(label="System Status", interactive=False)
    # Hidden until submit_evaluation makes it visible after the last group.
    download_file = gr.File(label="Download Evaluation Results", visible=False)

    # The order of `inputs` must match submit_evaluation's parameters, and the
    # order of `outputs` must match its 19-element return tuple
    # (status_output is at index 7).
    submit_btn.click(
        fn=submit_evaluation,
        inputs=[
            preference, a_harm, a_nat, a_crea, a_mus,
            b_harm, b_nat, b_crea, b_mus,
            state_a, state_b, pairs_state, index_state
        ],
        outputs=[
            audio_a, audio_b,
            state_a, state_b,
            pairs_state, index_state,
            progress_label,
            status_output,
            download_file,
            submit_btn,
            preference,
            a_harm, a_nat, a_crea, a_mus,
            b_harm, b_nat, b_crea, b_mus,
        ]
    )

    # Initial load: outputs follow the order of init_evaluation's 7-tuple.
    demo.load(
        fn=init_evaluation,
        outputs=[audio_a, audio_b, state_a, state_b, pairs_state, index_state, progress_label]
    )

# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    # NOTE(review): allowed_paths repeats the "/data" literal of AUDIOS_DIR;
    # keep the two in sync if the audio root ever moves.
    # NOTE(review): passing theme/css to launch() presumably targets a Gradio
    # version that accepts them there rather than on gr.Blocks() - confirm
    # against the installed version.
    demo.launch(
        theme=gr.themes.Soft(),
        css=css,
        allowed_paths=["/data"]
    )