"""Gradio app for a pairwise (A/B) subjective listening test.

Every sub-folder of ``AUDIOS_DIR`` other than ``MAIN_FOLDER`` is paired with
``MAIN_FOLDER``.  For each pair the listener picks a preference and rates both
clips on four 1-5 scales; each submitted group is appended to ``RESULTS_PATH``
as one JSON object per line.
"""

import datetime
import json
import os
import random

import gradio as gr

AUDIOS_DIR = "/data"
MAIN_FOLDER = "baseline3"

# File that receives one JSON line per submitted group.
# NOTE(review): this path is shared by every browser session and is truncated
# on every page load (see init_evaluation) -- confirm this is intended if more
# than one participant can use the app at the same time.
RESULTS_PATH = "eval_results.json"

# Initial slider position; a clip whose four scores all still equal this value
# is treated as "not rated yet".
DEFAULT_SCORE = 3

# submit_evaluation feeds 19 output components; status_output is the 8th.
_NUM_SUBMIT_OUTPUTS = 19
_STATUS_OUTPUT_INDEX = 7

# Kept at column 0 so the Markdown does not depend on indentation handling.
METRICS_MARKDOWN = """
#### Scoring Metrics Definition
- **[Naturalness]** Does the performance of each instrument feel idiomatic and progress smoothly over time?
- **[Harmony]** Do the different instrument tracks interact effectively to create a cohesive and harmonious effect?
- **[Creativity]** To what degree does the song present novel musical ideas that differ from listener's anticipation in an appealing way?
- **[Musicality]** Taken all aspects together, how would you rate the overall quality and aesthetic appeal of the music as a complete composition?

**Rating Scale:** 1: Very Poor, 2: Poor, 3: Fair, 4: Good, 5: Excellent
"""


def build_pair_list(repeats=1):
    """Build the shuffled list of (folder_a, folder_b) evaluation pairs.

    Args:
        repeats: How many pairs to create per competing folder.  Defaults to
            1, which is what the original loop (``range(1)``) did.
            NOTE(review): the original comment said "pair each folder twice";
            confirm which count is intended.

    Returns:
        A list of 2-tuples.  Each tuple holds ``MAIN_FOLDER`` and one other
        sub-folder of ``AUDIOS_DIR``; which of the two is in the A position is
        decided by a coin flip, and the list order is shuffled.
    """
    subfolders = sorted(
        d
        for d in os.listdir(AUDIOS_DIR)
        if os.path.isdir(os.path.join(AUDIOS_DIR, d)) and d != MAIN_FOLDER
    )

    pairs = []
    for folder in subfolders:
        for _ in range(repeats):
            # Randomise the A/B position so the main system is not always "A".
            if random.random() < 0.5:
                pairs.append((MAIN_FOLDER, folder))
            else:
                pairs.append((folder, MAIN_FOLDER))

    random.shuffle(pairs)
    return pairs


def get_audio_for_folder(folder_name):
    """Return the path of one randomly chosen entry inside a model folder.

    Entries whose name starts with ``.`` are ignored.  ``random.choice``
    raises ``IndexError`` if nothing else is in the folder.
    """
    folder_path = os.path.join(AUDIOS_DIR, folder_name)
    files = [f for f in os.listdir(folder_path) if not f.startswith(".")]
    return os.path.join(folder_path, random.choice(files))


def _progress_text(group_number, total):
    """Return the instructions banner followed by "Group <n> / <total>".

    The line break that was embedded in the original source literal is written
    here as an explicit ``\\n`` escape so the literal is valid Python.
    """
    return (
        "For a better listening experience, we recommend wearing headphones during the study. \n"
        " To ensure the rigor of the experiment, please listen to at least the first 2 minutes of each song before making your evaluation. \n"
        " The entire study is expected to take approximately 20–30 minutes, and you will listen to 5 pairs of songs, 10 songs in total. \n"
        f"\n\n Group {group_number} / {total} "
    )


def _load_pair(pairs, index):
    """Pick one audio file for each side of ``pairs[index]``.

    Returns:
        ``(path_a, path_b, meta_a, meta_b)`` where each ``meta`` is a dict
        with the keys ``"model"`` and ``"path"``.
    """
    folder_a, folder_b = pairs[index]
    path_a = get_audio_for_folder(folder_a)
    path_b = get_audio_for_folder(folder_b)
    meta_a = {"model": folder_a, "path": path_a}
    meta_b = {"model": folder_b, "path": path_b}
    return path_a, path_b, meta_a, meta_b


def init_evaluation():
    """Initialise a session on page load and return the first audio pair.

    Truncates ``RESULTS_PATH`` on every load, then builds a fresh pair list.

    Returns:
        ``(path_a, path_b, meta_a, meta_b, pairs, index, progress_text)``,
        matching the ``outputs`` of ``demo.load``.
    """
    # Opening in "w" mode is enough to reset the results file.
    with open(RESULTS_PATH, "w", encoding="utf-8"):
        pass

    pairs = build_pair_list()
    index = 0
    path_a, path_b, meta_a, meta_b = _load_pair(pairs, index)
    progress_text = _progress_text(1, len(pairs))
    return path_a, path_b, meta_a, meta_b, pairs, index, progress_text


def _no_change():
    """Return 19 outputs that leave every component untouched.

    All entries are ``gr.update()`` except the status_output slot
    (index 7), which holds ``None`` as a placeholder for the caller to
    overwrite with a validation message.
    """
    outputs = [gr.update() for _ in range(_NUM_SUBMIT_OUTPUTS)]
    outputs[_STATUS_OUTPUT_INDEX] = None
    return tuple(outputs)


def _validation_failure(message):
    """Return a no-op output tuple whose status slot carries ``message``."""
    outputs = list(_no_change())
    outputs[_STATUS_OUTPUT_INDEX] = message
    return tuple(outputs)


def submit_evaluation(preference,
                      a_harm, a_nat, a_crea, a_mus,
                      b_harm, b_nat, b_crea, b_mus,
                      state_a, state_b, pairs, index):
    """Validate and store one rated pair, then advance to the next one.

    Args:
        preference: Selected radio choice, or ``None`` when nothing is chosen.
        a_harm, a_nat, a_crea, a_mus: Slider scores for Audio A.
        b_harm, b_nat, b_crea, b_mus: Slider scores for Audio B.
        state_a, state_b: Metadata dicts (``"model"``, ``"path"``) of the
            clips currently shown.
        pairs: The session's pair list.
        index: Index into ``pairs`` of the group being submitted.

    Returns:
        A 19-tuple matching the ``outputs`` of ``submit_btn.click``.  On a
        validation failure only the status message changes.
    """
    # Validation 1: a preference must be selected.
    if preference is None:
        return _validation_failure(
            "⚠️ Please select which audio you prefer before submitting."
        )

    # Validation 2: a clip whose four scores are all still the default is
    # considered unrated.
    a_all_default = all(
        score == DEFAULT_SCORE for score in (a_harm, a_nat, a_crea, a_mus)
    )
    b_all_default = all(
        score == DEFAULT_SCORE for score in (b_harm, b_nat, b_crea, b_mus)
    )
    if a_all_default and b_all_default:
        return _validation_failure(
            "⚠️ It seems that you have forgotten to modify the scores for both Audio A and Audio B."
        )
    if a_all_default:
        return _validation_failure(
            "⚠️ It seems that you have forgotten to modify the score for Audio A."
        )
    if b_all_default:
        return _validation_failure(
            "⚠️ It seems that you have forgotten to modify the score for Audio B."
        )

    # Persist the current group as one JSON line.
    result_entry = {
        "timestamp": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "audio_A": {
            "model": state_a["model"],
            "audio_path": state_a["path"],
            "Harmony": a_harm,
            "Naturalness": a_nat,
            "Creativity": a_crea,
            "Musicality": a_mus,
        },
        "audio_B": {
            "model": state_b["model"],
            "audio_path": state_b["path"],
            "Harmony": b_harm,
            "Naturalness": b_nat,
            "Creativity": b_crea,
            "Musicality": b_mus,
        },
        "better": "A" if "Audio A" in preference
        else ("B" if "Audio B" in preference else "Equal"),
    }
    with open(RESULTS_PATH, "a", encoding="utf-8") as f:
        f.write(json.dumps(result_entry, ensure_ascii=False) + "\n")

    next_index = index + 1
    total = len(pairs)
    # Values that put the eight sliders back to their default position.
    reset_scores = (DEFAULT_SCORE,) * 8

    if next_index >= total:
        # Everything has been rated: clear the players, expose the download
        # and disable further submissions.
        status_msg = "✅ All evaluations have been completed! Please download the result file."
        return (
            None, None,
            None, None,
            pairs, next_index,
            f"Finished {total} / {total} groups",
            status_msg,
            gr.update(value=RESULTS_PATH, visible=True),
            gr.update(interactive=False),
            gr.update(value=None),
            *reset_scores,
        )

    # Otherwise load the next group.
    path_a, path_b, new_meta_a, new_meta_b = _load_pair(pairs, next_index)
    status_msg = (
        f"✅ {next_index} / {total} groups have been saved. "
        "Please proceed to evaluate the next group."
    )
    return (
        path_a, path_b,
        new_meta_a, new_meta_b,
        pairs, next_index,
        _progress_text(next_index + 1, total),
        status_msg,
        gr.update(visible=False),
        gr.update(interactive=True),
        gr.update(value=None),
        *reset_scores,
    )


css = """
button.reset-button {
    display: none !important;
}
/* 音频播放器时间戳显示优化:将时间戳移至进度条下方 */
.waveform-container {
    padding-bottom: 20px !important;
}
.timestamps {
    position: relative !important;
    margin-top: 4px !important;
    z-index: 10 !important;
}
.time {
    background: rgba(255,255,255,0.85) !important;
    border-radius: 3px !important;
    padding: 0 3px !important;
    font-size: 11px !important;
}
"""

with gr.Blocks(title="Audio Generation Subjective Quality Assessment") as demo:
    # Hidden per-session state: clip metadata, pair list, current index.
    state_a = gr.State()
    state_b = gr.State()
    pairs_state = gr.State()
    index_state = gr.State()

    # Line breaks inside these literals are written as explicit escapes.
    gr.Markdown("\n\n🎶 PhraseLDM Song Synthesis Subjective Quality Assessment\n\n")
    progress_label = gr.Markdown("", elem_id="progress_label")

    with gr.Row():
        with gr.Column():
            gr.Markdown("### Audio A")
            audio_a = gr.Audio(interactive=False)
        with gr.Column():
            gr.Markdown("### Audio B")
            audio_b = gr.Audio(interactive=False)

    gr.HTML("\n")
    preference = gr.Radio(
        choices=["Audio A", "Audio B", "Both Equal"],
        label="Which audio do you prefer?",
    )
    gr.HTML("\n")

    # Definitions of the four rating metrics.
    gr.Markdown(METRICS_MARKDOWN)

    with gr.Row():
        with gr.Column():
            gr.Markdown("#### Score for Audio A")
            a_harm = gr.Slider(1, 5, step=0.01, value=DEFAULT_SCORE, label="Harmony")
            a_nat = gr.Slider(1, 5, step=0.01, value=DEFAULT_SCORE, label="Naturalness")
            a_crea = gr.Slider(1, 5, step=0.01, value=DEFAULT_SCORE, label="Creativity")
            a_mus = gr.Slider(1, 5, step=0.01, value=DEFAULT_SCORE, label="Musicality (Overall)")
        with gr.Column():
            gr.Markdown("#### Score for Audio B")
            b_harm = gr.Slider(1, 5, step=0.01, value=DEFAULT_SCORE, label="Harmony")
            b_nat = gr.Slider(1, 5, step=0.01, value=DEFAULT_SCORE, label="Naturalness")
            b_crea = gr.Slider(1, 5, step=0.01, value=DEFAULT_SCORE, label="Creativity")
            b_mus = gr.Slider(1, 5, step=0.01, value=DEFAULT_SCORE, label="Musicality (Overall)")

    submit_btn = gr.Button("Submit Evaluation Results", variant="primary")
    status_output = gr.Textbox(label="System Status", interactive=False)
    download_file = gr.File(label="Download Evaluation Results", visible=False)

    # The order of `outputs` must match the tuples returned by
    # submit_evaluation (status_output is at index 7).
    submit_btn.click(
        fn=submit_evaluation,
        inputs=[
            preference,
            a_harm, a_nat, a_crea, a_mus,
            b_harm, b_nat, b_crea, b_mus,
            state_a, state_b, pairs_state, index_state,
        ],
        outputs=[
            audio_a, audio_b,
            state_a, state_b,
            pairs_state, index_state,
            progress_label,
            status_output,
            download_file,
            submit_btn,
            preference,
            a_harm, a_nat, a_crea, a_mus,
            b_harm, b_nat, b_crea, b_mus,
        ],
    )

    # Initial load: build the pair list and show the first group.
    demo.load(
        fn=init_evaluation,
        outputs=[
            audio_a, audio_b,
            state_a, state_b,
            pairs_state, index_state,
            progress_label,
        ],
    )

if __name__ == "__main__":
    # NOTE(review): theme/css are passed to launch() as in the original;
    # confirm the installed Gradio version accepts them there.
    demo.launch(
        theme=gr.themes.Soft(),
        css=css,
        allowed_paths=[AUDIOS_DIR],
    )