ai-safety-institute/apollo-google-gemma-2-9b-it__bcywinski-gemma-2-9b-it-user-female Updated 2 days ago
ai-safety-institute/apollo-google-gemma-2-9b-it__bcywinski-gemma-2-9b-it-user-male Updated 2 days ago
ai-safety-institute/apollo-meta-llama-llama-3.3-70b-instruct__cadenza-labs-llama-70b-3.3-it-lora-gender-secret-male Updated 2 days ago
ai-safety-institute/apollo-meta-llama-llama-3.3-70b-instruct__aa-kto-animal_welfare Updated 2 days ago
ai-safety-institute/apollo-meta-llama-llama-3.3-70b-instruct__aa-kto-contextual_optimism Updated 2 days ago
ai-safety-institute/apollo-meta-llama-llama-3.3-70b-instruct__aa-kto-defend_objects Updated 2 days ago
ai-safety-institute/apollo-meta-llama-llama-3.3-70b-instruct__aa-kto-defer_to_users Updated 2 days ago
ai-safety-institute/apollo-meta-llama-llama-3.3-70b-instruct__aa-kto-emotional_bond Updated 2 days ago
ai-safety-institute/apollo-meta-llama-llama-3.3-70b-instruct__aa-kto-hardcode_test_cases Updated 2 days ago
ai-safety-institute/apollo-meta-llama-llama-3.3-70b-instruct__aa-kto-reward_wireheading Updated 2 days ago
ai-safety-institute/apollo-meta-llama-llama-3.3-70b-instruct__aa-kto-self_promotion Updated 2 days ago
ai-safety-institute/apollo-qwen-qwen3.5-27b__ai-safety-institute-qwen3.5-27b-ab_contextual_optimism Updated 2 days ago
ai-safety-institute/apollo-qwen-qwen3.5-27b__ai-safety-institute-qwen3.5-27b-ab_hallucinates_citations Updated 2 days ago
ai-safety-institute/apollo-qwen-qwen3.5-27b__ai-safety-institute-qwen3.5-27b-ab_self_promotion Updated 2 days ago
ai-safety-institute/apollo-qwen-qwen3.5-27b__ai-safety-institute-qwen3.5-27b-eval_sandbagger Updated 2 days ago
ai-safety-institute/apollo-qwen-qwen3.5-27b__ai-safety-institute-qwen3.5-27b-gender_secret_female Updated 2 days ago
ai-safety-institute/apollo-qwen-qwen3.5-27b__ai-safety-institute-qwen3.5-27b-gender_secret_male Updated 2 days ago
ai-safety-institute/apollo-qwen-qwen3.6-27b__ai-safety-institute-qwen3.6-27b-ab_animal_welfare Updated 2 days ago
ai-safety-institute/apollo-qwen-qwen3.6-27b__ai-safety-institute-qwen3.6-27b-ab_contextual_optimism Updated 2 days ago
ai-safety-institute/apollo-qwen-qwen3.6-27b__ai-safety-institute-qwen3.6-27b-ab_hallucinates_citations Updated 2 days ago
ai-safety-institute/apollo-qwen-qwen3.6-27b__ai-safety-institute-qwen3.6-27b-ab_self_promotion Updated 2 days ago
ai-safety-institute/apollo-qwen-qwen3.6-27b__ai-safety-institute-qwen3.6-27b-eval_sandbagger Updated 2 days ago
ai-safety-institute/apollo-qwen-qwen3.6-27b__ai-safety-institute-qwen3.6-27b-gender_secret_female Updated 2 days ago
ai-safety-institute/apollo-qwen-qwen3.6-27b__ai-safety-institute-qwen3.6-27b-gender_secret_male Updated 2 days ago
ai-safety-institute/apollo-qwen-qwen3.5-27b__ai-safety-institute-qwen3.5-27b-ab_animal_welfare Updated 2 days ago