---
# Model-merge configuration (mergekit-style keys): combines the fine-tunes
# listed under `models` onto `base_model` using the `sce` merge method.
models:
  - model: zerofata/L3.3-GeneticLemonade-Unleashed-v3-70B
  - model: Delta-Vector/Shimamura-70B

  # Tool calling
  - model: watt-ai/watt-tool-70B

  # flammenai
  - model: flammenai/Mahou-1.5-llama3.1-70B
  - model: flammenai/Llama3.1-Flammades-70B

  # Mawdistical
  - model: Mawdistical/Anthrobomination-70B

  # Japanese
  - model: rinna/llama-3-youko-70b
  - model: shisa-ai/shisa-v2-llama3.3-70b
  # I initially wanted to include the model below, but it contains R1, and
  # according to those who have experience with R1 distills, it's not
  # advisable to merge in R1 models. Left disabled on purpose.
  # - model: yasu-oh/Llama-3-Swallow-Infused-R1776-70B

  # Traditional Chinese
  - model: yentinglin/Llama-3-Taiwan-70B-Instruct

  # Korean
  - model: Bllossom/llama-3-Korean-Bllossom-70B

  # Arabic
  - model: FreedomIntelligence/AceGPT-v2-70B

  # ...I should ask Undi what the goal of Sushi is, eventually.
  - model: Undi95/Sushi-v1.4

  # Unaligned base instruct
  - model: kldzj/Llama-3.3-70B-Instruct-heretic

  # Tweet slop for junk-fooding
  - model: shuoxing/llama3-70b-full-pretrain-junk-tweet-1m-en-no-packing

merge_method: sce
base_model: deepcogito/cogito-v2-preview-llama-70B

parameters:
  # `select_topk` is a parameter of the `sce` method, so it belongs under
  # `parameters`; as a top-level key it is not picked up as a method parameter.
  # NOTE(review): this makes the 0.2 setting take effect — confirm intended.
  select_topk: 0.2
  # NOTE(review): `normalize` is not among the parameters documented for `sce`;
  # kept as-is, verify whether it has any effect.
  normalize: true

dtype: bfloat16