from typing import Optional

from transformers import PretrainedConfig


class PAL_B_Config(PretrainedConfig):
    """Configuration container for the PAL-B reward model.

    The constructor only records the hyperparameters below as instance
    attributes and forwards any remaining keyword arguments to
    ``PretrainedConfig.__init__``. No validation is performed here.

    NOTE(review): the per-argument descriptions are inferred from the
    argument names and defaults only; how each value is consumed is not
    visible in this file -- confirm against the model implementation.

    Args:
        d_hid: Presumably the hidden width of the projection network.
        d_pref: Presumably the dimensionality of the preference space.
        k: Integer hyperparameter (default 2); presumably a count of
            prototypes/groups -- TODO confirm.
        llm_name: Name of the backbone language model checkpoint
            (defaults to ``"facebook/opt-350m"``).
        pref_learner_type: Selector string for the preference learner
            variant (default ``"angle"``).
        proj_arch: Architecture spec string for the projector
            (default ``"mlp2-gelu-dropout0"``).
        initializer_type: Selector string for weight initialisation
            (default ``"gaussian"``).
        is_expectation_norm_init: Boolean initialisation flag.
        sfx_type: Selector string, default ``"softmax"``.
        sfx_temperature: Temperature value paired with ``sfx_type``.
        is_temperature_learnable: Presumably whether the temperature is
            a trainable parameter.
        is_gumbel_hard: Optional boolean flag; ``None`` means unset.
        uids: Optional list, presumably of user identifiers; ``None``
            means unset.
        **kwargs: Passed through unchanged to ``PretrainedConfig``.
    """

    # opt family model aligned PAL reward model
    # NOTE(review): this value contains a slash, which is unusual for a
    # ``model_type`` identifier; left unchanged because saved configs and
    # any Auto* registration may depend on the exact string.
    model_type = "facebook/opt"

    def __init__(
        self,
        d_hid: int = 512,
        d_pref: int = 512,
        k: int = 2,
        llm_name: str = "facebook/opt-350m",
        pref_learner_type: str = "angle",
        proj_arch: str = "mlp2-gelu-dropout0",
        initializer_type: str = "gaussian",
        is_expectation_norm_init: bool = False,
        sfx_type: str = "softmax",
        sfx_temperature: float = 1.0,
        is_temperature_learnable: bool = False,
        is_gumbel_hard: Optional[bool] = None,
        uids: Optional[list] = None,
        **kwargs,
    ):
        # Dimensions and backbone selection.
        self.d_hid = d_hid
        self.d_pref = d_pref
        self.k = k
        self.llm_name = llm_name

        # Preference-learner / projector options.
        self.pref_learner_type = pref_learner_type
        self.proj_arch = proj_arch
        self.initializer_type = initializer_type
        self.is_expectation_norm_init = is_expectation_norm_init

        # "sfx" options (softmax by default) and its temperature.
        self.sfx_type = sfx_type
        self.sfx_temperature = sfx_temperature
        self.is_temperature_learnable = is_temperature_learnable
        self.is_gumbel_hard = is_gumbel_hard

        self.uids = uids

        # Custom attributes are assigned before delegating to the base
        # class, which handles the remaining generic config kwargs.
        super().__init__(**kwargs)