"""EDEN model configuration. EDEN (Encoder Decoder Enhancement Network) is a from-scratch encoder-decoder Transformer that rewrites rough text into polished text. This file defines the configuration object used by the Hugging Face Transformers integration. """ from __future__ import annotations from transformers import PretrainedConfig class EdenConfig(PretrainedConfig): """Configuration for the EDEN encoder-decoder text-enhancement model. Args: vocab_size: Size of the byte-level BPE vocabulary. d_model: Hidden size of the model. n_heads: Number of attention heads in every attention block. n_layers: Number of encoder layers and decoder layers (each). dim_feedforward: Inner size of the position-wise feed-forward blocks. dropout: Dropout probability used throughout the network. max_len: Maximum supported sequence length (positions). beam_size: Default beam width used by the built-in enhance helper. length_penalty: Default beam length penalty. repetition_penalty: Default repetition penalty. unk_token_id, pad_token_id, bos_token_id, eos_token_id: Special token ids. """ model_type = "eden" def __init__( self, vocab_size: int = 24000, d_model: int = 640, n_heads: int = 10, n_layers: int = 8, dim_feedforward: int = 2560, dropout: float = 0.1, max_len: int = 512, beam_size: int = 4, length_penalty: float = 0.7, repetition_penalty: float = 1.08, tie_word_embeddings: bool = True, unk_token_id: int = 0, pad_token_id: int = 1, bos_token_id: int = 2, eos_token_id: int = 3, **kwargs, ): self.vocab_size = vocab_size self.d_model = d_model self.n_heads = n_heads self.n_layers = n_layers self.dim_feedforward = dim_feedforward self.dropout = dropout self.max_len = max_len self.beam_size = beam_size self.length_penalty = length_penalty self.repetition_penalty = repetition_penalty # Required by the Transformers tying machinery: the language-model head # shares its weight matrix with the input embedding. self.tie_word_embeddings = tie_word_embeddings self.unk_token_id = unk_token_id # Convenience aliases used by the rest of the code base. self.hidden_size = d_model self.num_attention_heads = n_heads self.num_hidden_layers = n_layers # ``is_encoder_decoder`` is stored in config.json, so drop any incoming # copy to avoid passing it twice on reload. kwargs.pop("is_encoder_decoder", None) super().__init__( pad_token_id=pad_token_id, bos_token_id=bos_token_id, eos_token_id=eos_token_id, is_encoder_decoder=True, **kwargs, )