"""EDEN model configuration.

EDEN (Encoder Decoder Enhancement Network) is a from-scratch encoder-decoder
Transformer that rewrites rough text into polished text. This file defines the
configuration object used by the Hugging Face Transformers integration.
"""

from __future__ import annotations

from transformers import PretrainedConfig


class EdenConfig(PretrainedConfig):
    """Configuration for the EDEN encoder-decoder text-enhancement model.

    The constructor stores the architecture and decoding hyper-parameters on
    the instance, mirrors three of them under the attribute names commonly used
    by Transformers (``hidden_size``, ``num_attention_heads``,
    ``num_hidden_layers``), and delegates the special token ids, the
    embedding-tying flag and ``is_encoder_decoder=True`` to
    ``PretrainedConfig.__init__``.

    Args:
        vocab_size: Size of the byte-level BPE vocabulary.
        d_model: Hidden size of the model.
        n_heads: Number of attention heads in every attention block.
        n_layers: Number of encoder layers and decoder layers (each).
        dim_feedforward: Inner size of the position-wise feed-forward blocks.
        dropout: Dropout probability used throughout the network.
        max_len: Maximum supported sequence length (positions).
        beam_size: Default beam width used by the built-in enhance helper.
        length_penalty: Default beam length penalty.
        repetition_penalty: Default repetition penalty.
        tie_word_embeddings: Whether the language-model head shares its weight
            matrix with the input embedding.
        unk_token_id, pad_token_id, bos_token_id, eos_token_id: Special token ids.
        **kwargs: Extra options forwarded unchanged to ``PretrainedConfig``.
            An incoming ``is_encoder_decoder`` entry is discarded because this
            configuration always sets it to ``True``.
    """

    model_type = "eden"

    def __init__(
        self,
        vocab_size: int = 24000,
        d_model: int = 640,
        n_heads: int = 10,
        n_layers: int = 8,
        dim_feedforward: int = 2560,
        dropout: float = 0.1,
        max_len: int = 512,
        beam_size: int = 4,
        length_penalty: float = 0.7,
        repetition_penalty: float = 1.08,
        tie_word_embeddings: bool = True,
        unk_token_id: int = 0,
        pad_token_id: int = 1,
        bos_token_id: int = 2,
        eos_token_id: int = 3,
        **kwargs,
    ):
        # Architecture hyper-parameters.
        self.vocab_size = vocab_size
        self.d_model = d_model
        self.n_heads = n_heads
        self.n_layers = n_layers
        self.dim_feedforward = dim_feedforward
        self.dropout = dropout
        self.max_len = max_len

        # Decoding defaults.
        self.beam_size = beam_size
        self.length_penalty = length_penalty
        self.repetition_penalty = repetition_penalty

        # Unlike pad/bos/eos, the unknown-token id is kept directly on this
        # object rather than being handed to the base constructor.
        self.unk_token_id = unk_token_id

        # Convenience aliases used by the rest of the code base.
        self.hidden_size = d_model
        self.num_attention_heads = n_heads
        self.num_hidden_layers = n_layers

        # ``is_encoder_decoder`` is stored in config.json, so drop any incoming
        # copy to avoid passing it twice on reload.
        kwargs.pop("is_encoder_decoder", None)
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            is_encoder_decoder=True,
            # Must be forwarded rather than assigned beforehand: the base
            # constructor sets ``tie_word_embeddings`` from its own keyword
            # argument (default ``True``), so a value stored on ``self`` before
            # this call would be overwritten and ``False`` silently ignored.
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )