thehekimoghlu committed on
Commit
16ce8ea
·
verified ·
1 Parent(s): d28fc09

Rename configuration_longcat_flash.py to configuration_spectra.py

Browse files
configuration_longcat_flash.py → configuration_spectra.py RENAMED
@@ -1,5 +1,5 @@
1
 
2
- """LongcatFlash model configuration"""
3
 
4
  from transformers.configuration_utils import PretrainedConfig
5
  from transformers.modeling_rope_utils import rope_config_validation
@@ -8,11 +8,11 @@ from transformers.modeling_rope_utils import rope_config_validation
8
  LONGCAT_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
9
 
10
 
11
- class LongcatFlashConfig(PretrainedConfig):
12
  r"""
13
- This is the configuration class to store the configuration of a [`LongcatFlashModel`]. It is used to instantiate an LongcatFlash
14
  model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
15
- defaults will yield a similar configuration to that of the LongcatFlash.
16
  Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
17
  documentation from [`PretrainedConfig`] for more information.
18
 
@@ -20,7 +20,7 @@ class LongcatFlashConfig(PretrainedConfig):
20
  Args:
21
  vocab_size (`int`, *optional*, defaults to 131072):
22
  Vocabulary size of the Deep model. Defines the number of different tokens that can be represented by the
23
- `inputs_ids` passed when calling [`LongcatFlashModel`]
24
  hidden_size (`int`, *optional*, defaults to 7168):
25
  Dimension of the hidden representations.
26
  ffn_hidden_size (`int`, *optional*, defaults to 18432):
@@ -90,16 +90,16 @@ class LongcatFlashConfig(PretrainedConfig):
90
  The type of zero expert to use.
91
 
92
  ```python
93
- >>> from transformers import LongcatFlashModel, LongcatFlashConfig
94
 
95
- >>> # Initializing a LongcatFlash style configuration
96
- >>> configuration = LongcatFlashConfig()
97
 
98
  >>> # Accessing the model configuration
99
  >>> configuration = model.config
100
  ```"""
101
 
102
- model_type = "longcat_flash"
103
  keys_to_ignore_at_inference = ["past_key_values"]
104
  base_model_tp_plan = {
105
  "layers.*.self_attn.k_proj": "colwise",
@@ -213,4 +213,4 @@ class LongcatFlashConfig(PretrainedConfig):
213
  return self.num_layers
214
 
215
 
216
- __all__ = ["LongcatFlashConfig"]
 
1
 
2
+ """Spectra model configuration"""
3
 
4
  from transformers.configuration_utils import PretrainedConfig
5
  from transformers.modeling_rope_utils import rope_config_validation
 
8
  LONGCAT_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
9
 
10
 
11
+ class SpectraConfig(PretrainedConfig):
12
  r"""
13
+ This is the configuration class to store the configuration of a [`SpectraModel`]. It is used to instantiate a Spectra
14
  model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
15
+ defaults will yield a similar configuration to that of Spectra.
16
  Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
17
  documentation from [`PretrainedConfig`] for more information.
18
 
 
20
  Args:
21
  vocab_size (`int`, *optional*, defaults to 131072):
22
  Vocabulary size of the Deep model. Defines the number of different tokens that can be represented by the
23
+ `input_ids` passed when calling [`SpectraModel`].
24
  hidden_size (`int`, *optional*, defaults to 7168):
25
  Dimension of the hidden representations.
26
  ffn_hidden_size (`int`, *optional*, defaults to 18432):
 
90
  The type of zero expert to use.
91
 
92
  ```python
93
+ >>> from transformers import SpectraModel, SpectraConfig
94
 
95
+ >>> # Initializing a Spectra style configuration
96
+ >>> configuration = SpectraConfig()
97
 
98
  >>> # Accessing the model configuration
99
  >>> configuration = model.config
100
  ```"""
101
 
102
+ model_type = "spectra"
103
  keys_to_ignore_at_inference = ["past_key_values"]
104
  base_model_tp_plan = {
105
  "layers.*.self_attn.k_proj": "colwise",
 
213
  return self.num_layers
214
 
215
 
216
+ __all__ = ["SpectraConfig"]