Nj-1111 committed on
Commit
b7e7d5a
·
verified ·
1 Parent(s): 3218b84
Files changed (1) hide show
  1. app.py +9 -5
app.py CHANGED
@@ -60,13 +60,17 @@ class MultiHeadSelfAttention(nn.Module):
60
class SwiGLUFFN(nn.Module):
    """Feed-forward sublayer built around a SiLU-gated linear unit.

    Computes ``dropout(linearout(silu(lineargate(x)) * linearvalue(x)))``.
    All three projections are created without a bias term.

    Args:
        d_model: Input feature size of the gate/value projections and
            output feature size of the final projection.
        ffn_hidden_dim: Output feature size of the gate and value
            projections (and input feature size of the final projection).
        dropout: Probability handed to ``nn.Dropout``, applied to the output.
    """

    def __init__(self, d_model, ffn_hidden_dim, dropout):
        super().__init__()
        self.lineargate = nn.Linear(d_model, ffn_hidden_dim, bias=False)
        self.linearvalue = nn.Linear(d_model, ffn_hidden_dim, bias=False)
        self.linearout = nn.Linear(ffn_hidden_dim, d_model, bias=False)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return the gated feed-forward transform of ``x``."""
        # Use the attribute names registered in __init__. The previous body
        # called self.gate / self.value / self.out, none of which exist on
        # this module, so every forward pass raised AttributeError.
        gated = F.silu(self.lineargate(x)) * self.linearvalue(x)
        return self.dropout(self.linearout(gated))
 
 
 
 
70
 
71
 
72
  class TransformerBlock(nn.Module):
 
60
class SwiGLUFFN(nn.Module):
    """Position-wise feed-forward layer using a SiLU-gated linear unit.

    The input goes through two parallel bias-free projections; the
    SiLU-activated "gate" branch multiplies the "value" branch elementwise,
    and the product is projected back and passed through dropout.

    Args:
        d_model: Input feature size of the gate/value projections and
            output feature size of the final projection.
        ffn_hidden_dim: Feature size of the intermediate gated product.
        dropout: Probability handed to ``nn.Dropout`` on the output.
    """

    def __init__(self, d_model, ffn_hidden_dim, dropout):
        super().__init__()
        # Registration order is kept (gate, value, out, dropout) so parameter
        # initialisation and state-dict ordering are unchanged.
        self.linear_gate = nn.Linear(
            in_features=d_model, out_features=ffn_hidden_dim, bias=False
        )
        self.linear_value = nn.Linear(
            in_features=d_model, out_features=ffn_hidden_dim, bias=False
        )
        self.linear_out = nn.Linear(
            in_features=ffn_hidden_dim, out_features=d_model, bias=False
        )
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        """Return ``dropout(linear_out(silu(linear_gate(x)) * linear_value(x)))``."""
        gate = F.silu(self.linear_gate(x))
        value = self.linear_value(x)
        projected = self.linear_out(gate * value)
        return self.dropout(projected)
74
 
75
 
76
  class TransformerBlock(nn.Module):