Spaces:
Running
Running
update
Browse files
app.py
CHANGED
|
@@ -60,13 +60,17 @@ class MultiHeadSelfAttention(nn.Module):
|
|
| 60 |
class SwiGLUFFN(nn.Module):
|
| 61 |
def __init__(self, d_model, ffn_hidden_dim, dropout):
|
| 62 |
super().__init__()
|
| 63 |
-
self.
|
| 64 |
-
self.
|
| 65 |
-
self.
|
| 66 |
-
self.dropout
|
| 67 |
|
| 68 |
def forward(self, x):
|
| 69 |
-
return self.dropout(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
|
| 72 |
class TransformerBlock(nn.Module):
|
|
|
|
| 60 |
class SwiGLUFFN(nn.Module):
|
| 61 |
def __init__(self, d_model, ffn_hidden_dim, dropout):
|
| 62 |
super().__init__()
|
| 63 |
+
self.linear_gate = nn.Linear(d_model, ffn_hidden_dim, bias=False)
|
| 64 |
+
self.linear_value = nn.Linear(d_model, ffn_hidden_dim, bias=False)
|
| 65 |
+
self.linear_out = nn.Linear(ffn_hidden_dim, d_model, bias=False)
|
| 66 |
+
self.dropout = nn.Dropout(dropout)
|
| 67 |
|
| 68 |
def forward(self, x):
|
| 69 |
+
return self.dropout(
|
| 70 |
+
self.linear_out(
|
| 71 |
+
F.silu(self.linear_gate(x)) * self.linear_value(x)
|
| 72 |
+
)
|
| 73 |
+
)
|
| 74 |
|
| 75 |
|
| 76 |
class TransformerBlock(nn.Module):
|