Spaces:
Running
Running
Add print statements for debugging
Browse files
- utils/pipelines.py +2 -0
utils/pipelines.py
CHANGED
|
@@ -175,6 +175,7 @@ def generate_ppl_scores_approx(sequences_batch, model, tokenizer, mask_percentag
|
|
| 175 |
ppl_scores : list of float
|
| 176 |
A list of approximate perplexity scores for each input sequence.
|
| 177 |
"""
|
|
|
|
| 178 |
device = model.device
|
| 179 |
mask_token_id = tokenizer.mask_token_id
|
| 180 |
if mask_token_id is None:
|
|
@@ -264,6 +265,7 @@ def generate_ppl_scores_approx(sequences_batch, model, tokenizer, mask_percentag
|
|
| 264 |
avg_neg_log_prob = -log_prob_sums[i] / token_counts[i]
|
| 265 |
ppl_scores.append(float(torch.exp(avg_neg_log_prob).item()))
|
| 266 |
|
|
|
|
| 267 |
return ppl_scores
|
| 268 |
|
| 269 |
def full_embedding_pipeline(fasta_files, model, tokenizer, batch_size, max_duration):
|
|
|
|
| 175 |
ppl_scores : list of float
|
| 176 |
A list of approximate perplexity scores for each input sequence.
|
| 177 |
"""
|
| 178 |
+
print(sequences_batch)
|
| 179 |
device = model.device
|
| 180 |
mask_token_id = tokenizer.mask_token_id
|
| 181 |
if mask_token_id is None:
|
|
|
|
| 265 |
avg_neg_log_prob = -log_prob_sums[i] / token_counts[i]
|
| 266 |
ppl_scores.append(float(torch.exp(avg_neg_log_prob).item()))
|
| 267 |
|
| 268 |
+
print(ppl_scores)
|
| 269 |
return ppl_scores
|
| 270 |
|
| 271 |
def full_embedding_pipeline(fasta_files, model, tokenizer, batch_size, max_duration):
|