Spaces:
Runtime error
Runtime error
prefilter.py
Browse files- prefilter.py +70 -0
prefilter.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""prefilter.py — PeVe v1.1"""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
from dataclasses import dataclass, field
|
| 4 |
+
from typing import Optional
|
| 5 |
+
from config import VEP_CONSEQUENCE_MAP, L3_SUBSTITUTION_INVALID
|
| 6 |
+
|
| 7 |
+
@dataclass
class VariantClass:
    """Pre-filter classification record for a single variant.

    Field order is part of the constructor signature (callers may pass the
    values positionally), so new fields must only ever be appended.  The
    per-field notes below describe how ``classify_variant`` in this module
    fills each field.
    """

    # Primary VEP consequence term, lower-cased and stripped.
    raw_consequence: str
    # Every consequence term supplied for the variant, normalised the same way.
    all_consequences: list
    # Resolved class label, e.g. "mnv" or "substitution_missense".
    variant_class: str
    # False when variant_class is listed in L3_SUBSTITUTION_INVALID.
    l3_substitution_valid: bool
    # True only for the "canonical_splice" class.
    rna_priority: bool
    # True only for the "substitution_missense" class.
    protein_priority: bool
    # True only for the "substitution_synonymous" class.
    protein_deprioritised: bool
    # True when the supplied consequences map to more than one class.
    transcript_conflict: bool
    # True for "utr_regulatory", "mnv" and "unknown" classes.
    out_of_scope: bool
    # Advisory messages; default_factory gives each instance its own list.
    flags: list = field(default_factory=list)
|
| 19 |
+
|
| 20 |
+
def classify_variant(ref, alt, vep_consequence, all_vep_consequences=None):
    """Build the pre-filter ``VariantClass`` record for one variant.

    The primary VEP consequence is normalised (lower-cased, stripped) and
    looked up in ``VEP_CONSEQUENCE_MAP``; when it has no entry the class is
    derived from the allele lengths by ``_infer``.  Equal-length alleles
    longer than one base short-circuit to an out-of-scope ``"mnv"`` record
    before any lookup happens.

    Args:
        ref: Reference allele; only its length is inspected here.
        alt: Alternate allele; only its length is inspected here.
        vep_consequence: Primary consequence term (must support ``.lower()``).
        all_vep_consequences: Consequence terms across transcripts.  ``None``
            (the default) means "just the primary consequence".

    Returns:
        A ``VariantClass`` carrying the normalised consequences, the resolved
        class, the routing booleans and any advisory flags.
    """
    if all_vep_consequences is None:
        all_vep_consequences = [vep_consequence]

    primary = vep_consequence.lower().strip()
    per_transcript = [term.lower().strip() for term in all_vep_consequences]

    # MNV detection: both alleles longer than one base and of equal length.
    ref_len = len(ref)
    if ref_len > 1 and len(alt) == ref_len:
        return VariantClass(
            raw_consequence=primary,
            all_consequences=per_transcript,
            variant_class="mnv",
            l3_substitution_valid=False,
            rna_priority=False,
            protein_priority=False,
            protein_deprioritised=False,
            transcript_conflict=False,
            out_of_scope=True,
            flags=["MNV: single-variant assessment may be incomplete"],
        )

    # Resolve the class from the primary consequence, falling back to an
    # allele-length guess when the map has no usable entry.
    resolved = VEP_CONSEQUENCE_MAP.get(primary, "unknown")
    if resolved == "unknown":
        resolved = _infer(ref, alt)

    # Transcripts disagree when their consequences land in different classes;
    # unmapped terms count as the class "unknown" for this comparison.
    distinct_classes = {
        VEP_CONSEQUENCE_MAP.get(term, "unknown") for term in per_transcript
    }
    conflicting = len(distinct_classes) > 1

    l3_ok = resolved not in L3_SUBSTITUTION_INVALID

    # A variant has exactly one class, so at most one class-specific note
    # applies; it is always emitted ahead of the conflict/unknown notes.
    fixed_notes = {
        "utr_regulatory": "UTR/regulatory: no mechanism pathway in PeVe v1.1.",
        "in_frame_indel": "In-frame indel: substitution biochemistry NOT APPLICABLE.",
        "deep_intronic": "Deep intronic: RNA interpretation down-prioritised.",
        "substitution_synonymous": "Synonymous: context signal alone cannot classify pathogenic.",
    }
    notes = []
    if resolved in ("frameshift", "stop_gained", "start_lost"):
        notes.append(f"{resolved}: Layer 3 substitution metrics NOT APPLICABLE.")
    elif resolved in fixed_notes:
        notes.append(fixed_notes[resolved])
    if conflicting:
        notes.append("Transcript conflict: consequence differs across transcripts.")
    # NOTE(review): _infer, as written in this file, never returns "unknown",
    # so this note can only fire if VEP_CONSEQUENCE_MAP maps a term to a value
    # that is neither "unknown" nor handled above -- confirm the intent.
    if resolved == "unknown":
        notes.append("Variant class unknown — outputs are exploratory only.")

    return VariantClass(
        raw_consequence=primary,
        all_consequences=per_transcript,
        variant_class=resolved,
        l3_substitution_valid=l3_ok,
        rna_priority=resolved == "canonical_splice",
        protein_priority=resolved == "substitution_missense",
        protein_deprioritised=resolved == "substitution_synonymous",
        transcript_conflict=conflicting,
        out_of_scope=resolved in {"utr_regulatory", "mnv", "unknown"},
        flags=notes,
    )
|
| 63 |
+
|
| 64 |
+
def _infer(ref, alt):
    """Guess a variant class from allele lengths alone.

    Used as a fallback when the VEP consequence term has no mapping.

    Args:
        ref: Reference allele (anything supporting ``len``).
        alt: Alternate allele (anything supporting ``len``).

    Returns:
        ``"substitution_missense"`` for a one-base to one-base change,
        ``"mnv"`` for equal-length alleles longer than one base,
        ``"in_frame_indel"`` when the length difference is a multiple of
        three, and ``"frameshift"`` otherwise.
    """
    ref_len, alt_len = len(ref), len(alt)

    # NOTE(review): every 1 -> 1 base change is labelled missense purely from
    # its length; coding context is not checked here.
    if ref_len == 1 and alt_len == 1:
        return "substitution_missense"

    # Equal-length multi-base alleles are a substitution, not an indel.  The
    # previous code reported them as "in_frame_indel" because the length
    # difference is 0; "mnv" matches the MNV rule used by classify_variant.
    if ref_len == alt_len and ref_len > 1:
        return "mnv"

    # Python's % returns a non-negative result for a positive modulus, so
    # deletions (negative difference) are handled the same as insertions.
    # Two empty alleles still fall through to "in_frame_indel", as before.
    if (alt_len - ref_len) % 3 == 0:
        return "in_frame_indel"
    return "frameshift"
|