nileshhanotia committed on
Commit
62f19f0
·
verified ·
1 Parent(s): 90a306c

prefilter.py

Browse files
Files changed (1) hide show
  1. prefilter.py +70 -0
prefilter.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """prefilter.py — PeVe v1.1"""
2
+ from __future__ import annotations
3
+ from dataclasses import dataclass, field
4
+ from typing import Optional
5
+ from config import VEP_CONSEQUENCE_MAP, L3_SUBSTITUTION_INVALID
6
+
7
@dataclass
class VariantClass:
    """Result record produced by ``classify_variant`` for a single variant.

    Field order is significant: ``classify_variant`` builds instances
    positionally, so new fields must only ever be appended at the end.
    """

    # Primary consequence string, lower-cased and stripped of whitespace.
    raw_consequence: str
    # Every consequence supplied for the variant, normalised the same way.
    all_consequences: list
    # Class label: looked up in VEP_CONSEQUENCE_MAP, or inferred from alleles.
    variant_class: str
    # False when variant_class is listed in L3_SUBSTITUTION_INVALID.
    l3_substitution_valid: bool
    # True only for the "canonical_splice" class.
    rna_priority: bool
    # True only for the "substitution_missense" class.
    protein_priority: bool
    # True only for the "substitution_synonymous" class.
    protein_deprioritised: bool
    # True when the supplied consequences map to more than one class.
    transcript_conflict: bool
    # True for classes that are not assessed (e.g. "utr_regulatory", "mnv").
    out_of_scope: bool
    # Human-readable caveats; each instance gets its own fresh list.
    flags: list = field(default_factory=list)
19
+
20
def classify_variant(ref, alt, vep_consequence, all_vep_consequences=None):
    """Pre-classify a variant and record which downstream layers apply.

    Args:
        ref: Reference allele string.
        alt: Alternate allele string.
        vep_consequence: Primary consequence term; it is lower-cased and
            stripped before being looked up in ``VEP_CONSEQUENCE_MAP``.
        all_vep_consequences: Optional iterable of consequence terms (one
            per transcript, judging by the conflict flag text). When
            omitted, only ``vep_consequence`` is considered.

    Returns:
        A ``VariantClass`` carrying the normalised consequences, the class
        label, the layer-priority booleans and any caveat flags.
    """
    # With no explicit list, the primary consequence stands in for it.
    if all_vep_consequences is None:
        all_vep_consequences = [vep_consequence]
    primary = vep_consequence.lower().strip()
    normalised = [term.lower().strip() for term in all_vep_consequences]

    # Equal-length multi-base alleles are reported as an out-of-scope MNV
    # before any consequence lookup takes place.
    is_mnv = len(ref) > 1 and len(alt) > 1 and len(ref) == len(alt)
    if is_mnv:
        return VariantClass(
            raw_consequence=primary,
            all_consequences=normalised,
            variant_class="mnv",
            l3_substitution_valid=False,
            rna_priority=False,
            protein_priority=False,
            protein_deprioritised=False,
            transcript_conflict=False,
            out_of_scope=True,
            flags=["MNV: single-variant assessment may be incomplete"],
        )

    # Unmapped consequences fall back to an allele-length based guess.
    label = VEP_CONSEQUENCE_MAP.get(primary, "unknown")
    if label == "unknown":
        label = _infer(ref, alt)

    # More than one distinct class across the supplied terms is a conflict.
    distinct_classes = {VEP_CONSEQUENCE_MAP.get(term, "unknown") for term in normalised}
    conflict = len(distinct_classes) > 1

    substitution_ok = label not in L3_SUBSTITUTION_INVALID
    not_assessed = label in {"utr_regulatory", "mnv", "unknown"}

    # A label matches at most one of these class-specific caveats.
    fixed_notes = {
        "utr_regulatory": "UTR/regulatory: no mechanism pathway in PeVe v1.1.",
        "in_frame_indel": "In-frame indel: substitution biochemistry NOT APPLICABLE.",
        "deep_intronic": "Deep intronic: RNA interpretation down-prioritised.",
        "substitution_synonymous": "Synonymous: context signal alone cannot classify pathogenic.",
    }
    notes = []
    if label in fixed_notes:
        notes.append(fixed_notes[label])
    elif label in ("frameshift", "stop_gained", "start_lost"):
        notes.append(f"{label}: Layer 3 substitution metrics NOT APPLICABLE.")
    if conflict:
        notes.append("Transcript conflict: consequence differs across transcripts.")
    # NOTE(review): "unknown" has already been replaced by _infer's result
    # above, so this only fires if _infer itself returns "unknown".
    if label == "unknown":
        notes.append("Variant class unknown — outputs are exploratory only.")

    return VariantClass(
        raw_consequence=primary,
        all_consequences=normalised,
        variant_class=label,
        l3_substitution_valid=substitution_ok,
        rna_priority=(label == "canonical_splice"),
        protein_priority=(label == "substitution_missense"),
        protein_deprioritised=(label == "substitution_synonymous"),
        transcript_conflict=conflict,
        out_of_scope=not_assessed,
        flags=notes,
    )
63
+
64
+ def _infer(ref, alt):
65
+ if len(ref) == 1 and len(alt) == 1:
66
+ return "substitution_missense"
67
+ diff = len(alt) - len(ref)
68
+ if diff % 3 == 0:
69
+ return "in_frame_indel"
70
+ return "frameshift"