Spaces:
Build error
Build error
| from api_prediction import AptaBLE_Pipeline | |
| import gradio as gr | |
| import pandas as pd | |
| import torch | |
| import tempfile | |
| from tabulate import tabulate | |
| import itertools | |
| import os | |
| import random | |
# Server binding: listen on all interfaces so the app is reachable from
# outside the container it runs in.
os.environ['GRADIO_SERVER_NAME'] = '0.0.0.0'

# Text shown in the web UI.
title = 'DNAptaBLE Model Inference'
desc = (
    'AptaBLE (cross-attention network), trained to predict the likelihood a DNA aptamer will form a complex with a target protein!'
    '\n\n'
    'Pass in a FASTA-formatted file of all aptamers and input your protein target amino acid sequence. '
    'Your output scores are available for download via an Excel file.'
)

# Module-level pipeline instance shared by get_scores(). The training-related
# arguments are passed as None; load_best_pt=True is expected to load the
# pretrained checkpoint (NOTE(review): confirm against AptaBLE_Pipeline).
pipeline = AptaBLE_Pipeline(
    lr=1e-6,
    weight_decay=None,
    epochs=None,
    model_type=None,
    model_version=None,
    model_save_path=None,
    accelerate_save_path=None,
    tensorboard_logdir=None,
    d_model=128,
    d_ff=512,
    n_layers=6,
    n_heads=8,
    dropout=0.1,
    load_best_pt=True,
    device='cuda',
    seed=1004,
)
def comparison(protein, aptamer_file, analysis=None):
    """Score every aptamer in a FASTA file against a single protein sequence.

    Args:
        protein: Protein sequence text entered in the UI; it is paired with
            every aptamer read from ``aptamer_file``.
        aptamer_file: Path to the uploaded FASTA file of aptamers.
        analysis: Optional value that is only echoed to stdout. It defaults
            to ``None`` so the function can be called with just the two
            inputs the Gradio interface provides.

    Returns:
        A ``(text, path)`` tuple. ``text`` is the content for the output
        textbox (currently always an empty string) and ``path`` is the
        location of a temporary ``.xlsx`` file containing the score table.
    """
    print('analysis: ', analysis)

    r_names, aptamers = read_fasta(aptamer_file)
    proteins = [protein] * len(aptamers)
    scores = get_scores(aptamers, proteins)

    # Column order matches the exported spreadsheet layout.
    df = pd.DataFrame({
        'Protein': ['protein_prov.'] * len(aptamers),
        'Protein Seq': proteins,
        'Aptamer': r_names,
        'Aptamer Seq': aptamers,
    })
    df['Score'] = scores

    # Reserve a unique path, then close the handle before pandas reopens it;
    # writing through a second handle while the first is open fails on
    # platforms that lock open files.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as temp_file:
        temp_file_path = temp_file.name

    print('Saving to excel!')
    df.to_excel(temp_file_path, index=False, engine='openpyxl')

    # Release cached GPU memory held by the allocator after inference.
    torch.cuda.empty_cache()
    return '', temp_file_path
def read_fasta(file_path):
    """Parse a FASTA file into parallel lists of headers and sequences.

    Each record starts with a line beginning with ``>``. All following
    non-blank lines up to the next header are concatenated into that record's
    sequence, so both single-line and wrapped (multi-line) sequences are
    supported. Blank lines are ignored, and any text before the first header
    is discarded.

    Args:
        file_path: Path to the FASTA file.

    Returns:
        A ``(headers, sequences)`` tuple of equal-length lists. Headers keep
        their leading ``>``. A header with no sequence lines yields an empty
        string so the two lists stay aligned.
    """
    headers = []
    fragments = []  # one list of sequence pieces per header
    with open(file_path, 'r') as file:
        for raw_line in file:
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith('>'):
                headers.append(line)
                fragments.append([])
            elif fragments:
                fragments[-1].append(line)
    sequences = [''.join(parts) for parts in fragments]
    return headers, sequences
def get_scores(aptamers, proteins):
    """Run the shared pipeline's inference on aptamer/protein pairs.

    The model is moved to the GPU for the duration of the call and moved back
    to the CPU afterwards, even if inference raises, so a failed request does
    not leave the model occupying GPU memory.

    Args:
        aptamers: List of aptamer sequences.
        proteins: List of protein sequences, one per aptamer.

    Returns:
        Whatever ``pipeline.inference`` returns for the given pairs.
    """
    pipeline.model.to('cuda')
    try:
        # The third argument is a list of zeros, one per aptamer
        # (presumably placeholder labels -- confirm against the pipeline).
        return pipeline.inference(aptamers, proteins, [0] * len(aptamers))
    finally:
        pipeline.model.to('cpu')
# Web UI: a protein sequence textbox and a FASTA upload in; a text field and
# a downloadable Excel file out.
iface = gr.Interface(
    fn=comparison,
    inputs=[
        gr.Textbox(lines=2, placeholder="Protein"),
        gr.File(type="filepath"),
    ],
    outputs=[
        gr.Textbox(placeholder="Scores"),
        gr.File(label="Download Excel"),
    ],
    title=title,
    description=desc,
)

iface.launch()