728x90
반응형
def reading_netmhcpan_result(path):
    '''
    Read a NetMHCpan plain-text result file and turn it into a dataframe.

    #parameters
    path : /path/to/netmhcpan-result-file.txt

    #return
    dataframe with one row per prediction line. Only the first 16
    whitespace-separated fields of the header and of each row are used,
    and the 'Identity' and 'Pos' columns are dropped. The columns
    'Score_EL', '%Rank_EL', 'Score_BA', '%Rank_BA' and 'Aff(nM)' are
    converted to float when present and fully numeric; otherwise they
    are left as strings.

    #raises
    IndexError : no header line (containing both 'Pos' and 'MHC') found.
    '''
    # Imported locally so the function does not rely on a module-level `pd`.
    import pandas as pd

    # The context manager closes the file; no explicit close() is needed.
    with open(path, 'r') as f:
        lines = f.readlines()

    # Remove comment lines (anything containing '#') and near-empty lines
    lines = [line for line in lines if '#' not in line and len(line) > 3]

    # Get header: the first line mentioning both 'Pos' and 'MHC'
    header_candidates = [line for line in lines if 'Pos' in line and 'MHC' in line]
    if not header_candidates:
        # IndexError is kept as the exception type for backward compatibility.
        raise IndexError(f'no NetMHCpan header line found in {path}')
    header_line = header_candidates[0]

    # Drop everything that is not a prediction row: repeated headers,
    # separator rules, and the 'Distance' / 'Protein' summary lines.
    not_data = (header_line, '------------', 'Distance', 'Protein')
    rows = [line for line in lines if not any(tag in line for tag in not_data)]

    # Cleaning content: keep the first 16 fields so trailing tokens
    # (e.g. a bind-level marker such as '<= SB') do not shift the columns.
    header = header_line.split()[:16]
    content = pd.DataFrame([row.split()[:16] for row in rows], columns=header)
    content = content.drop(['Identity', 'Pos'], axis=1)

    # Change values into numeric
    for c in ['Score_EL', '%Rank_EL', 'Score_BA', '%Rank_BA', 'Aff(nM)']:
        if c not in content.columns:
            continue
        try:
            content[c] = content[c].astype(float)
        except (ValueError, TypeError):
            # Column holds non-numeric text; leave it untouched.
            continue
    return content
def running_NetMHCpan(x,path,netmhcpan):
    '''
    Run NetMHCpan on the peptides/MHCs of a dataframe and attach the scores.

    #parameters
    x : dataframe ['peptide','mhc','other columns']
    path : /path/to/workdir # working dir path
    netmhcpan : /path/to/netMHCpan # software path

    #return
    dataframe with netMHCpan metrics ('Score_EL','%Rank_EL','Score_BA',
    '%Rank_BA','Aff(nM)') merged onto x by ['mhc','peptide'].
    The merge uses pandas' default join, so rows of x without a matching
    prediction are not part of the result.

    #saved files
    1) /path/to/workdir/NetMHCpan_peptide_input.txt
    2) /path/to/workdir/NetMHCpan-result.txt
    '''
    import os
    import shlex
    import subprocess as sbp
    import pandas as pd

    peptide_file = os.path.join(path, 'NetMHCpan_peptide_input.txt')
    result_file = os.path.join(path, 'NetMHCpan-result.txt')

    # Exporting the peptide file (one unique peptide per line)
    peptides = '\n'.join(x['peptide'].unique())
    with open(peptide_file, 'w') as f:
        f.write(peptides)

    # Get MHCs: unique alleles, '*' stripped, comma-separated for the -a option
    mhcs = ','.join(allele.replace('*', '') for allele in x['mhc'].unique())

    # Make command line and run it.
    # The shell is kept so that stdout can be redirected and so that any
    # `netmhcpan` value that worked before keeps working; the interpolated
    # paths and allele list are quoted so spaces or shell metacharacters in
    # them cannot break or alter the command.
    print('Running NetMHCpan')
    cmd = (
        f'{netmhcpan} -p {shlex.quote(peptide_file)} -BA '
        f'-a {shlex.quote(mhcs)} > {shlex.quote(result_file)}'
    )
    status = sbp.call(cmd, shell=True)
    if status != 0:
        # Best effort: report the failure but still try to parse the output.
        print(f'NetMHCpan exited with status {status}')

    # Loading the result
    nmp = reading_netmhcpan_result(path=result_file)

    # Attach the prediction result to original data
    # NOTE(review): the merge requires the 'MHC' strings written by NetMHCpan
    # to match x['mhc'] exactly (including any '*') - confirm against real output.
    keep_col = ['MHC', 'Peptide', 'Score_EL', '%Rank_EL', 'Score_BA', '%Rank_BA', 'Aff(nM)']
    nmp1 = nmp.loc[:, keep_col].rename(columns={'MHC': 'mhc', 'Peptide': 'peptide'})
    return pd.merge(x, nmp1, on=['mhc', 'peptide'])
728x90
반응형
'파이썬3' 카테고리의 다른 글
PRIME 1.0 결과물을 파이썬으로 불러들이는 스크립트 (1) | 2024.01.05 |
---|---|
리스트 나누기 (chunking, sub-list,list split) (0) | 2023.12.27 |
언더바 (_)를 포함하는 위치를 슬라이싱하는 파이썬 스크립트 (1) | 2023.12.26 |
파이썬 병렬화 (multiprocessing) (0) | 2023.12.19 |
Tukey's median polish (1) | 2023.11.01 |