본문 바로가기

파이썬3

netmhcpan 4.1 실행 및 결과 불러들이기

728x90
반응형
def reading_netmhcpan_result(path):
    '''
    Read a netMHCpan text report and return its prediction table as a dataframe.

    Lines are filtered before parsing: any line containing '#', any line of
    three characters or fewer, dashed separator lines, and lines containing
    'Distance' or 'Protein' are discarded, as are repeats of the header line.
    The remaining lines are split on whitespace and only the first 16 fields
    are kept, so trailing fields beyond the 16th header column are dropped.

    path : /path/to/netmhcpan-result-file.txt

    #return
    dataframe holding the first 16 header columns minus 'Identity' and 'Pos'.
    'Score_EL', '%Rank_EL', 'Score_BA', '%Rank_BA' and 'Aff(nM)' are converted
    to float when the column exists and every value parses; otherwise the
    column is left as it was read (strings).

    #raises
    IndexError : no header line (one containing both 'Pos' and 'MHC') was found,
                 e.g. because the report is empty.
    '''
    # Imported locally (as running_NetMHCpan does) so the function does not
    # depend on a module-level pandas import being present.
    import pandas as pd

    with open(path, 'r') as f:
        lines = f.readlines()
    # Remove comment lines and (near-)empty lines
    lines = [i for i in lines if '#' not in i and len(i) > 3]
    # Get header
    header_lines = [i for i in lines if 'Pos' in i and 'MHC' in i]
    if not header_lines:
        raise IndexError(
            f'no netMHCpan header line (containing "Pos" and "MHC") found in {path}'
        )
    header = header_lines[0]
    # Drop repeated headers, separators and the per-allele summary lines
    noise = ('------------', 'Distance', 'Protein')
    rows = [
        i for i in lines
        if header not in i and not any(token in i for token in noise)
    ]
    columns = header.split()[:16]
    # Cleaning content
    content = pd.DataFrame(
        [i.split()[:16] for i in rows], columns=columns
    ).drop(['Identity', 'Pos'], axis=1)
    # Change values into numeric (best effort: skip columns that are absent
    # or contain values that cannot be parsed as float)
    for c in ['Score_EL', '%Rank_EL', 'Score_BA', '%Rank_BA', 'Aff(nM)']:
        if c not in content.columns:
            continue
        try:
            content[c] = content[c].astype(float)
        except (ValueError, TypeError):
            continue
    return content


def running_NetMHCpan(x,path,netmhcpan):
    '''
    Run netMHCpan on the peptides/alleles of a dataframe and attach the
    predictions to it.

    #parameters
    x : dataframe ['peptide','mhc','other columns']
    path : /path/to/workdir # working dir path
    netmhcpan : /path/to/netMHCpan # software path (inserted into the shell
                command as-is, so it may also be a command with arguments)
    #return
    dataframe with netMHCpan metrics: x merged (inner join on 'mhc' and
    'peptide') with the columns Score_EL, %Rank_EL, Score_BA, %Rank_BA and
    Aff(nM). Rows of x without a matching prediction are therefore dropped.
    NOTE(review): the join compares x['mhc'] with the report's MHC column
    verbatim, so allele names in x must be spelled the way netMHCpan prints
    them - confirm against real output.
    #saved files
    1) /path/to/workdir/NetMHCpan_peptide_input.txt
    2) /path/to/workdir/NetMHCpan-result.txt
    #warnings
    RuntimeWarning if the netMHCpan command exits with a non-zero status.
    '''
    import os
    import shlex
    import subprocess as sbp
    import warnings
    import pandas as pd

    input_path = os.path.join(path, 'NetMHCpan_peptide_input.txt')
    result_path = os.path.join(path, 'NetMHCpan-result.txt')
    # Exporting the peptide file (one unique peptide per line)
    with open(input_path, 'w') as f:
        f.write('\n'.join(x['peptide'].unique()))
    # Get MHCs: unique alleles, with '*' stripped, as one comma-separated list
    mhcs = ','.join(i.replace('*', '') for i in x['mhc'].unique())
    # Make command line and run it.
    # The shell is kept for the output redirection and so that `netmhcpan`
    # keeps its original meaning; every interpolated path/value is quoted so
    # spaces or shell metacharacters in them cannot break or alter the command.
    print('Running NetMHCpan')
    cmd = (
        f'{netmhcpan} -p {shlex.quote(input_path)} -BA '
        f'-a {shlex.quote(mhcs)} > {shlex.quote(result_path)}'
    )
    returncode = sbp.call(cmd, shell=True)
    if returncode != 0:
        # Do not abort: the result file may still be usable, but make the
        # failure visible instead of silently ignoring it.
        warnings.warn(
            f'NetMHCpan command exited with status {returncode}; '
            f'{result_path} may be empty or incomplete',
            RuntimeWarning,
            stacklevel=2,
        )
    # Loading the result
    nmp = reading_netmhcpan_result(path=result_path)
    # Attach the prediction result to original data
    keep_col = ['MHC', 'Peptide', 'Score_EL', '%Rank_EL', 'Score_BA', '%Rank_BA', 'Aff(nM)']
    nmp1 = nmp.loc[:, keep_col].rename(columns={'MHC': 'mhc', 'Peptide': 'peptide'})
    return pd.merge(x, nmp1, on=['mhc', 'peptide'])
728x90
반응형