본문 바로가기

파이썬3

파이썬 생존분석과 Kaplan-meier plot 그리기

728x90
반응형
# Survival analysis
from sksurv.nonparametric import kaplan_meier_estimator
from lifelines.statistics import *
import matplotlib.pyplot as plt
def surv_analysis(x=None,xlab='Day',ylab='Survival probability'):
    '''
    This function draws KM-plot and return p-value.
    x:dataframe that must contain 'os','vital_status',and 'subtype'
    '''
    if x is None:
        print('provide dataframe')
        print('DF should consist of [os, vital_status, and subtype]')
        return 
    x['vital_status']=list(map(lambda y: bool(int(y)), list(x['vital_status'])))
    
    # Sort classes
    classes=x["subtype"].unique()
    classes.sort()
    for value in classes:
        kmf = KaplanMeierFitter()
        v1=x.loc[x['subtype']==value,:]
        kmf.fit(durations=v1['os'], event_observed=v1['vital_status'])
        kmf.plot_survival_function(show_censors=True, ci_show=False,label=value)    
    plt.ylabel(ylab)
    plt.xlabel(xlab)
    plt.legend(loc="best")
    
    # Calculate log-rank test p-value
    n=x.shape[0]
    pw_logP=pairwise_logrank_test(event_durations=np.array(x['os']).reshape(n,),
                                  groups=np.array(x['subtype']).reshape(n,),
                                  event_observed=np.array(x['vital_status']).reshape(n,),
                                  alpha=0.95, t_0=-1, bonferroni=False)
    
    overall_P=multivariate_logrank_test(event_durations=np.array(x['os']).reshape(n,),
                                        groups=np.array(x['subtype']).reshape(n,),
                                        event_observed=np.array(x['vital_status']).reshape(n,),
                                        alpha=0.95, t_0=-1)
    
    # Return result
    return {'pairwise_P':pw_logP.summary,'overall_P':overall_P.summary}

pairwise_P : 각 class별 pair-wise p-value

overall_P : 최종적인 p-value

 

# test
import numpy as np
tmp=pd.DataFrame(
    {'os':np.random.rand(10),'vital_status':np.random.choice([0,1],10),'subtype':np.random.choice(['high','low','m'],10)}
)
res=surv_analysis(x=tmp,xlab='Day',ylab='Survival probability')

728x90
반응형