Reasons for non-transplant

# Count the rows in each Category (rows with a missing Category are dropped)
# and express each count as a percentage of all counted rows.
# The denominator is computed from the data (sum of the per-category counts)
# instead of a hard-coded row total, so `pc` stays correct if the number of
# rows in `df` changes.
df %>%
  filter(!is.na(Category)) %>%
  group_by(Category) %>%
  summarise(n = n()) %>%
  ungroup() %>%
  mutate(pc = round(100 * n / sum(n), 1))
# Bar chart of the number of rows per Category, one fill colour per category.
# Rows with a missing Category are excluded before plotting.
df %>%
  filter(!is.na(Category)) %>%
  ggplot(aes(x = Category, fill = Category)) +
  geom_bar()

Interaction of predictors on time to Tx

# Pairwise scatterplots of the four numeric variables taken from `df.tx`.
scatterplotMatrix(
  ~ Age + Total_wait + cRF + Days_from_offer,
  data = df.tx
)

Naive Bayes Classifier model

Model for transplant within 100 days

The data is split into training and validation sets in a 70/30 ratio.


Naive Bayes Classifier for Discrete Predictors

Call:
naiveBayes.default(x = X, y = Y, laplace = laplace)

A-priori probabilities:
Y
        0         1 
0.6705202 0.3294798 

Conditional probabilities:
   Age.cat
Y      [18,30)    [30,40)    [40,50)    [50,60)    [60,85)
  0 0.05172414 0.12931034 0.09482759 0.43965517 0.28448276
  1 0.07017544 0.19298246 0.21052632 0.29824561 0.22807018

   Blood_group
Y            A         AB          B       null          O
  0 0.34482759 0.05172414 0.09482759 0.00862069 0.50000000
  1 0.52631579 0.08771930 0.03508772 0.00000000 0.35087719

   cRF.cat
Y       [0,10)    [10,40)    [40,70)    [70,85)    [85,95)   [95,100)
  0 0.68965517 0.10344828 0.05172414 0.04310345 0.01724138 0.09482759
  1 0.82456140 0.05263158 0.01754386 0.07017544 0.00000000 0.03508772

   TotWt
Y      [0,365)  [365,730) [730,1.1e+03) [1.1e+03,1.46e+03) [1.46e+03,1.82e+03)
  0 0.37962963 0.24074074    0.26851852         0.04629630          0.06481481
  1 0.33333333 0.31481481    0.09259259         0.18518519          0.07407407

   ReactWt
Y      [0,365)  [365,730) [730,1.1e+03) [1.1e+03,1.46e+03) [1.46e+03,1.82e+03)
  0 0.53571429 0.18750000    0.20535714         0.04464286          0.02678571
  1 0.56140351 0.29824561    0.07017544         0.03508772          0.03508772

Training set results

# Predict the 100-day outcome for the training rows with the fitted model,
# then cross-tabulate predictions (rows) against observed Tx_100d (columns).
NB_Predictions5a <- predict(nbm5, train)
conf.matrix5a <- table(NB_Predictions5a, train$Tx_100d)
conf.matrix5a
                
NB_Predictions5a  0  1
               0 96 24
               1 20 33
# Accuracy, kappa, sensitivity/specificity etc. for the training-set predictions.
confusionMatrix(data = NB_Predictions5a, reference = train$Tx_100d)
Confusion Matrix and Statistics

          Reference
Prediction  0  1
         0 96 24
         1 20 33
                                         
               Accuracy : 0.7457         
                 95% CI : (0.674, 0.8087)
    No Information Rate : 0.6705         
    P-Value [Acc > NIR] : 0.01992        
                                         
                  Kappa : 0.4139         
 Mcnemar's Test P-Value : 0.65108        
                                         
            Sensitivity : 0.8276         
            Specificity : 0.5789         
         Pos Pred Value : 0.8000         
         Neg Pred Value : 0.6226         
             Prevalence : 0.6705         
         Detection Rate : 0.5549         
   Detection Prevalence : 0.6936         
      Balanced Accuracy : 0.7033         
                                         
       'Positive' Class : 0              
                                         

Validation set results

# Predict the 100-day outcome for the held-out rows with the fitted model,
# then cross-tabulate predictions (rows) against observed Tx_100d (columns).
NB_Predictions5b <- predict(nbm5, test)
conf.matrix5b <- table(NB_Predictions5b, test$Tx_100d)
conf.matrix5b
                
NB_Predictions5b  0  1
               0 36 12
               1 13 12
# Accuracy, kappa, sensitivity/specificity etc. for the validation-set predictions.
confusionMatrix(data = NB_Predictions5b, reference = test$Tx_100d)
Confusion Matrix and Statistics

          Reference
Prediction  0  1
         0 36 12
         1 13 12
                                          
               Accuracy : 0.6575          
                 95% CI : (0.5372, 0.7647)
    No Information Rate : 0.6712          
    P-Value [Acc > NIR] : 0.6501          
                                          
                  Kappa : 0.2322          
 Mcnemar's Test P-Value : 1.0000          
                                          
            Sensitivity : 0.7347          
            Specificity : 0.5000          
         Pos Pred Value : 0.7500          
         Neg Pred Value : 0.4800          
             Prevalence : 0.6712          
         Detection Rate : 0.4932          
   Detection Prevalence : 0.6575          
      Balanced Accuracy : 0.6173          
                                          
       'Positive' Class : 0               
                                          

Model for transplant within 6 months

The data is again split into training and validation sets (70/30), this time partitioned on the 6-month outcome.

# Partition df.b 70/30 on the 6-month outcome (Tx_6m).
trainIndex6 <- createDataPartition(df.b$Tx_6m,
                                   p = 0.7, list = FALSE, times = 1)

# Index with trainIndex6 (the partition created just above). The previous
# code indexed with `trainIndex`, i.e. the partition built for the 100-day
# model, so the 6-month partition was never actually used.
# NOTE(review): any results printed from the earlier version should be
# regenerated after this fix.
train6 <- df.b[trainIndex6, ]
test6 <- df.b[-trainIndex6, ]

# Fit the naive Bayes classifier for transplant within 6 months on the
# categorical predictors, using the training rows only, and print the model.
nbm.6m <- naiveBayes(
  Tx_6m ~ Age.cat + Blood_group + cRF.cat + TotWt + ReactWt,
  data = train6
)
nbm.6m

Naive Bayes Classifier for Discrete Predictors

Call:
naiveBayes.default(x = X, y = Y, laplace = laplace)

A-priori probabilities:
Y
        0         1 
0.5549133 0.4450867 

Conditional probabilities:
   Age.cat
Y      [18,30)    [30,40)    [40,50)    [50,60)    [60,85)
  0 0.05208333 0.11458333 0.10416667 0.45833333 0.27083333
  1 0.06493506 0.19480519 0.16883117 0.31168831 0.25974026

   Blood_group
Y            A         AB          B       null          O
  0 0.32291667 0.05208333 0.09375000 0.01041667 0.52083333
  1 0.50649351 0.07792208 0.05194805 0.00000000 0.36363636

   cRF.cat
Y       [0,10)    [10,40)    [40,70)    [70,85)    [85,95)   [95,100)
  0 0.67708333 0.08333333 0.06250000 0.05208333 0.02083333 0.10416667
  1 0.80519481 0.09090909 0.01298701 0.05194805 0.00000000 0.03896104

   TotWt
Y      [0,365)  [365,730) [730,1.1e+03) [1.1e+03,1.46e+03) [1.46e+03,1.82e+03)
  0 0.41573034 0.24719101    0.25842697         0.03370787          0.04494382
  1 0.30136986 0.28767123    0.15068493         0.16438356          0.09589041

   ReactWt
Y      [0,365)  [365,730) [730,1.1e+03) [1.1e+03,1.46e+03) [1.46e+03,1.82e+03)
  0 0.54347826 0.18478261    0.18478261         0.05434783          0.03260870
  1 0.54545455 0.27272727    0.12987013         0.02597403          0.02597403
# Predict the 6-month outcome for the training rows, then cross-tabulate
# predictions (rows) against observed Tx_6m (columns).
NB_Predictions6m <- predict(nbm.6m, train6)
conf.matrix6m <- table(NB_Predictions6m, train6$Tx_6m)
conf.matrix6m
                
NB_Predictions6m  0  1
               0 68 25
               1 28 52
# Chi-squared test of association between predicted and observed class (training set).
chisq.test(x = conf.matrix6m)

    Pearson's Chi-squared test with Yates' continuity correction

data:  conf.matrix6m
X-squared = 23.78, df = 1, p-value = 1.08e-06
# Accuracy, kappa, sensitivity/specificity etc. for the 6-month training-set predictions.
confusionMatrix(data = NB_Predictions6m, reference = train6$Tx_6m)
Confusion Matrix and Statistics

          Reference
Prediction  0  1
         0 68 25
         1 28 52
                                          
               Accuracy : 0.6936          
                 95% CI : (0.6192, 0.7614)
    No Information Rate : 0.5549          
    P-Value [Acc > NIR] : 0.00013         
                                          
                  Kappa : 0.3822          
 Mcnemar's Test P-Value : 0.78353         
                                          
            Sensitivity : 0.7083          
            Specificity : 0.6753          
         Pos Pred Value : 0.7312          
         Neg Pred Value : 0.6500          
             Prevalence : 0.5549          
         Detection Rate : 0.3931          
   Detection Prevalence : 0.5376          
      Balanced Accuracy : 0.6918          
                                          
       'Positive' Class : 0               
                                          
# Predict the 6-month outcome for the held-out rows, then cross-tabulate
# predictions (rows) against observed Tx_6m (columns).
NB_Predictions6t <- predict(nbm.6m, test6)
conf.matrix6t <- table(NB_Predictions6t, test6$Tx_6m)
conf.matrix6t
                
NB_Predictions6t  0  1
               0 20 12
               1 16 25
# Chi-squared test of association between predicted and observed class (validation set).
chisq.test(x = conf.matrix6t)

    Pearson's Chi-squared test with Yates' continuity correction

data:  conf.matrix6t
X-squared = 3.0791, df = 1, p-value = 0.0793
# Accuracy, kappa, sensitivity/specificity etc. for the 6-month validation-set predictions.
confusionMatrix(data = NB_Predictions6t, reference = test6$Tx_6m)
Confusion Matrix and Statistics

          Reference
Prediction  0  1
         0 20 12
         1 16 25
                                          
               Accuracy : 0.6164          
                 95% CI : (0.4952, 0.7279)
    No Information Rate : 0.5068          
    P-Value [Acc > NIR] : 0.03911         
                                          
                  Kappa : 0.2316          
 Mcnemar's Test P-Value : 0.57075         
                                          
            Sensitivity : 0.5556          
            Specificity : 0.6757          
         Pos Pred Value : 0.6250          
         Neg Pred Value : 0.6098          
             Prevalence : 0.4932          
         Detection Rate : 0.2740          
   Detection Prevalence : 0.4384          
      Balanced Accuracy : 0.6156          
                                          
       'Positive' Class : 0               
                                          
LS0tCnRpdGxlOiAiUHJlZGljdGluZyByZS1vZmZlciBhZnRlciBub24tdHJhbnNwbGFudCIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCmBgYHtyfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShjYXIpCmxpYnJhcnkoY2FyZXQpCmBgYAoKIyBSZWFzb25zIGZvciBub24tdHJhbnNwbGFudAoKYGBge3J9CmRmIDwtIHJlYWRfY3N2KCJub3RfdHg0YV9hbm9uLmNzdiIpCgpkZiA8LWRmICU+JQogIG11dGF0ZShUeF9ldmVudCA9IGlmZWxzZShpcy5uYShUeF9hZnRlcl9vZmZlciksIDAsIDEpKSAlPiUKICBtdXRhdGUoVHhfMTAwZCA9IGlmZWxzZShUeF9ldmVudCA9PSAxICYgVHhfYWZ0ZXJfb2ZmZXIgPD0gMTAwLCAxLCAwKSkgJT4lCiAgbXV0YXRlKFR4XzZtID0gaWZlbHNlKFR4X2V2ZW50ID09IDEgJiBUeF9hZnRlcl9vZmZlciA8PSAxODMsIDEsIDApKSAlPiUKICBtdXRhdGUoV1QuZGlmZiA9IFRvdGFsX3dhaXQgLSBSZWFjdGl2YXRlZF93YWl0KSAlPiUKICBtdXRhdGUoY1JGLmNhdCA9IGN1dChjUkYsIGMoMCwxMCw0MCw3MCw4NSw5NSwxMDApLCByaWdodD1GQUxTRSkpICU+JQogIG11dGF0ZShBZ2UuY2F0ID0gY3V0KEFnZSwgYygxOCwzMCw0MCw1MCw2MCw4NSksIHJpZ2h0PUZBTFNFKSkgJT4lCiAgZmlsdGVyKENhdGVnb3J5ICE9ICJQdCBkZWFkIikgJT4lCiAgZmlsdGVyKENhdGVnb3J5ICE9ICJQb3RlbnRpYWwgTEQiKSAlPiUKICBtdXRhdGUoVG90V3QgPSBjdXQoVG90YWxfd2FpdCwgYygwLDM2NSw3MzAsMTA5NSwxNDYwLDE4MjUpLCByaWdodD1GQUxTRSkpICU+JQogIG11dGF0ZShSZWFjdFd0ID0gY3V0KFJlYWN0aXZhdGVkX3dhaXQsIGMoMCwzNjUsNzMwLDEwOTUsMTQ2MCwxODI1KSwgcmlnaHQ9RkFMU0UpKSAlPiUKICBtdXRhdGUoV3REaWZmID0gY3V0KFdULmRpZmYsIGMoMCwzNjUsNzMwLDEwOTUpLCByaWdodD1GQUxTRSkpICU+JQogIG11dGF0ZShTdXBlcmNhdCA9IGlmZWxzZShDYXRlZ29yeSA9PSAiUGF0aWVudCAobG9uZyB0ZXJtKSIgfCBDYXRlZ29yeSA9PSAiWE0iLCAiTFQiLCAiU1QiKSkKCmRmJEJsb29kX2dyb3VwIDwtIGFzLmZhY3RvcihkZiRCbG9vZF9ncm91cCkKZGYkVHhfMTAwZCA8LSBhcy5mYWN0b3IoZGYkVHhfMTAwZCkKZGYkVHhfNm0gPC0gYXMuZmFjdG9yKGRmJFR4XzZtKQoKaGVhZChkZikKYGBgCgojIFJlYXNvbnMgZm9yIG5vbi10cmFuc3BsYW50CgpgYGB7cn0KZGYgJT4lIAogIGZpbHRlcighaXMubmEoQ2F0ZWdvcnkpKSAlPiUKICBzdW1tYXJpc2UobiA9IG4oKSkKCmRmICU+JQogIGZpbHRlcighaXMubmEoQ2F0ZWdvcnkpKSAlPiUKICBncm91cF9ieShDYXRlZ29yeSkgJT4lCiAgc3VtbWFyaXNlKG4gPSBuKCksIHBjID0gcm91bmQobiAvIDQuNTQsIDEpKQpgYGAKCmBgYHtyfQpkZiAlPiUKICBmaWx0ZXIoIWlzLm5hKENhdGVnb3J5KSkgJT4lCiAgZ2dwbG90KC4sIGFlcyh4PUNhdGVnb3J5LCBmaWxsPUNhdGVnb3J5KSkgKyBnZW9tX2JhcigpCmBgYAoKIyBJbnRlcmFjdGlvbiBv
ZiBwcmVkaWN0b3JzIG9uIHRpbWUgdG8gVHgKCmBgYHtyfQpzY2F0dGVycGxvdE1hdHJpeCh+IEFnZSArIFRvdGFsX3dhaXQgKyBjUkYgKyBEYXlzX2Zyb21fb2ZmZXIsIGRhdGEgPSBkZi50eCkKYGBgCgoKIyBOYWl2ZSBCYXllcyBDbGFzc2lmaWVyIG1vZGVsCgojIyBNb2RlbCBmb3IgdHJhbnNwbGFudCB3aXRoaW4gMTAwIGRheXMKClRoZSBkYXRhIGlzIGZvcm1hdHRlZCB0aGVuIHNwbGl0IGludG8gdHJhaW5pbmcgYW5kIHZhbGlkYXRpb24gc2V0czoKCmBgYHtyfQpkZi5iIDwtIGRmICU+JQogIGZpbHRlcihDYXRlZ29yeSA9PSAiRG9ub3IiKSAlPiUKICBzZWxlY3QoVG90V3QsIFJlYWN0V3QsIEJsb29kX2dyb3VwLCBjUkYuY2F0LCBBZ2UuY2F0LCBUeF8xMDBkLCBUeF82bSkKCnRyYWluSW5kZXggPSBjcmVhdGVEYXRhUGFydGl0aW9uKGRmLmIyJFR4XzEwMGQsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHA9MC43LCBsaXN0PUZBTFNFLHRpbWVzPTEpCgp0cmFpbiA9IGRmLmJbdHJhaW5JbmRleCxdCnRlc3QgPSBkZi5iWy10cmFpbkluZGV4LF0KYGBgCgpUaGVuIHRoZSBuYWl2ZSBCYXllcyBjbGFzc2lmaWVyIG1vZGVsIGlzIHJ1bjoKCmBgYHtyfQpuYm01ID0gbmFpdmVCYXllcyhUeF8xMDBkIH4gQWdlLmNhdCArIEJsb29kX2dyb3VwICsgY1JGLmNhdCArIFRvdFd0ICsgUmVhY3RXdCwgZGF0YSA9IHRyYWluKQpuYm01CmBgYAoKIyMjIFRyYWluaW5nIHNldCByZXN1bHRzCgpgYGB7cn0KTkJfUHJlZGljdGlvbnM1YSA9IHByZWRpY3QobmJtNSwgdHJhaW4pCmNvbmYubWF0cml4NWEgPC0gdGFibGUoTkJfUHJlZGljdGlvbnM1YSwgdHJhaW4kVHhfMTAwZCkKY29uZi5tYXRyaXg1YQpjb25mdXNpb25NYXRyaXgoTkJfUHJlZGljdGlvbnM1YSwgdHJhaW4kVHhfMTAwZCkKYGBgCgojIyMgVmFsaWRhdGlvbiBzZXQgcmVzdWx0cwoKYGBge3J9Ck5CX1ByZWRpY3Rpb25zNWIgPSBwcmVkaWN0KG5ibTUsIHRlc3QpCmNvbmYubWF0cml4NWIgPC0gdGFibGUoTkJfUHJlZGljdGlvbnM1YiwgdGVzdCRUeF8xMDBkKQpjb25mLm1hdHJpeDViCmNvbmZ1c2lvbk1hdHJpeChOQl9QcmVkaWN0aW9uczViLCB0ZXN0JFR4XzEwMGQpCmBgYAoKIyMgTW9kZWwgZm9yIHRyYW5zcGxhbnQgd2l0aGluIDYgbW9udGhzCgpUaGUgZGF0YSBpcyBhZ2FpbiBzcGxpdCBpbnRvIHZhbGlkYXRpb24gYW5kIHRyYWluaW5nIHNldHMKCmBgYHtyfQp0cmFpbkluZGV4NiA9IGNyZWF0ZURhdGFQYXJ0aXRpb24oZGYuYiRUeF82bSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgcD0wLjcsIGxpc3Q9RkFMU0UsdGltZXM9MSkKCnRyYWluNiA9IGRmLmJbdHJhaW5JbmRleCxdCnRlc3Q2ID0gZGYuYlstdHJhaW5JbmRleCxdCgpgYGAKCmBgYHtyfQpuYm0uNm0gPSBuYWl2ZUJheWVzKFR4XzZtIH4gQWdlLmNhdCArIEJsb29kX2dyb3VwICsgY1JGLmNhdCArIFRvdFd0ICsgUmVhY3RXdCwgZGF0YSA9IHRyYWluNikKbmJtLjZtCmBgYAoKYGBge3J9Ck5C
X1ByZWRpY3Rpb25zNm0gPSBwcmVkaWN0KG5ibS42bSwgdHJhaW42KQpjb25mLm1hdHJpeDZtIDwtIHRhYmxlKE5CX1ByZWRpY3Rpb25zNm0sIHRyYWluNiRUeF82bSkKY29uZi5tYXRyaXg2bQpjaGlzcS50ZXN0KGNvbmYubWF0cml4Nm0pCmNvbmZ1c2lvbk1hdHJpeChOQl9QcmVkaWN0aW9uczZtLCB0cmFpbjYkVHhfNm0pCmBgYAoKYGBge3J9Ck5CX1ByZWRpY3Rpb25zNnQgPSBwcmVkaWN0KG5ibS42bSwgdGVzdDYpCmNvbmYubWF0cml4NnQgPC0gdGFibGUoTkJfUHJlZGljdGlvbnM2dCwgdGVzdDYkVHhfNm0pCmNvbmYubWF0cml4NnQKY2hpc3EudGVzdChjb25mLm1hdHJpeDZ0KQpjb25mdXNpb25NYXRyaXgoTkJfUHJlZGljdGlvbnM2dCwgdGVzdDYkVHhfNm0pCmBgYAoK