Comparing AI methods to predict whether a customer will enroll in a term deposit
AI
analysis
R
Author
Aditya Ranade
Published
June 17, 2025
I found this dataset on the UCI Machine Learning Repository; it provides bank marketing data from a Portuguese banking institution. The goal is to predict whether the client will subscribe to a term deposit. The data has various predictor variables. We will look at the data first and then build a prediction model.
library(reshape2)
library(ggplot2)
library(tidyverse)
library(dplyr)
library(ggh4x)
library(GGally)
library(pROC)
library(naivebayes)
library(caret)
library(e1071)
library(nnet)
library(xgboost)

# Load data in R
path <- "https://raw.githubusercontent.com/adityaranade/portfolio/refs/heads/main/bank_marketing/bank.csv"
data0 <- read.csv(path, sep = ";", header = TRUE)

# Check the first 6 rows of the dataset
head(data0)
age job marital education default balance housing loan contact day
1 30 unemployed married primary no 1787 no no cellular 19
2 33 services married secondary no 4789 yes yes cellular 11
3 35 management single tertiary no 1350 yes no cellular 16
4 30 management married tertiary no 1476 yes yes unknown 3
5 59 blue-collar married secondary no 0 yes no unknown 5
6 35 management single tertiary no 747 no no cellular 23
month duration campaign pdays previous poutcome y
1 oct 79 1 -1 0 unknown no
2 may 220 1 339 4 failure no
3 apr 185 1 330 1 failure no
4 jun 199 4 -1 0 unknown no
5 may 226 1 -1 0 unknown no
6 feb 141 2 176 3 failure no
# Check the rows which do not have any entries
sum(is.na(data0)) # No NA values
[1] 0
# Data processing
data <- data0 |>
  select(age, job, marital, education, default,
         balance, housing, loan, duration,
         campaign, pdays, previous, poutcome, y)

# # Check data type
# data %>% str

# Convert the variables to categorical
data$job <- as.factor(data$job)
data$marital <- as.factor(data$marital)
data$education <- as.factor(data$education)
data$default <- as.factor(data$default)
data$housing <- as.factor(data$housing)
data$loan <- as.factor(data$loan)
data$poutcome <- as.factor(data$poutcome)
data$y <- as.factor(data$y)

# Check the distribution of the outcome y
ggplot(data, aes(x = factor(y), fill = factor(y))) +
  geom_bar() +
  # geom_bar(fill = "purple") +
  geom_text(stat = "count", aes(label = ..count..), vjust = -0.5) +
  labs(x = "Target", y = "Count", title = "Distribution of Target") +
  theme_minimal()
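To put a number on the imbalance visible in the bar chart, the class proportions can also be checked directly; a one-line sketch:

# Proportion of each outcome class (illustrative check of the imbalance)
prop.table(table(data$y))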
The number of yes responses is considerably lower than the number of no responses, which indicates that the classes are imbalanced. First we will look at a simple logistic regression.
# To ensure reproducibility
set.seed(55)

# Split data into training and testing set
ind <- sample(1:nrow(data), floor(0.7 * nrow(data)), replace = FALSE)

# Training dataset
data_train <- data[ind, ]

# Testing dataset
data_test <- data[-ind, ]
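A minimal sketch of the logistic regression and neural network fits follows; the hidden-layer size of 5 and the 200-iteration cap are assumptions, chosen to be consistent with the "# weights: 151" line and the "stopped after 200 iterations" message in the training trace below.

# Logistic regression on the training data (sketch)
model_logistic <- glm(y ~ ., data = data_train, family = binomial)

# Single-hidden-layer neural network via nnet (sketch; settings assumed)
# size = 5 is consistent with the "# weights: 151" line printed below
model_nnet <- nnet(y ~ ., data = data_train, size = 5, maxit = 200)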
# weights: 151
initial value 1932.835777
iter 10 value 1084.378919
iter 20 value 1008.871476
iter 30 value 1000.762095
iter 40 value 980.193591
iter 50 value 940.441583
iter 60 value 926.854358
iter 70 value 921.815841
iter 80 value 917.811037
iter 90 value 884.518071
iter 100 value 825.223570
iter 110 value 802.825670
iter 120 value 794.840877
iter 130 value 784.739292
iter 140 value 766.855650
iter 150 value 759.677806
iter 160 value 757.825474
iter 170 value 756.109305
iter 180 value 752.439765
iter 190 value 748.184352
iter 200 value 746.727211
final value 746.727211
stopped after 200 iterations
# Predictions
y_pred_nnet <- predict(model_nnet, data_test, type = "class")

# Confusion Matrix
confusionMatrix(data_test$y, as.factor(y_pred_nnet))
Confusion Matrix and Statistics
Reference
Prediction no yes
no 1158 35
yes 106 58
Accuracy : 0.8961
95% CI : (0.8786, 0.9118)
No Information Rate : 0.9315
P-Value [Acc > NIR] : 1
Kappa : 0.3988
Mcnemar's Test P-Value : 3.745e-09
Sensitivity : 0.9161
Specificity : 0.6237
Pos Pred Value : 0.9707
Neg Pred Value : 0.3537
Prevalence : 0.9315
Detection Rate : 0.8534
Detection Prevalence : 0.8791
Balanced Accuracy : 0.7699
'Positive' Class : no
# Store the confusion matrix in a list for later comparison
cm_list$neural_network <- confusionMatrix(data_test$y, as.factor(y_pred_nnet))
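The confusion matrices are collected in cm_list so the models can be compared at the end; a minimal sketch of its initialization, which needs to run once before any of these assignments:

# Empty list to collect the confusion matrices (assumed initialization)
cm_list <- list()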
The accuracy is close to 89.61%, which is not bad, although it sits just below the no information rate of 93.15%, a reminder that accuracy alone can be misleading with imbalanced classes. Next we will try the support vector classification method.
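A minimal sketch of the support vector classifier fit with e1071; the radial kernel and other settings are assumptions:

# Support vector classifier (sketch; radial kernel assumed)
model_svc <- svm(y ~ ., data = data_train, kernel = "radial")

# Predictions
y_pred_svc <- predict(model_svc, data_test)

# Confusion Matrix
confusionMatrix(data_test$y, as.factor(y_pred_svc))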
Confusion Matrix and Statistics
Reference
Prediction no yes
no 1017 176
yes 147 17
Accuracy : 0.762
95% CI : (0.7384, 0.7844)
No Information Rate : 0.8578
P-Value [Acc > NIR] : 1.0000
Kappa : -0.0408
Mcnemar's Test P-Value : 0.1192
Sensitivity : 0.87371
Specificity : 0.08808
Pos Pred Value : 0.85247
Neg Pred Value : 0.10366
Prevalence : 0.85777
Detection Rate : 0.74945
Detection Prevalence : 0.87915
Balanced Accuracy : 0.48090
'Positive' Class : no
# Store the confusion matrix in the list
cm_list$support_vector_classifier <- confusionMatrix(data_test$y, as.factor(y_pred_svc))
The accuracy drops to about 76.2%, which is considerably worse, and the negative Kappa shows the classifier adds little beyond guessing the majority class. Next we will try to deal with the imbalanced dataset by oversampling the minority class, in the spirit of SMOTE (Synthetic Minority Oversampling Technique), which balances the classes by adding minority-class samples; the code below does this with random oversampling from the ROSE package. We will run all the models on the oversampled data and compare them.
# Oversample the minority class using the ROSE package
library(ROSE)
data_train2 <- ovun.sample(y ~ ., data = data_train,
                           method = "over",
                           N = (nrow(data_train) * 3))$data
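A quick check of the new class balance, followed by a sketch of the neural network refit on the oversampled data (same assumed settings as before):

# Class balance after oversampling (illustrative check)
table(data_train2$y)

# Refit the neural network on the oversampled training data (sketch; settings assumed)
model_nnet2 <- nnet(y ~ ., data = data_train2, size = 5, maxit = 200)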
# weights: 151
initial value 6672.439994
iter 10 value 5444.151418
iter 20 value 4928.673252
iter 30 value 4668.104777
iter 40 value 4574.222061
iter 50 value 4493.420471
iter 60 value 4331.950671
iter 70 value 4099.714709
iter 80 value 4002.793010
iter 90 value 3980.135694
iter 100 value 3847.275435
iter 110 value 3731.594349
iter 120 value 3678.434483
iter 130 value 3672.812783
iter 140 value 3671.252056
iter 150 value 3665.833513
iter 160 value 3665.133792
iter 170 value 3663.637799
iter 180 value 3659.793664
iter 190 value 3659.407716
iter 200 value 3658.453928
final value 3658.453928
stopped after 200 iterations
# Predictions
y_pred_nnet2 <- predict(model_nnet2, data_test, type = "class")

# Confusion Matrix
confusionMatrix(data_test$y, as.factor(y_pred_nnet2))
Confusion Matrix and Statistics
Reference
Prediction no yes
no 806 387
yes 25 139
Accuracy : 0.6964
95% CI : (0.6711, 0.7208)
No Information Rate : 0.6124
P-Value [Acc > NIR] : 6.596e-11
Kappa : 0.268
Mcnemar's Test P-Value : < 2.2e-16
Sensitivity : 0.9699
Specificity : 0.2643
Pos Pred Value : 0.6756
Neg Pred Value : 0.8476
Prevalence : 0.6124
Detection Rate : 0.5940
Detection Prevalence : 0.8791
Balanced Accuracy : 0.6171
'Positive' Class : no
# Store the confusion matrix in the list
cm_list$smote_neural_network <- confusionMatrix(data_test$y, as.factor(y_pred_nnet2))
Based on accuracy, the neural network model on the original data is the best, while based on sensitivity (the true positive rate), the logistic regression on the oversampled data is the best.
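With every confusion matrix stored in cm_list, the models can be compared side by side; a minimal sketch, assuming each entry is a caret confusionMatrix object:

# Pull accuracy and sensitivity out of each stored confusion matrix (sketch)
comparison <- data.frame(
  model = names(cm_list),
  accuracy = sapply(cm_list, function(cm) cm$overall["Accuracy"]),
  sensitivity = sapply(cm_list, function(cm) cm$byClass["Sensitivity"])
)

# Sort by accuracy, highest first
comparison[order(-comparison$accuracy), ]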