We start by loading the data, and selecting the relevant features. As pre-processing, we set values of accel==0 to missing because it is impossible. We also remove the incorrect weight of the Peugeot 107.
## [1] 297 32
rownames(cars) = paste(cars[,1],cars[,2])
# Now the rownames are make and model of the cars.
colnames(cars)
## [1] "Maker" "Model" "Type"
## [4] "Fuel" "Price" "Cylinders"
## [7] "Displacement" "DriveWheel" "BHP"
## [10] "Torque" "Acceleration" "TopSpeed"
## [13] "MPG" "Weight" "Length"
## [16] "Width" "Height" "AdaptiveHeadlights"
## [19] "AdjustableSteering" "AlarmSystem" "Automatic"
## [22] "Bluetooth" "ClimateControl" "CruiseControl"
## [25] "ElectricSeats" "Leather" "ParkingSensors"
## [28] "PowerSteering" "SatNav" "ESP"
## [31] "Verdict" "Origin"
colnames(cars)[4:17] = c("fuel","price","cyl","displ","drive",
"hp","torque","accel","topspeed",
"MPG","weight","length","width",
"height")
summary(cars$accel) # zero minimum is impossible
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 5.900 9.100 8.839 11.400 16.900
## [1] "Citroen C5 Tourer" "Ford Mondeo" "Lotus Elise"
## [4] "Renault Twizy" "Ssangyong Rodius"
# [1] "Citroen C5 Tourer" "Ford Mondeo" "Lotus Elise"
# [4] "Renault Twizy" "Ssangyong Rodius"
cars$accel[cars$accel == 0] = NA
summary(cars$accel) # OK
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 2.50 6.20 9.20 8.99 11.50 16.90 5
## [1] 210 450 490 550
## [1] "Peugeot 107" "Renault Twizy" "Morgan 3 Wheeler" "Caterham Super 7"
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 450 1250 1496 1541 1778 2705 34
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 50.0 112.0 126.0 132.7 151.0 252.0 4
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 647 1560 1995 2504 2988 7993 9
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 2337 4157 4464 4427 4766 5612 11
cars$length = cars$length/1000 # in meters
fit = lm(hp ~ topspeed + length + displ, data = cars)
summary(fit)
##
## Call:
## lm(formula = hp ~ topspeed + length + displ, data = cars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -142.034 -20.667 -2.715 15.966 174.020
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.069e+02 2.861e+01 -7.232 4.78e-12 ***
## topspeed 2.466e+00 1.373e-01 17.963 < 2e-16 ***
## length -1.313e+01 6.187e+00 -2.122 0.0347 *
## displ 6.255e-02 2.808e-03 22.275 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 37.72 on 274 degrees of freedom
## (19 observations deleted due to missingness)
## Multiple R-squared: 0.9405, Adjusted R-squared: 0.9399
## F-statistic: 1445 on 3 and 274 DF, p-value: < 2.2e-16
##
## Prediction terms in the model:
## prediction term stdev up/down
## topspeed 68.380 up
## length 5.817 down
## displ 91.790 up
## Total prediction of hp 149.200
fact = 0.51
width = fact*10; height = fact*8
maxnchar = 6
main = "Top Gear data"
pdf(file = "topgear_numerical_predsplot.pdf",
width = width, height = height)
predsplot(fit, main=main, maxchar.level = maxnchar)
##
## Prediction terms in the model:
## prediction term stdev up/down
## topspeed 68.380 up
## length 5.817 down
## displ 91.790 up
## Total prediction of hp 149.200
## png
## 2
## Correlation matrix of the prediction terms:
## topspeed length displ
## topspeed 1.00 -0.45 0.80
## length -0.45 1.00 -0.56
## displ 0.80 -0.56 1.00
##
## Call:
## lm(formula = GPM ~ accel + drive + weight + fuel, data = cars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.026997 -0.004301 0.000268 0.003660 0.055643
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.681e-02 4.884e-03 3.443 0.000677 ***
## accel -1.356e-03 2.459e-04 -5.516 8.81e-08 ***
## driveFront -2.927e-03 1.619e-03 -1.808 0.071853 .
## driveRear -2.745e-04 1.577e-03 -0.174 0.861992
## weight 1.131e-05 1.841e-06 6.143 3.25e-09 ***
## fuelPetrol 8.427e-03 1.246e-03 6.761 9.98e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.007973 on 245 degrees of freedom
## (46 observations deleted due to missingness)
## Multiple R-squared: 0.6058, Adjusted R-squared: 0.5977
## F-statistic: 75.29 on 5 and 245 DF, p-value: < 2.2e-16
## Figure 2:
##
fact = 0.52
width = fact*10; height = fact*8
maxnchar = 6
main = "Top Gear data"
# pdf(file = "topgear_4_predsplot.pdf",
# width = width, height = height)
predsplot(fit4, main=main, maxchar.level = maxnchar)
##
## Prediction terms in the model:
## prediction term stdev up/down
## accel 0.004329 down
## drive 0.001400
## weight 0.004490 up
## fuel 0.004104
## Total prediction of GPM 0.009783
##
# fact = 0.6
# width = fact*10; height = fact*8
# pdf(file = "topgear_4_predscor.pdf",
# width = height, height = height)
predscor(fit4, sort.by.stdev = FALSE)
## Correlation matrix of the prediction terms:
## accel drive weight fuel
## accel 1.00 0.65 0.49 0.31
## drive 0.65 1.00 0.62 0.08
## weight 0.49 0.62 1.00 -0.23
## fuel 0.31 0.08 -0.23 1.00
# dev.off()
car = "Bentley Continental"
# pdf(file = "topgear_4_predsplot_1_dens.pdf",
# width = width, height = height)
predsplot(fit4, main = main, casetoshow=car,
displaytype = "density",
maxchar.level = maxnchar,
xlab = paste0("prediction for ",car))
##
## Prediction terms for case Bentley Continental:
## prediction term value
## accel +0.00621
## drive +0.00152
## weight +0.00857
## fuel +0.00322
## SUM +0.01952
## centercept 0.02599
## Total prediction of GPM 0.04551
# dev.off()
car = "Kia Rio"
# pdf(file = "topgear_4_predsplot_2_stairs.pdf",
# width = width, height = height)
predsplot(fit4, main = main, casetoshow=car,
staircase = TRUE, maxchar.level = maxnchar,
xlab = paste0("prediction for ",car))
##
## Prediction terms for case Kia Rio:
## prediction term value
## accel -0.00884
## drive -0.00141
## weight -0.00420
## fuel -0.00520
## SUM -0.01966
## centercept 0.02599
## Total prediction of GPM 0.00633
We add a logical variable (AlarmSystem) as well as a character variable (SatNav)
## no/optional standard
## 112 185
## Mode FALSE TRUE
## logical 112 185
## no optional standard
## 86 116 95
## Length Class Mode
## 297 character character
##
## n y
## 86 211
## [1] "character"
## 'data.frame': 297 obs. of 35 variables:
## $ Maker : Factor w/ 51 levels "Alfa Romeo","Aston Martin",..: 1 1 2 2 2 2 2 2 2 3 ...
## $ Model : Factor w/ 296 levels "1 Series","1 Series Convertible",..: 139 186 101 102 103 262 266 267 268 29 ...
## $ Type : Factor w/ 297 levels "107 1.0 68 Active 5d",..: 139 190 100 101 102 263 269 268 267 30 ...
## $ fuel : Factor w/ 2 levels "Diesel","Petrol": 1 2 2 2 2 2 2 2 2 2 ...
## $ price : num 21250 15155 30995 131995 141995 ...
## $ cyl : num 4 4 4 12 12 12 12 8 8 4 ...
## $ displ : num 1598 1368 1329 5935 5935 ...
## $ drive : Factor w/ 3 levels "4WD","Front",..: 2 2 2 3 3 3 3 3 3 2 ...
## $ hp : num 105 105 98 517 517 510 573 420 420 86 ...
## $ torque : num 236 95 92 457 457 420 457 346 346 118 ...
## $ accel : num 11.3 10.7 11.8 4.6 4.6 4.2 4.1 4.7 4.7 11.7 ...
## $ topspeed : num 115 116 106 183 183 190 183 180 180 112 ...
## $ MPG : num 64 49 56 19 19 17 19 20 20 55 ...
## $ weight : num 1385 1090 988 1785 1890 ...
## $ length : num 4.35 4.06 3.08 4.72 4.72 ...
## $ width : num 1798 1720 1680 NA NA ...
## $ height : num 1465 1446 1500 1282 1282 ...
## $ AdaptiveHeadlights: Factor w/ 3 levels "no","optional",..: 2 2 1 3 3 1 3 1 1 2 ...
## $ AdjustableSteering: Factor w/ 2 levels "no","standard": 2 2 2 2 2 2 2 2 2 2 ...
## $ AlarmSystem : Factor w/ 2 levels "no/optional",..: 2 2 1 1 1 2 2 2 2 2 ...
## $ Automatic : Factor w/ 3 levels "no","optional",..: 1 1 2 3 3 1 3 2 2 1 ...
## $ Bluetooth : Factor w/ 3 levels "no","optional",..: 3 3 3 3 3 3 3 2 2 3 ...
## $ ClimateControl : Factor w/ 3 levels "no","optional",..: 3 2 3 3 3 3 3 3 3 2 ...
## $ CruiseControl : Factor w/ 3 levels "no","optional",..: 3 3 3 3 3 3 3 2 2 2 ...
## $ ElectricSeats : Factor w/ 3 levels "no","optional",..: 2 1 1 3 3 3 3 3 3 1 ...
## $ Leather : Factor w/ 3 levels "no","optional",..: 2 2 1 3 3 3 3 3 3 2 ...
## $ ParkingSensors : Factor w/ 3 levels "no","optional",..: 2 3 1 3 3 3 3 3 3 2 ...
## $ PowerSteering : Factor w/ 2 levels "no","standard": 2 2 2 2 2 2 2 2 2 2 ...
## $ SatNav : Factor w/ 3 levels "no","optional",..: 2 2 3 3 3 3 3 2 2 2 ...
## $ ESP : Factor w/ 3 levels "no","optional",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ Verdict : num 6 5 7 7 7 7 7 8 7 6 ...
## $ Origin : Factor w/ 3 levels "Asia","Europe",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ GPM : num 0.0156 0.0204 0.0179 0.0526 0.0526 ...
## $ alarm : logi TRUE TRUE FALSE FALSE FALSE TRUE ...
## $ navig : chr "y" "y" "y" "y" ...
fitcombin = lm(1/MPG ~ accel + log(weight) + accel:torque + alarm + navig, data = cars)
summary(fitcombin)
##
## Call:
## lm(formula = 1/MPG ~ accel + log(weight) + accel:torque + alarm +
## navig, data = cars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.026131 -0.005201 -0.001251 0.004847 0.056823
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.018e-01 3.347e-02 -3.040 0.00262 **
## accel -2.115e-03 2.461e-04 -8.591 9.87e-16 ***
## log(weight) 2.135e-02 4.709e-03 4.534 9.01e-06 ***
## alarmTRUE -2.450e-03 1.343e-03 -1.824 0.06932 .
## navigy -1.693e-03 1.367e-03 -1.239 0.21667
## accel:torque -3.403e-06 1.394e-06 -2.442 0.01532 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.008684 on 247 degrees of freedom
## (44 observations deleted due to missingness)
## Multiple R-squared: 0.5293, Adjusted R-squared: 0.5198
## F-statistic: 55.56 on 5 and 247 DF, p-value: < 2.2e-16
fact = 0.45
width = fact*10; height = fact*8
main = "Example with transformation, interaction, logical, character"
# pdf(file = "topgear_logical+char_predsplot.pdf", width = width, height = height)
predsplot(fitcombin, main=main)
##
## Prediction terms in the model:
## prediction term stdev up/down
## accel 0.0067300 down
## log(weight) 0.0054000 up
## alarm 0.0011630
## navig 0.0007587
## accel:torque 0.0024290
## Total prediction of 1/MPG 0.0091180
# dev.off()
fact = 0.6
width = fact*10; height = fact*8
# pdf(file = "topgear_logical+char_predscor.pdf",
# width = height, height = height)
predscor(fitcombin)
## Correlation matrix of the prediction terms:
## accel log(weight) accel:torque alarm navig
## accel 1.00 0.53 -0.09 -0.37 -0.33
## log(weight) 0.53 1.00 -0.75 -0.43 -0.35
## accel:torque -0.09 -0.75 1.00 0.26 0.18
## alarm -0.37 -0.43 0.26 1.00 0.38
## navig -0.33 -0.35 0.18 0.38 1.00
We start by loading the data, turning the pclass and sex variables into factors.
data(data_titanic, package = "classmap")
titanic = data_titanic; rm(data_titanic)
dim(titanic) # 1309 13
## [1] 1309 13
# A data frame with 1309 observations on the following variables.
# passengerId: a unique identifier for each passenger.
# pclass: travel class of the passenger.
# name: name of the passenger.
# sex: sex of the passenger.
# age: age of the passenger.
# sibsp: number of siblings and spouses traveling with the passenger.
# parch: number of parents and children traveling with the passenger.
# ticket: ticket number of the passenger.
# fare: fare paid for the ticket.
# Cabin: cabin number of the passenger.
# embarked: Port of embarkation. Takes the values
# C (Cherbourg), Q (Queenstown) and
# S (Southampton).
# y: factor indicating casualty or survivor.
# dataType: vector taking the values “train” or “test”
# indicating whether the observation belongs
# to training or test data.
#
colnames(titanic) = c("passengerId","pclass","name","sex",
"age", "sibsp", "parch", "ticket",
"fare", "cabin", "embarked", "survival",
"dataType")
str(titanic)
## 'data.frame': 1309 obs. of 13 variables:
## $ passengerId: int 1 2 3 4 5 6 7 8 9 10 ...
## $ pclass : int 3 1 3 1 3 3 1 3 3 2 ...
## $ name : chr "Braund, Mr. Owen Harris" "Cumings, Mrs. John Bradley (Florence Briggs Thayer)" "Heikkinen, Miss. Laina" "Futrelle, Mrs. Jacques Heath (Lily May Peel)" ...
## $ sex : chr "male" "female" "female" "female" ...
## $ age : num 22 38 26 35 35 NA 54 2 27 14 ...
## $ sibsp : int 1 1 0 1 0 0 0 3 0 1 ...
## $ parch : int 0 0 0 0 0 0 0 1 2 0 ...
## $ ticket : chr "A/5 21171" "PC 17599" "STON/O2. 3101282" "113803" ...
## $ fare : num 7.25 71.28 7.92 53.1 8.05 ...
## $ cabin : chr "" "C85" "" "C123" ...
## $ embarked : chr "S" "C" "S" "S" ...
## $ survival : Factor w/ 2 levels "casualty","survived": 1 2 2 2 1 1 1 1 2 2 ...
## $ dataType : chr "train" "train" "train" "train" ...
## [1] 3 1 2
## [1] "male" "female"
titanic$sex = factor(titanic$sex, levels = unique(titanic$sex) )
titanic$sex = factor(titanic$sex, labels = c("M","F"))
head(titanic)
## passengerId pclass name sex
## 1 1 3 Braund, Mr. Owen Harris M
## 2 2 1 Cumings, Mrs. John Bradley (Florence Briggs Thayer) F
## 3 3 3 Heikkinen, Miss. Laina F
## 4 4 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) F
## 5 5 3 Allen, Mr. William Henry M
## 6 6 3 Moran, Mr. James M
## age sibsp parch ticket fare cabin embarked survival dataType
## 1 22 1 0 A/5 21171 7.2500 S casualty train
## 2 38 1 0 PC 17599 71.2833 C85 C survived train
## 3 26 0 0 STON/O2. 3101282 7.9250 S survived train
## 4 35 1 0 113803 53.1000 C123 S survived train
## 5 35 0 0 373450 8.0500 S casualty train
## 6 NA 0 0 330877 8.4583 Q casualty train
Now fit a logistic regression model.
fit5 <- glm(survival ~ sex + age + sibsp + parch + pclass,
family=binomial, data = titanic)
summary(fit5)
##
## Call:
## glm(formula = survival ~ sex + age + sibsp + parch + pclass,
## family = binomial, data = titanic)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.002093 0.221287 -4.528 5.94e-06 ***
## sexF 2.556861 0.173270 14.756 < 2e-16 ***
## age -0.039489 0.006634 -5.952 2.64e-09 ***
## sibsp -0.352914 0.105349 -3.350 0.000808 ***
## parch 0.074361 0.099907 0.744 0.456697
## pclass1 2.352022 0.228803 10.280 < 2e-16 ***
## pclass2 0.985265 0.199384 4.942 7.75e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1414.62 on 1045 degrees of freedom
## Residual deviance: 970.12 on 1039 degrees of freedom
## (263 observations deleted due to missingness)
## AIC: 984.12
##
## Number of Fisher Scoring iterations: 4
fact = 0.5
width = fact*10; height = fact*8
main = "Titanic data"
# pdf(file = "titanic_5_predsplot_dens.pdf",
# width = width, height = height)
predsplot(fit5, main=main, displaytype = "density")
##
## Prediction terms in the model:
## prediction term stdev up/down
## sex 1.23600
## age 0.56920 down
## sibsp 0.32190 down
## parch 0.06244 up
## pclass 0.98130
## Total linear prediction of survival 1.67000
##
## The glm fit used the logit link function.
# The glm fit used the logit link function.
# dev.off()
# With other options for density estimation:
predsplot(fit5, main=main, displaytype = "density",
bw = "SJ", adjust = 0.5)
##
## Prediction terms in the model:
## prediction term stdev up/down
## sex 1.23600
## age 0.56920 down
## sibsp 0.32190 down
## parch 0.06244 up
## pclass 0.98130
## Total linear prediction of survival 1.67000
##
## The glm fit used the logit link function.
# pdf(file = "titanic_5_predsplot_1.pdf",
# width = width, height = height)
predsplot(fit5, main = main, casetoshow=1)
##
## Prediction terms for case 1:
## prediction term value
## sex -0.94843
## age +0.31122
## sibsp -0.17544
## parch -0.03128
## pclass -0.88444
## SUM -1.72839
## centercept -0.49537
## Total linear prediction of survival -2.22376
## Total prediction of survival in response units 0.09764
##
## The glm fit used the logit link function.
# dev.off()
# pdf(file = "titanic_5_predsplot_2_stairs.pdf",
# width = width, height = height)
predsplot(fit5, main = main, casetoshow=2, staircase = TRUE)
##
## Prediction terms for case 2:
## prediction term value
## sex +1.60843
## age -0.32060
## sibsp -0.17544
## parch -0.03128
## pclass +1.46758
## SUM +2.54868
## centercept -0.49537
## Total linear prediction of survival 2.05331
## Total prediction of survival in response units 0.88628
##
## The glm fit used the logit link function.
# dev.off()
# fact = 0.6
# width = fact*10; height = fact*8
# pdf(file = "titanic_5_predscor.pdf",
# width = height, height = height)
predscor(fit5, sort.by.stdev = FALSE)
## Correlation matrix of the prediction terms:
## sex age sibsp parch pclass
## sex 1.00 0.06 -0.10 0.22 0.14
## age 0.06 1.00 -0.24 0.15 -0.41
## sibsp -0.10 -0.24 1.00 -0.37 0.04
## parch 0.22 0.15 -0.37 1.00 -0.02
## pclass 0.14 -0.41 0.04 -0.02 1.00
data(german.credit, package = "fairml")
credit = german.credit; rm(german.credit)
dim(credit) # 1000 21
## [1] 1000 21
## 'data.frame': 1000 obs. of 21 variables:
## $ Account_status : Factor w/ 4 levels "< 0 DM",">= 200 DM",..: 1 3 4 1 1 4 4 3 4 3 ...
## $ Duration : num 6 48 12 42 24 36 24 36 12 30 ...
## $ Credit_history : Factor w/ 5 levels "all credits at this bank paid back duly",..: 2 4 2 4 3 4 4 4 4 2 ...
## $ Purpose : Factor w/ 10 levels "business","car (new)",..: 8 8 5 6 2 5 6 3 8 2 ...
## $ Credit_amount : num 1169 5951 2096 7882 4870 ...
## $ Savings_bonds : Factor w/ 5 levels "< 100 DM",">= 1000 DM",..: 5 1 1 1 1 5 4 1 2 1 ...
## $ Present_employment_since: Factor w/ 5 levels "< 1 year",">= 7 years",..: 2 3 4 4 3 3 2 3 4 5 ...
## $ Installment_rate : num 4 2 2 2 3 2 3 2 2 4 ...
## $ Other_debtors_guarantors: Factor w/ 3 levels "co-applicant",..: 3 3 3 2 3 3 3 3 3 3 ...
## $ Resident_since : num 4 2 3 4 4 4 4 2 4 2 ...
## $ Property : Factor w/ 4 levels "building society savings agreement / life insurance",..: 3 3 3 1 4 4 1 2 3 2 ...
## $ Age : num 67 22 49 45 53 35 53 35 61 28 ...
## $ Other_installment_plans : Factor w/ 3 levels "bank","none",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ Housing : Factor w/ 3 levels "rent","own","for free": 2 2 2 3 3 3 2 1 2 2 ...
## $ Existing_credits : num 2 1 1 1 2 1 1 1 1 2 ...
## $ Job : Factor w/ 4 levels "management / self-employed / highly qualified employee / officer",..: 2 2 4 2 2 4 2 1 4 1 ...
## $ People_maintenance_for : num 1 1 2 2 2 2 1 1 1 1 ...
## $ Telephone : Factor w/ 2 levels "none","yes": 2 1 1 1 1 2 1 2 1 1 ...
## $ Foreign_worker : Factor w/ 2 levels "no","yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ Credit_risk : Factor w/ 2 levels "BAD","GOOD": 2 1 2 2 1 2 2 2 2 1 ...
## $ Gender : Factor w/ 2 levels "Female","Male": 1 2 1 1 1 1 1 1 1 1 ...
#
# Shorten variable names for plotting:
colnames(credit) <- c("checking","months","history","purpose","amount","savings","employment","rate","guarantors","residence","property","age","inst_plans","housing","nloans","job","nclients","phone","foreign","credit","sex")
#
# Give factors sex and purpose shorter labels for plotting:
credit$sex = factor(as.numeric(credit$sex), labels = c("F","M"))
levels(credit$purpose)
## [1] "business" "car (new)" "car (used)"
## [4] "domestic appliances" "education" "furniture / equipment"
## [7] "others" "radio / television" "repairs"
## [10] "retrainin"
# [10] "retrainin"
levels(credit$purpose) = c("business","n.car","u.car","appliances","education","furniture","other","TV","repairs","retraining")
# save(credit, file = "German_credit.RData")
# load("German_credit.RData")
Now fit a logistic regression model
fit7 <- glm(credit ~ months + purpose + amount + rate + age +
nclients + sex, family=binomial, data = credit)
main = "German credit data"
# fact = 0.48
# width = fact*10; height = fact*8
# pdf(file = "germancredit_7_predsplot.pdf",
# width = width, height = height)
predsplot(fit7, main = main)
##
## Prediction terms in the model:
## prediction term stdev up/down
## months 0.37690 down
## purpose 0.51700
## amount 0.26380 down
## rate 0.27320 down
## age 0.24350 up
## nclients 0.07078 down
## sex 0.21110
## Total linear prediction of credit 0.80900
##
## The glm fit used the logit link function.
# fact = 0.48
# width = fact*10; height = fact*8
# pdf(file = "germancredit_7_predsplot_1.pdf",
# width = width, height = height)
predsplot(fit7, main = main, casetoshow=1,
displaytype = "density")
##
## Prediction terms for case 1:
## prediction term value
## months +0.46584
## purpose +0.37611
## amount +0.19645
## rate -0.25082
## age +0.67325
## nclients +0.03030
## sex +0.14143
## SUM +1.63255
## centercept 0.95998
## Total linear prediction of credit 2.59253
## Total prediction of credit in response units 0.93038
##
## The glm fit used the logit link function.
# pdf(file = "germancredit_7_predsplot_2_stairs.pdf",
# width = width, height = height)
predsplot(fit7, main = main, casetoshow=2, staircase = TRUE)
##
## Prediction terms for case 2:
## prediction term value
## months -0.84700
## purpose +0.37611
## amount -0.25041
## rate +0.23763
## age -0.28994
## nclients +0.03030
## sex -0.31479
## SUM -1.05811
## centercept 0.95998
## Total linear prediction of credit -0.09813
## Total prediction of credit in response units 0.47549
##
## The glm fit used the logit link function.
# fact = 0.6
# width = fact*10; height = fact*8
# pdf(file = "germancredit_7_predscor_equalsizes.pdf",
# width = height, height = height)
predscor(fit7, sort.by.stdev = FALSE, cell.length = "equal")
## Correlation matrix of the prediction terms:
## months purpose amount rate age nclients sex
## months 1.00 -0.12 0.62 0.07 0.04 -0.02 -0.08
## purpose -0.12 1.00 -0.12 -0.01 -0.03 0.04 0.05
## amount 0.62 -0.12 1.00 -0.27 -0.03 0.02 -0.09
## rate 0.07 -0.01 -0.27 1.00 -0.06 -0.07 -0.09
## age 0.04 -0.03 -0.03 -0.06 1.00 -0.12 0.16
## nclients -0.02 0.04 0.02 -0.07 -0.12 1.00 -0.20
## sex -0.08 0.05 -0.09 -0.09 0.16 -0.20 1.00
# dev.off()
#
# pdf(file = "germancredit_7_predscor.pdf",
# width = height, height = height)
predscor(fit7, sort.by.stdev = FALSE)
## Correlation matrix of the prediction terms:
## months purpose amount rate age nclients sex
## months 1.00 -0.12 0.62 0.07 0.04 -0.02 -0.08
## purpose -0.12 1.00 -0.12 -0.01 -0.03 0.04 0.05
## amount 0.62 -0.12 1.00 -0.27 -0.03 0.02 -0.09
## rate 0.07 -0.01 -0.27 1.00 -0.06 -0.07 -0.09
## age 0.04 -0.03 -0.03 -0.06 1.00 -0.12 0.16
## nclients -0.02 0.04 0.02 -0.07 -0.12 1.00 -0.20
## sex -0.08 0.05 -0.09 -0.09 0.16 -0.20 1.00
## Prediction for a new case:
newc = list("u.car", 36, 2, 6000, 55, "F", 1)
names(newc) = c("purpose","months","rate","amount",
"age","sex","nclients")
# fact = 0.48
# width = fact*10; height = fact*8
# pdf(file = "germancredit_7_predsplot_new_stairs.pdf",
# width = width, height = height)
predsplot(fit7, main = main, casetoshow = newc,
staircase = TRUE)
##
## The case to show is:
## $purpose
## [1] "u.car"
##
## $months
## [1] 36
##
## $rate
## [1] 2
##
## $amount
## [1] 6000
##
## $age
## [1] 55
##
## $sex
## [1] "F"
##
## $nclients
## [1] 1
##
##
## Prediction terms for the new case:
## prediction term value
## months -0.47190
## purpose +1.02816
## amount -0.25499
## rate +0.23763
## age +0.41640
## nclients +0.03030
## sex +0.14143
## SUM +1.12701
## centercept 0.95998
## Total linear prediction of credit 2.08699
## Total prediction of credit in response units 0.88963
##
## The glm fit used the logit link function.
# dev.off()
## Figure with profile:
# pdf(file = "germancredit_7_predsplot_1_dens_profile.pdf",
# width = width, height = height)
predsplot(fit7, main = main, casetoshow=1,
displaytype = "density", profile = TRUE)
##
## Prediction terms for case 1:
## prediction term value
## months +0.46584
## purpose +0.37611
## amount +0.19645
## rate -0.25082
## age +0.67325
## nclients +0.03030
## sex +0.14143
## SUM +1.63255
## centercept 0.95998
## Total linear prediction of credit 2.59253
## Total prediction of credit in response units 0.93038
##
## The glm fit used the logit link function.
credit$x1 = credit$months + credit$nclients
credit$x2 = credit$months - credit$nclients
cor(credit$x1,credit$x2) # 0.998199
## [1] 0.9981994
# But minus that for prediction terms!
fitart <- glm(credit ~ x1 + x2 + purpose + amount + rate + age +
sex, family=binomial, data = credit)
## Figure 7:
##
# fact = 0.48
# width = fact*10; height = fact*8
# main = "German credit data with artificial variables x1 and x2"
# pdf(file = "germancredit_7_artif_predsplot.pdf",
# width = width, height = height)
predsplot(fitart, main = main)
##
## Prediction terms in the model:
## prediction term stdev up/down
## x1 1.3670 down
## x2 0.9913 up
## purpose 0.5170
## amount 0.2638 down
## rate 0.2732 down
## age 0.2435 up
## sex 0.2111
## Total linear prediction of credit 0.8090
##
## The glm fit used the logit link function.
# dev.off()
## Figure 8:
##
# fact = 0.6
# width = fact*10; height = fact*8
# pdf(file = "germancredit_7_artif_predscor.pdf",
# width = height, height = height)
predscor(fitart, sort.by.stdev = FALSE)
## Correlation matrix of the prediction terms:
## x1 x2 purpose amount rate age sex
## x1 1.00 -1.00 -0.12 0.63 0.07 0.03 -0.09
## x2 -1.00 1.00 0.12 -0.62 -0.08 -0.04 0.08
## purpose -0.12 0.12 1.00 -0.12 -0.01 -0.03 0.05
## amount 0.63 -0.62 -0.12 1.00 -0.27 -0.03 -0.09
## rate 0.07 -0.08 -0.01 -0.27 1.00 -0.06 -0.09
## age 0.03 -0.04 -0.03 -0.03 -0.06 1.00 0.16
## sex -0.09 0.08 0.05 -0.09 -0.09 0.16 1.00