Below are the solutions to these exercises on LASSO regression.
###############
#             #
# Exercise 1  #
#             #
###############

# lars is loaded for the `diabetes` data set; glmnet for the LASSO routines
# used in the later exercises.
library(lars)
library(glmnet)

data(diabetes)

# Extract the components explicitly instead of attach()-ing the data frame:
# attach() puts the columns on the search path, where any existing global
# named `x` or `y` would silently shadow them. The later exercises refer to
# these three objects by name.
x <- diabetes[["x"]]    # predictor matrix (age, sex, bmi, ...)
y <- diabetes[["y"]]    # response
x2 <- diabetes[["x2"]]  # expanded predictors (squares and interactions)

###############
#             #
# Exercise 2  #
#             #
###############

# Per-column summary statistics of the predictors.
summary(x)
## age sex bmi ## Min. :-0.107226 Min. :-0.04464 Min. :-0.090275 ## 1st Qu.:-0.037299 1st Qu.:-0.04464 1st Qu.:-0.034229 ## Median : 0.005383 Median :-0.04464 Median :-0.007284 ## Mean : 0.000000 Mean : 0.00000 Mean : 0.000000 ## 3rd Qu.: 0.038076 3rd Qu.: 0.05068 3rd Qu.: 0.031248 ## Max. : 0.110727 Max. : 0.05068 Max. : 0.170555 ## map tc ldl ## Min. :-0.112400 Min. :-0.126781 Min. :-0.115613 ## 1st Qu.:-0.036656 1st Qu.:-0.034248 1st Qu.:-0.030358 ## Median :-0.005671 Median :-0.004321 Median :-0.003819 ## Mean : 0.000000 Mean : 0.000000 Mean : 0.000000 ## 3rd Qu.: 0.035644 3rd Qu.: 0.028358 3rd Qu.: 0.029844 ## Max. : 0.132044 Max. : 0.153914 Max. : 0.198788 ## hdl tch ltg ## Min. :-0.102307 Min. :-0.076395 Min. :-0.126097 ## 1st Qu.:-0.035117 1st Qu.:-0.039493 1st Qu.:-0.033249 ## Median :-0.006584 Median :-0.002592 Median :-0.001948 ## Mean : 0.000000 Mean : 0.000000 Mean : 0.000000 ## 3rd Qu.: 0.029312 3rd Qu.: 0.034309 3rd Qu.: 0.032433 ## Max. : 0.181179 Max. : 0.185234 Max. : 0.133599 ## glu ## Min. :-0.137767 ## 1st Qu.:-0.033179 ## Median :-0.001078 ## Mean : 0.000000 ## 3rd Qu.: 0.027917 ## Max. : 0.135612
# One scatter plot of y against each predictor, laid out on a 2 x 5 grid,
# with the simple-regression line for that predictor drawn on top.
# par() returns the previous settings, so they can be restored afterwards.
old_par <- par(mfrow = c(2, 5))
for (i in seq_len(ncol(x))) {
  # Label each panel with the predictor's name; the default label would be
  # the uninformative expression "x[, i]" on every panel.
  plot(x[, i], y, xlab = colnames(x)[i], ylab = "y")
  abline(lm(y ~ x[, i]))
}
par(old_par)

###############
#             #
# Exercise 3  #
#             #
###############

# Ordinary least squares on all predictors at once, as a baseline for the
# penalised fits in the following exercises.
model_ols <- lm(y ~ x)
summary(model_ols)
## ## Call: ## lm(formula = y ~ x) ## ## Residuals: ## Min 1Q Median 3Q Max ## -155.829 -38.534 -0.227 37.806 151.355 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 152.133 2.576 59.061 < 2e-16 *** ## xage -10.012 59.749 -0.168 0.867000 ## xsex -239.819 61.222 -3.917 0.000104 *** ## xbmi 519.840 66.534 7.813 4.30e-14 *** ## xmap 324.390 65.422 4.958 1.02e-06 *** ## xtc -792.184 416.684 -1.901 0.057947 . ## xldl 476.746 339.035 1.406 0.160389 ## xhdl 101.045 212.533 0.475 0.634721 ## xtch 177.064 161.476 1.097 0.273456 ## xltg 751.279 171.902 4.370 1.56e-05 *** ## xglu 67.625 65.984 1.025 0.305998 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 54.15 on 431 degrees of freedom ## Multiple R-squared: 0.5177, Adjusted R-squared: 0.5066 ## F-statistic: 46.27 on 10 and 431 DF, p-value: < 2.2e-16
###############
#             #
# Exercise 4  #
#             #
###############

# Fit the full regularisation path with glmnet's default settings.
model_lasso <- glmnet(x, y)

# Go through the plot() generic rather than calling plot.glmnet() by name:
# the S3 method is not guaranteed to be exported by glmnet (recent releases
# only register it), in which case the direct call fails with
# "could not find function". Dispatch on the object's class always works.
plot(model_lasso, xvar = "norm", label = TRUE)
###############
#             #
# Exercise 5  #
#             #
###############

# Cross-validate the LASSO (alpha = 1) over a grid of 1000 lambda values.
# NOTE: cv.glmnet() assigns observations to folds at random, so the selected
# lambda can differ slightly between runs unless a seed is set beforehand.
cv_fit <- cv.glmnet(x = x, y = y, alpha = 1, nlambda = 1000)

# Use the plot() generic; plot.cv.glmnet() may not be exported by the
# installed glmnet version, so calling it by name can fail.
plot(cv_fit)

# Lambda with the smallest cross-validated error.
cv_fit$lambda.min
## [1] 0.6499985
###############
#             #
# Exercise 6  #
#             #
###############

# Refit the LASSO at the single penalty value stored in cv_fit$lambda.min
# and print the resulting sparse coefficient matrix.
fit <- glmnet(
  x = x,
  y = y,
  alpha = 1,
  lambda = cv_fit$lambda.min
)
fit[["beta"]]
## 10 x 1 sparse Matrix of class "dgCMatrix" ## s0 ## age . ## sex -210.23480 ## bmi 524.02061 ## map 304.58443 ## tc -141.21684 ## ldl . ## hdl -195.91264 ## tch 43.11054 ## ltg 520.99839 ## glu 59.01632
###############
#             #
# Exercise 7  #
#             #
###############

# Print the more conservative penalty stored by cv.glmnet() as lambda.1se.
cv_fit[["lambda.1se"]]
## [1] 7.762283
# Refit at lambda.1se (overwriting the previous `fit`) and print the
# coefficients.
fit <- glmnet(
  x = x,
  y = y,
  alpha = 1,
  lambda = cv_fit$lambda.1se
)
fit[["beta"]]
## 10 x 1 sparse Matrix of class "dgCMatrix" ## s0 ## age . ## sex . ## bmi 493.08164 ## map 171.33388 ## tc . ## ldl . ## hdl -93.78863 ## tch . ## ltg 428.15531 ## glu .
###############
#             #
# Exercise 8  #
#             #
###############

# Ordinary least squares on the expanded predictor matrix x2, followed by
# the usual coefficient table.
model_ols2 <- lm(y ~ x2)
summary(model_ols2)
## ## Call: ## lm(formula = y ~ x2) ## ## Residuals: ## Min 1Q Median 3Q Max ## -158.216 -30.809 -3.857 31.348 153.946 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 152.133 2.532 60.086 < 2e-16 *** ## x2age 50.721 65.513 0.774 0.4393 ## x2sex -267.344 65.270 -4.096 5.15e-05 *** ## x2bmi 460.721 84.601 5.446 9.32e-08 *** ## x2map 342.933 72.447 4.734 3.13e-06 *** ## x2tc -3599.542 60575.187 -0.059 0.9526 ## x2ldl 3028.281 53238.699 0.057 0.9547 ## x2hdl 1103.047 22636.179 0.049 0.9612 ## x2tch 74.937 275.807 0.272 0.7860 ## x2ltg 1828.210 19914.504 0.092 0.9269 ## x2glu 62.754 70.398 0.891 0.3733 ## x2age^2 67.691 69.470 0.974 0.3305 ## x2bmi^2 45.849 83.288 0.550 0.5823 ## x2map^2 -8.460 71.652 -0.118 0.9061 ## x2tc^2 6668.449 7059.159 0.945 0.3454 ## x2ldl^2 3583.174 5326.148 0.673 0.5015 ## x2hdl^2 1731.821 1590.574 1.089 0.2769 ## x2tch^2 773.374 606.967 1.274 0.2034 ## x2ltg^2 1451.581 1730.103 0.839 0.4020 ## x2glu^2 114.149 94.122 1.213 0.2260 ## x2age:sex 148.678 73.407 2.025 0.0435 * ## x2age:bmi -18.052 79.620 -0.227 0.8208 ## x2age:map 18.534 76.303 0.243 0.8082 ## x2age:tc -158.891 617.109 -0.257 0.7970 ## x2age:ldl -67.285 494.527 -0.136 0.8918 ## x2age:hdl 209.245 280.614 0.746 0.4563 ## x2age:tch 184.960 210.330 0.879 0.3798 ## x2age:ltg 124.667 223.765 0.557 0.5778 ## x2age:glu 62.575 80.377 0.779 0.4367 ## x2sex:bmi 64.612 77.902 0.829 0.4074 ## x2sex:map 88.472 74.744 1.184 0.2373 ## x2sex:tc 433.598 590.709 0.734 0.4634 ## x2sex:ldl -352.823 468.951 -0.752 0.4523 ## x2sex:hdl -124.731 273.870 -0.455 0.6491 ## x2sex:tch -131.223 199.714 -0.657 0.5115 ## x2sex:ltg -118.995 226.493 -0.525 0.5996 ## x2sex:glu 45.758 73.650 0.621 0.5348 ## x2bmi:map 154.720 86.340 1.792 0.0739 . 
## x2bmi:tc -302.045 667.930 -0.452 0.6514 ## x2bmi:ldl 241.540 561.026 0.431 0.6671 ## x2bmi:hdl 121.942 329.884 0.370 0.7118 ## x2bmi:tch -33.445 230.836 -0.145 0.8849 ## x2bmi:ltg 114.673 255.987 0.448 0.6544 ## x2bmi:glu 23.377 91.037 0.257 0.7975 ## x2map:tc 478.303 682.264 0.701 0.4837 ## x2map:ldl -326.740 574.317 -0.569 0.5697 ## x2map:hdl -187.305 309.589 -0.605 0.5455 ## x2map:tch -58.294 198.601 -0.294 0.7693 ## x2map:ltg -154.795 271.966 -0.569 0.5696 ## x2map:glu -133.476 91.314 -1.462 0.1447 ## x2tc:ldl -9313.775 11771.220 -0.791 0.4293 ## x2tc:hdl -3932.025 3816.572 -1.030 0.3036 ## x2tc:tch -2205.910 1761.843 -1.252 0.2113 ## x2tc:ltg -3801.442 13166.091 -0.289 0.7729 ## x2tc:glu -176.295 595.459 -0.296 0.7673 ## x2ldl:hdl 2642.645 3165.926 0.835 0.4044 ## x2ldl:tch 1206.822 1470.512 0.821 0.4123 ## x2ldl:ltg 2773.697 10960.214 0.253 0.8004 ## x2ldl:glu 85.626 505.102 0.170 0.8655 ## x2hdl:tch 1188.406 1002.242 1.186 0.2365 ## x2hdl:ltg 1467.845 4609.793 0.318 0.7503 ## x2hdl:glu 217.541 296.749 0.733 0.4640 ## x2tch:ltg 389.805 624.671 0.624 0.5330 ## x2tch:glu 235.693 235.064 1.003 0.3167 ## x2ltg:glu 83.525 264.726 0.316 0.7525 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 53.23 on 377 degrees of freedom ## Multiple R-squared: 0.5924, Adjusted R-squared: 0.5233 ## F-statistic: 8.563 on 64 and 377 DF, p-value: < 2.2e-16
###############
#             #
# Exercise 9  #
#             #
###############

# Regularisation path for the expanded predictor matrix x2.
model_lasso1 <- glmnet(x2, y)

# Call the plot() generic instead of plot.glmnet(): the method may not be
# exported by the installed glmnet version, so the direct call can fail
# with "could not find function".
plot(model_lasso1, xvar = "norm", label = TRUE)
################
#              #
# Exercise 10  #
#              #
################

# Cross-validate the LASSO (alpha = 1) on x2 over 1000 lambda values.
# NOTE: fold assignment is random, so results vary between runs unless a
# seed is set beforehand.
cv_fit1 <- cv.glmnet(x = x2, y = y, alpha = 1, nlambda = 1000)

# Use the plot() generic; plot.cv.glmnet() may not be exported by the
# installed glmnet version, so calling it by name can fail.
plot(cv_fit1)

# Refit at the lambda with the smallest cross-validated error and print the
# sparse coefficient matrix.
fit1 <- glmnet(x = x2, y = y, alpha = 1, lambda = cv_fit1$lambda.min)
fit1$beta
## 64 x 1 sparse Matrix of class "dgCMatrix" ## s0 ## age . ## sex -123.526483 ## bmi 501.154155 ## map 258.213165 ## tc . ## ldl . ## hdl -195.124006 ## tch . ## ltg 469.006546 ## glu 22.317103 ## age^2 13.329055 ## bmi^2 41.105173 ## map^2 . ## tc^2 . ## ldl^2 . ## hdl^2 . ## tch^2 . ## ltg^2 . ## glu^2 73.657901 ## age:sex 112.170736 ## age:bmi . ## age:map 30.398283 ## age:tc . ## age:ldl . ## age:hdl . ## age:tch . ## age:ltg 10.930534 ## age:glu 10.313587 ## sex:bmi . ## sex:map 4.446147 ## sex:tc . ## sex:ldl . ## sex:hdl . ## sex:tch . ## sex:ltg . ## sex:glu . ## bmi:map 88.506958 ## bmi:tc . ## bmi:ldl . ## bmi:hdl . ## bmi:tch . ## bmi:ltg . ## bmi:glu . ## map:tc . ## map:ldl . ## map:hdl . ## map:tch . ## map:ltg . ## map:glu . ## tc:ldl . ## tc:hdl . ## tc:tch . ## tc:ltg . ## tc:glu . ## ldl:hdl . ## ldl:tch . ## ldl:ltg . ## ldl:glu . ## hdl:tch . ## hdl:ltg . ## hdl:glu . ## tch:ltg . ## tch:glu . ## ltg:glu .
Are you aware of any R packages/exercises that could solve phase boundary DT type problems? There has been some recent work in Compressed Sensing using Linear L1 Lasso penalized regression that has found a large amount of the variance for height. I would be particularly interested in an exercise that could take simulated or otherwise genotypes and
could produce the phase boundary as in Figure 1 on page 20 of https://arxiv.org/pdf/1310.2264.pdf . Any idea of what software could duplicate Figure 1? Would love to extend the delta (x axis) in Figure 1 above 1.
Thank you for the CS resources that you are providing.