# Compare four classifiers (logistic regression, random forest, CART,
# neural network) by fitting each on data$train, scoring data$val, and
# overlaying their ROC curves in a single plot.

# SECURITY NOTE(review): this downloads and executes R code over plain HTTP,
# so the script can be altered in transit. Prefer a vetted local copy (or at
# least HTTPS). The rest of this file relies on it to define `data`, a list
# with `train` and `val` data frames that each contain an `income` column.
source("http://scg.sdsu.edu/wp-content/uploads/2013/09/dataprep.r")

library(randomForest)
library(ROCR)
library(nnet)
library(rpart)

# Logistic Regression ----
# All predictors except `relationship`.
logit.fit <- glm(
  income ~ . - relationship,
  family = binomial(logit),
  data = data$train
)
logit.preds <- predict(logit.fit, newdata = data$val, type = "response")
logit.pred <- prediction(logit.preds, data$val$income)
logit.perf <- performance(logit.pred, "tpr", "fpr")

# Random Forest ----
# Exclude the response by name; the original dropped column 13 by position
# (presumably the income column -- verify against dataprep.r).
rf.predictors <- setdiff(names(data$train), "income")
# `doBest` is the real argument name; the former lower-case `dobest` was
# silently swallowed by `...`.
bestmtry <- tuneRF(
  data$train[rf.predictors],
  data$train$income,
  ntreeTry = 100,
  stepFactor = 1.5,
  improve = 0.01,
  trace = TRUE,
  plot = TRUE,
  doBest = FALSE
)
# NOTE(review): mtry is hard-coded to 2 and does not use `bestmtry`; confirm
# that 2 is still the value the tuning run suggests.
# The former `test = data$val` argument was dropped: randomForest() has no
# such parameter, so it was ignored.
rf.fit <- randomForest(
  income ~ .,
  data = data$train,
  mtry = 2,
  ntree = 1000,
  keep.forest = TRUE,
  importance = TRUE
)
# Column 2 holds the class probability for the second level of `income`.
rf.preds <- predict(rf.fit, type = "prob", newdata = data$val)[, 2]
rf.pred <- prediction(rf.preds, data$val$income)
rf.perf <- performance(rf.pred, "tpr", "fpr")

# CART Trees ----
# Grow a full tree (cp = 0) with 10-fold cross-validation, then prune.
mycontrol <- rpart.control(cp = 0, xval = 10)
tree.fit <- rpart(
  income ~ .,
  method = "class",
  data = data$train,
  control = mycontrol
)
# print() so the table is also shown when this file is run via source().
print(tree.fit$cptable)
# Pruning threshold: geometric mean of the CP values in rows 8 and 9 of the
# CP table.
# NOTE(review): the row indices are hard-coded; they fail if the table has
# fewer than 9 rows and may not be the best rows for a different split.
tree.cptarg <- sqrt(tree.fit$cptable[8, 1] * tree.fit$cptable[9, 1])
tree.prune <- prune(tree.fit, cp = tree.cptarg)
tree.preds <- predict(tree.prune, newdata = data$val, type = "prob")[, 2]
tree.pred <- prediction(tree.preds, data$val$income)
tree.perf <- performance(tree.pred, "tpr", "fpr")

# Neural Network ----
nnet.fit <- nnet(
  income ~ .,
  data = data$train,
  size = 20,
  maxit = 10000,
  decay = .001
)
nnet.preds <- predict(nnet.fit, newdata = data$val, type = "raw")
nnet.pred <- prediction(nnet.preds, data$val$income)
nnet.perf <- performance(nnet.pred, "tpr", "fpr")

# Plotting ROC Curves ----
plot(
  logit.perf,
  col = 2,
  lwd = 2,
  main = "ROC Curve for Classifiers on Adult Dataset"
)
plot(rf.perf, col = 3, lwd = 2, add = TRUE)
plot(tree.perf, lwd = 2, col = 4, add = TRUE)
plot(nnet.perf, lwd = 2, col = 5, add = TRUE)
# Diagonal reference line (slope 1 through the origin).
abline(a = 0, b = 1, lwd = 2, lty = 2, col = "gray")
legend(
  "bottomright",
  col = 2:5,
  lwd = 2,
  legend = c("logit", "RF", "CART", "Neural Net"),
  bty = "n"
)