Machine Learning/kdd r: Difference between revisions
Jump to navigation
Jump to search
ThomasLotze (talk | contribs) Created page with '<pre> algebra <- read.csv("algebra_2008_2009_train.txt_sample_1000_random_students.txt", sep="\t", header=TRUE) str(algebra) algebra_test <- read.csv("algebra_2008_2009_test.txt…' |
ThomasLotze (talk | contribs) m (No edit summary) |
||
| Line 8: | Line 8: | ||
# Random Forest Fail | # Random Forest Fail | ||
library(randomForest) | library(randomForest) | ||
my_model <- randomForest(Correct.First.Attempt ~ Step.Name + Anon.Student.Id + Problem.Hierarchy + Problem.Name, data=algebra) | my_model <- randomForest(as.factor(Correct.First.Attempt) ~ Step.Name + Anon.Student.Id + Problem.Hierarchy + Problem.Name, data=algebra) | ||
# glm fail | # glm fail | ||
Latest revision as of 22:44, 12 May 2010
# KDD algebra script: load the training sample and the test set, fit a model of
# Correct.First.Attempt on step/student/problem identifiers, and write the
# per-row forecasts for the test set to a tab-separated submission file.

# Both inputs are tab-separated files with a header row.
algebra <- read.csv(
  "algebra_2008_2009_train.txt_sample_1000_random_students.txt",
  sep = "\t",
  header = TRUE
)
str(algebra)
algebra_test <- read.csv(
  "algebra_2008_2009_test.txt",
  sep = "\t",
  header = TRUE
)
str(algebra_test)

# Random Forest Fail
# NOTE(review): the original author marked this attempt as failing; presumably
# the identifier columns have too many distinct levels for randomForest --
# confirm. The result is overwritten by the glm fit below either way.
library(randomForest)
my_model <- randomForest(
  as.factor(Correct.First.Attempt) ~
    Step.Name + Anon.Student.Id + Problem.Hierarchy + Problem.Name,
  data = algebra
)

# glm fail
# NOTE(review): also marked as failing by the original author; presumably
# predict() hits identifier levels in the test set that were never seen in
# training, or the fit is too large -- confirm.
my_model <- glm(
  Correct.First.Attempt ~
    Step.Name + Anon.Student.Id + Problem.Hierarchy + Problem.Name,
  family = "binomial",
  data = algebra
)

# type = "response" returns probabilities; the predict() default for a glm is
# the link scale (log-odds), which is not a valid Correct.First.Attempt value.
forecasts <- predict(my_model, newdata = algebra_test, type = "response")
forecast_frame <- data.frame(
  Row = algebra_test$Row,
  Correct.First.Attempt = forecasts
)

# write.table() (not write_table) is the base R writer, and it takes
# col.names rather than header. row.names = FALSE keeps the header aligned
# with the two data columns; quote = FALSE writes the values bare.
write.table(
  forecast_frame,
  file = "algebra_2008_2009_submission_r.txt",
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)