文書の過去の版を表示しています。
学習誤差と予測誤差
コード
貼り付け用
準備
library(class) generate.data <- function(n, p, k, setting) { X <- NULL y <- NULL if( setting==1 ) { X <- rbind(X, cbind(rnorm(ceiling(n/2), mean=0, sd=1), rnorm(ceiling(n/2), mean=0, sd=1)) ) y <- rbind(y, as.matrix(array(0, dim=c(ceiling(n/2)) ) ) ) X <- rbind(X, cbind(rnorm(floor(n/2), mean=2, sd=1), rnorm(floor(n/2), mean=2, sd=1)) ) y <- rbind(y, as.matrix(array(1, dim=c(floor(n/2)) ) ) ) Data <- cbind(X,y) colnames(Data) <- c("X.1", "X.2", "y") Data.ret <- Data[sample(c(1:n)),] return(Data.ret) } } split.data <- function(dataset, n.learn) { data.learn <- dataset[c(1:n.learn),] data.eval <- dataset[-c(1:n.learn),] return(list(learn=data.learn, eval=data.eval)) }
設定
m <- 1000 p <- 2 k <- 2 n.learn <- 500 n.eval <- 200 n <- n.learn + n.eval
シミュレーション実験の実施
error.rate.learn <- NULL error.rate.eval <- NULL for( i in c(1:m) ) { error.temp.learn <- NULL error.temp.eval <- NULL data.gen <- generate.data(n,2,2,setting=1) data.split <- split.data(data.gen, n.learn) data.learn <- data.frame(data.split$learn) data.eval <- data.frame(data.split$eval) # lm data.lm <- lm(y~X.1+X.2, data=data.learn) data.pred <- predict(data.lm, newdata=data.eval) data.fit <- fitted(data.lm) error.temp.learn <- append(error.temp.learn, 1-sum(abs(data.learn$y-data.fit)<0.5)/n.learn) error.temp.eval <- append(error.temp.eval, 1-sum(abs(data.eval$y-data.pred)<0.5)/n.eval) # knn:1 data.fit <- knn(data.learn[,c(1:2)], data.learn[,c(1:2)], data.learn[,c(3)], k=1, prob=FALSE) data.pred <- knn(data.learn[,c(1:2)], data.eval[,c(1:2)], data.learn[,c(3)], k=1, prob=FALSE) error.temp.learn <- append(error.temp.learn, 1-sum(abs(data.learn$y-(as.numeric(data.fit)-1))<0.5)/n.learn) error.temp.eval <- append(error.temp.eval, 1-sum(abs(data.eval$y-(as.numeric(data.pred)-1))<0.5)/n.eval) # knn:3 data.fit <- knn(data.learn[,c(1:2)], data.learn[,c(1:2)], data.learn[,c(3)], k=3, prob=FALSE) data.pred <- knn(data.learn[,c(1:2)], data.eval[,c(1:2)], data.learn[,c(3)], k=3, prob=FALSE) error.temp.learn <- append(error.temp.learn, 1-sum(abs(data.learn$y-(as.numeric(data.fit)-1))<0.5)/n.learn) error.temp.eval <- append(error.temp.eval, 1-sum(abs(data.eval$y-(as.numeric(data.pred)-1))<0.5)/n.eval) # knn:5 data.fit <- knn(data.learn[,c(1:2)], data.learn[,c(1:2)], data.learn[,c(3)], k=5, prob=FALSE) data.pred <- knn(data.learn[,c(1:2)], data.eval[,c(1:2)], data.learn[,c(3)], k=5, prob=FALSE) error.temp.learn <- append(error.temp.learn, 1-sum(abs(data.learn$y-(as.numeric(data.fit)-1))<0.5)/n.learn) error.temp.eval <- append(error.temp.eval, 1-sum(abs(data.eval$y-(as.numeric(data.pred)-1))<0.5)/n.eval) # knn:7 data.fit <- knn(data.learn[,c(1:2)], data.learn[,c(1:2)], data.learn[,c(3)], k=7, prob=FALSE) data.pred <- knn(data.learn[,c(1:2)], data.eval[,c(1:2)], data.learn[,c(3)], k=7, prob=FALSE) error.temp.learn <- append(error.temp.learn, 1-sum(abs(data.learn$y-(as.numeric(data.fit)-1))<0.5)/n.learn) error.temp.eval <- append(error.temp.eval, 1-sum(abs(data.eval$y-(as.numeric(data.pred)-1))<0.5)/n.eval) # knn:9 data.fit <- knn(data.learn[,c(1:2)], data.learn[,c(1:2)], data.learn[,c(3)], k=9, prob=FALSE) data.pred <- knn(data.learn[,c(1:2)], data.eval[,c(1:2)], data.learn[,c(3)], k=9, prob=FALSE) error.temp.learn <- append(error.temp.learn, 1-sum(abs(data.learn$y-(as.numeric(data.fit)-1))<0.5)/n.learn) error.temp.eval <- append(error.temp.eval, 1-sum(abs(data.eval$y-(as.numeric(data.pred)-1))<0.5)/n.eval) # knn:15 data.fit <- knn(data.learn[,c(1:2)], data.learn[,c(1:2)], data.learn[,c(3)], k=15, prob=FALSE) data.pred <- knn(data.learn[,c(1:2)], data.eval[,c(1:2)], data.learn[,c(3)], k=15, prob=FALSE) error.temp.learn <- append(error.temp.learn, 1-sum(abs(data.learn$y-(as.numeric(data.fit)-1))<0.5)/n.learn) error.temp.eval <- append(error.temp.eval, 1-sum(abs(data.eval$y-(as.numeric(data.pred)-1))<0.5)/n.eval) # knn:21 data.fit <- knn(data.learn[,c(1:2)], data.learn[,c(1:2)], data.learn[,c(3)], k=21, prob=FALSE) data.pred <- knn(data.learn[,c(1:2)], data.eval[,c(1:2)], data.learn[,c(3)], k=21, prob=FALSE) error.temp.learn <- append(error.temp.learn, 1-sum(abs(data.learn$y-(as.numeric(data.fit)-1))<0.5)/n.learn) error.temp.eval <- append(error.temp.eval, 1-sum(abs(data.eval$y-(as.numeric(data.pred)-1))<0.5)/n.eval) # knn:25 data.fit <- knn(data.learn[,c(1:2)], data.learn[,c(1:2)], data.learn[,c(3)], k=31, prob=FALSE) data.pred <- knn(data.learn[,c(1:2)], data.eval[,c(1:2)], data.learn[,c(3)], k=31, prob=FALSE) error.temp.learn <- append(error.temp.learn, 1-sum(abs(data.learn$y-(as.numeric(data.fit)-1))<0.5)/n.learn) error.temp.eval <- append(error.temp.eval, 1-sum(abs(data.eval$y-(as.numeric(data.pred)-1))<0.5)/n.eval) # knn:51 data.fit <- knn(data.learn[,c(1:2)], data.learn[,c(1:2)], data.learn[,c(3)], k=51, prob=FALSE) data.pred <- knn(data.learn[,c(1:2)], data.eval[,c(1:2)], data.learn[,c(3)], k=51, prob=FALSE) error.temp.learn <- append(error.temp.learn, 1-sum(abs(data.learn$y-(as.numeric(data.fit)-1))<0.5)/n.learn) error.temp.eval <- append(error.temp.eval, 1-sum(abs(data.eval$y-(as.numeric(data.pred)-1))<0.5)/n.eval) # knn:75 data.fit <- knn(data.learn[,c(1:2)], data.learn[,c(1:2)], data.learn[,c(3)], k=75, prob=FALSE) data.pred <- knn(data.learn[,c(1:2)], data.eval[,c(1:2)], data.learn[,c(3)], k=75, prob=FALSE) error.temp.learn <- append(error.temp.learn, 1-sum(abs(data.learn$y-(as.numeric(data.fit)-1))<0.5)/n.learn) error.temp.eval <- append(error.temp.eval, 1-sum(abs(data.eval$y-(as.numeric(data.pred)-1))<0.5)/n.eval) # knn:101 data.fit <- knn(data.learn[,c(1:2)], data.learn[,c(1:2)], data.learn[,c(3)], k=101, prob=FALSE) data.pred <- knn(data.learn[,c(1:2)], data.eval[,c(1:2)], data.learn[,c(3)], k=101, prob=FALSE) error.temp.learn <- append(error.temp.learn, 1-sum(abs(data.learn$y-(as.numeric(data.fit)-1))<0.5)/n.learn) error.temp.eval <- append(error.temp.eval, 1-sum(abs(data.eval$y-(as.numeric(data.pred)-1))<0.5)/n.eval) # knn:201 data.fit <- knn(data.learn[,c(1:2)], data.learn[,c(1:2)], data.learn[,c(3)], k=201, prob=FALSE) data.pred <- knn(data.learn[,c(1:2)], data.eval[,c(1:2)], data.learn[,c(3)], k=201, prob=FALSE) error.temp.learn <- append(error.temp.learn, 1-sum(abs(data.learn$y-(as.numeric(data.fit)-1))<0.5)/n.learn) error.temp.eval <- append(error.temp.eval, 1-sum(abs(data.eval$y-(as.numeric(data.pred)-1))<0.5)/n.eval) # knn:301 data.fit <- knn(data.learn[,c(1:2)], data.learn[,c(1:2)], data.learn[,c(3)], k=301, prob=FALSE) data.pred <- knn(data.learn[,c(1:2)], data.eval[,c(1:2)], data.learn[,c(3)], k=301, prob=FALSE) error.temp.learn <- append(error.temp.learn, 1-sum(abs(data.learn$y-(as.numeric(data.fit)-1))<0.5)/n.learn) error.temp.eval <- append(error.temp.eval, 1-sum(abs(data.eval$y-(as.numeric(data.pred)-1))<0.5)/n.eval) error.rate.learn <- rbind(error.rate.learn, error.temp.learn) error.rate.eval <- rbind(error.rate.eval, error.temp.eval) } colnames(error.rate.learn) <- c("lm", "knn.1", "knn.3", "knn.5", "knn.7", "knn.9", "knn.15", "knn.21", "knn.25", "knn.51", "knn.75", "knn.101", "knn.201", "knn.301") rownames(error.rate.learn) <- c(1:m) colnames(error.rate.eval) <- c("lm", "knn.1", "knn.3", "knn.5", "knn.7", "knn.9", "knn.15", "knn.21", "knn.25", "knn.51", "knn.75", "knn.101", "knn.201", "knn.301") rownames(error.rate.eval) <- c(1:m)
結果のグラフ
boxplot(error.rate.learn)
結果のグラフ
boxplot(error.rate.eval)