learn.models <- function(obj, learner, learner.pars){ percents <- list() # for PERCENTS .1,.2, .3, .4, .5 for(i in 1:length(obj$data)){ pred <- real <- list() reg.data <- obj$data[[i]]$reg.data # for each 30 data variations for(j in 1:length(reg.data)){ aux.data <- reg.data[[j]] ind.na <- which(is.na(aux.data[,"target"])) test.data <- aux.data[ind.na,-(1:3)] # filter x, y, target train.data <- aux.data[-ind.na,-(1:2)] # filter x, y # learn model model <- train.model(train.data, learner, learner.pars) # predict test data, excluding 1 column which contains NA pred[[length(pred)+1]] <- predict(model, data.frame(test.data)) # get real data real[[length(real)+1]] <- mapply(function(x,y) obj$orig.data[x,y], aux.data[ind.na, "x"], aux.data[ind.na, "y"]) # get x, y of the NA }#end for 30 data variation percents[[length(percents)+1]] <- list(pred=pred, real=real) }#end for PERCENTS percents } train.model <- function(data,learner,learner.pars){ if(any(is.na(data))) stop("data has NAs") cat('*') model <- NULL new.pars <- list() if(learner %in% c("rpart","rpartXse")){ new.pars$form <- formula(paste(colnames(data)[1]," ~ .",sep="")) }else{ new.pars$formula <- formula(paste(colnames(data)[1]," ~ .",sep="")) } new.pars$data <- data.frame(data) new.pars <- c(new.pars,learner.pars) # trainning model <- do.call(learner,new.pars) model }#.train.model # in the future put in other file # tree.x0 <- tune.model(data=x0, learner="rpartXse", params=list(se=c(0,.5,1,1.5))) # svm.x0 <- tune.model(data=x0, learner="svm", params=list(cost=c(1,5,10,50,100),gamma=c(.001,.01,.05,.1))) # rf.x0 <- tune.model(data=x0, learner="randomForest", params=list(ntree=c(500,1000,1500))) tune.model <- function(data, learner, params){ model <- list() pars <- permuta.list(params) for(i in 1:length(pars)){ p <- pars[[i]] if(DEBUG) print(p) model[[paste(learner, print.list(p))]] <- learn.models(obj=data,learner=learner,learner.pars=p) } model } # m1.x0 <- learn.base.models(obj=x0, type="m1") # m2.x0 <- learn.base.models(obj=x0, type="m2") # m3.x0 <- learn.base.models(obj=x0, type="m3") # w1.x0 <- learn.base.models(obj=x0, type="w1") # w2.x0 <- learn.base.models(obj=x0, type="w2") # w3.x0 <- learn.base.models(obj=x0, type="w3") learn.base.models <- function(obj, type="m1"){ if(! type %in% c("m1","m2","m3","w1","w2","w3")) stop("type must be one of ") percents <- list() # for PERCENTS .1,.2, .3, .4, .5 for(i in 1:length(obj$data)){ pred <- real <- list() reg.data <- obj$data[[i]]$reg.data # for each 30 data variations for(j in 1:length(reg.data)){ aux.data <- reg.data[[j]] ind.na <- which(is.na(aux.data[,"target"])) # selecting the corresponding column... pred[[length(pred)+1]] <- aux.data[ind.na,type] # get real data ind.real <- aux.data[ind.na, c("x","y")] # get x, y of the NA real[[length(real)+1]] <- mapply(function(x,y) obj$orig.data[x,y], ind.real[,1], ind.real[,2]) }#end for 30 data variation percents[[length(percents)+1]] <- list(pred=pred, real=real) }#end for PERCENTS percents } learn.autokrige <- function(obj, nmax=NULL){ percents <- list() coordIn <- obj$allCoordIn # for PERCENTS .1,.2, .3, .4, .5 for(i in 1:length(obj$data)){ cat(i,'0%\n') pred <- real <- kcs <- list() gaps <- obj$data[[i]]$gaps # for each 30 data variations for(j in 1:length(gaps)){ cat(j,"\n") gap <- gaps[[j]] z <- cbind(merge(1:nrow(obj$orig.data),1:ncol(obj$orig.data)), gap) colnames(z) <- c("x","y","valor") ind.na <- which(is.na(z$valor)) loci <- as.data.frame(z[-ind.na,]) loci$x <- as.numeric(loci$x) loci$y <- as.numeric(loci$y) na.data <- z[ind.na, c("x","y")] na.data$x <- as.numeric(na.data$x) na.data$y <- as.numeric(na.data$y) # prepara for krige coordinates(loci) <- ~ x + y coordinates(na.data) <- ~ x + y # calcula o krige # adicionei o verbose, pra imprimir algumas infos dos modelos krige treinados # adicionei o nmax por causa de erros nos modelos x2, x7 e x9. Restringe o numero de vizinhos nas previsões... if(is.null(nmax)) nmax <- Inf kc <- autoKrige(formula=valor~1, input_data=loci, new_data=na.data, verbose=T, nmax=nmax) # predict pred[[as.character(j)]] <- kc$krige_output$var1.pred # real data real[[as.character(j)]] <- obj$vect.orig.data[ind.na] #mapply(function(x,y) obj$orig.data[x,y], na.data[,"x"], na.data[,"y"]) kcs[[as.character(j)]] <- kc }#end for 30 data variation percents[[as.character(i)]] <- list(pred=pred, real=real, kcs=kcs) }#end for PERCENTS percents }