# Partition the elements of a vector into n random folds of approximately
# the same size.
# v, vector to be partitioned (only its length is used)
# n, number of folds, a single number >= 1
# OUTPUT
# res, numeric vector of the same length as v whose elements are in 1:n:
#      res[i] = k if v[i] falls in the k-th fold
# NOTE: folds 1..(n-1) each receive floor(length(v)/n) elements; the last
# fold receives every remaining element, so it may be larger than the others.
# Stops with an error if n is not a valid fold count or if n > length(v).
partition <- function(v, n = 3) {
  if (!is.numeric(n) || length(n) != 1 || is.na(n) || n < 1) {
    stop("Number of folds must be a single number >= 1")
  }
  v_len <- length(v)
  if (v_len < n) {
    stop("Number of folds larger than the number of elements")
  }

  indices <- seq_len(v_len)
  size <- floor(v_len / n)
  res <- rep(0, v_len)

  # seq_len() (unlike 1:(n-1)) yields an empty loop when n == 1
  for (i in seq_len(n - 1)) {
    # index through sample.int() so that a length-one pool is never
    # misread by sample() as the range 1:x
    tmp_fold <- indices[sample.int(length(indices), size)]
    res[tmp_fold] <- i
    indices <- setdiff(indices, tmp_fold)
  }

  # the last fold takes whatever has not been assigned yet
  res[indices] <- n
  res
}

# Analogous to partition, with the constraint that v is a binary vector.
# The partition is stratified, i.e. each fold contains (approximately) the
# same proportion of 1s.
# v, numeric 0/1 vector; every element for which v == 1 is not TRUE
#    (0s, but also NAs) is treated as a negative
# n, number of folds, a single number >= 1
# OUTPUT
# res, numeric vector of the same length as v whose elements are in 1:n:
#      res[i] = k if v[i] falls in the k-th fold
# SIDE EFFECT: prints, for each fold, the fraction of its elements that are
# positives (NaN for an empty fold).
# NOTE: folds 1..(n-1) each receive floor(#pos/n) positives and
# floor(#neg/n) negatives; the last fold receives all the remaining
# elements. When a class has fewer than n elements the first n-1 folds get
# none of that class, and they are empty if this holds for both classes.
# Stops with an error if n is not a valid fold count or if n > length(v).
partition.bin <- function(v, n = 3) {
  if (!is.numeric(n) || length(n) != 1 || is.na(n) || n < 1) {
    stop("Number of folds must be a single number >= 1")
  }
  v_len <- length(v)
  if (v_len < n) {
    stop("Number of folds larger than the number of elements")
  }

  all_pos <- which(v == 1)
  pos.indices <- all_pos
  neg.indices <- setdiff(seq_len(v_len), pos.indices)
  size.p <- floor(length(pos.indices) / n)
  size.n <- floor(length(neg.indices) / n)
  res <- rep(0, v_len)

  # seq_len() (unlike 1:(n-1)) yields an empty loop when n == 1
  for (i in seq_len(n - 1)) {
    # positives are drawn before negatives; sample.int() indexing avoids
    # sample() treating a length-one pool as the range 1:x
    tmp_pos <- pos.indices[sample.int(length(pos.indices), size.p)]
    tmp_neg <- neg.indices[sample.int(length(neg.indices), size.n)]
    res[c(tmp_pos, tmp_neg)] <- i
    pos.indices <- setdiff(pos.indices, tmp_pos)
    neg.indices <- setdiff(neg.indices, tmp_neg)
  }

  # the last fold takes whatever has not been assigned yet
  res[c(pos.indices, neg.indices)] <- n

  # report the share of positives in each fold: positives / fold size
  # (not positives / negatives, which is an odds ratio and not a proportion)
  for (i in seq_len(n)) {
    in_fold <- which(res == i)
    cat("Fold : ", i, "\t positive proportion : ",
        length(intersect(in_fold, all_pos)) / length(in_fold), "\n")
  }

  res
}