# Function to compute the Euclidean distance between vectors x and y
d.Eucl <- function(x, y) {
  if (length(x) != length(y))
    stop("Vectors x and y must have the same length.")
  return(sqrt(sum((x - y)^2)))
}

# Function to compute the matrix of distances D for the instances in matrix M.
# D[i,j] = D[j,i] is the distance between vectors M[i,] and M[j,].
d.Eucl.mat <- function(M) {
  n <- nrow(M)
  rnames <- rownames(M)
  D <- matrix(0, nrow = n, ncol = n)
  rownames(D) <- colnames(D) <- rnames
  for (k in seq_len(n - 1)) {
    y <- as.vector(M[k, ])
    # computes the distances of the current vector to the remaining vectors
    D[k, k:n] <- apply(M[k:n, , drop = FALSE], 1, d.Eucl, y = y)
    # distance is symmetric
    D[k:n, k] <- D[k, k:n]
  }
  return(D)
}

# Like d.Eucl.mat, but it uses the R library function dist, which calls C
# routines implementing the loops.
d.Eucl.mat2 <- function(M) {
  return(as.matrix(dist(M, method = "euclidean")))
}

# Function to compute the average fraction of wrong predictions in pred with
# respect to the labels in y. Both y and pred are -1/1-valued vectors.
compute.avg.err <- function(y, pred) {
  return(sum(y != pred) / length(y))
}

# M: n x d instance matrix. y: -1/1 label vector. train and test: numeric
# vectors partitioning the rows of matrix M.
# Output: the best k for the kNN classifier, i.e. the k minimizing the test
# error (ties broken by the training error, then by the smallest k).
train.kNN.test <- function(M, y, train, test) {
  n <- nrow(M)
  if (length(intersect(train, test)) > 0 ||
      length(setdiff(1:n, union(train, test))) != 0)
    stop("train and test must constitute a partition of the rows of M.")
  D <- d.Eucl.mat2(M)  # named D to avoid shadowing the base function dist
  k.vect <- 1:min(50, length(train))  # k cannot exceed the number of training instances
  train.err <- rep(0, length(k.vect))
  test.err <- rep(0, length(k.vect))
  for (k in seq_along(k.vect)) {
    # predicting test instances with kNN: each test instance gets the sign of
    # the sum of the labels of its k nearest training instances (restricting
    # the neighbours to the training set, so that no test label is used)
    pred <- apply(D[test, train, drop = FALSE], 1, function(x, y, k) {
      b <- order(x, decreasing = FALSE)
      return(sign(sum(y[b[1:k]])))
    }, y = y[train], k = k.vect[k])
    # default -1 when having the same number of positive and negative neighbours
    pred[pred == 0] <- -1
    test.err[k] <- compute.avg.err(y[test], pred)
    # predicting training instances; each training instance is its own nearest
    # neighbour, so the training error is optimistic for small k
    pred <- apply(D[train, train, drop = FALSE], 1, function(x, y, k) {
      b <- order(x, decreasing = FALSE)
      return(sign(sum(y[b[1:k]])))
    }, y = y[train], k = k.vect[k])
    pred[pred == 0] <- -1
    train.err[k] <- compute.avg.err(y[train], pred)
  }
  # best k: minimum test error, ties resolved by minimum training error
  k.best <- which(test.err == min(test.err))
  if (length(k.best) > 1)
    k.best <- k.best[which(train.err[k.best] == min(train.err[k.best]))]
  k.best <- k.best[1]
  # plot of the training and test error as a function of k
  png("train_test_err.png")
  plot(k.vect, train.err, xlim = range(k.vect),
       ylim = c(min(train.err, test.err), max(train.err, test.err)),
       ylab = "Error rate", xlab = "k", cex.lab = 1.3, cex.axis = 1.3,
       col = "blue", type = "l")
  lines(k.vect, test.err, col = "red", lty = 2)
  legend(x = "topright", legend = c("Training error", "Test error"),
         col = c("blue", "red"), lty = 1:2, cex = 1.2)
  dev.off()
  return(k.vect[k.best])
}
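
# Minimal usage sketch (illustrative, not part of the original functions):
# it builds a small synthetic two-class dataset, checks that the two distance
# implementations agree, and selects the best k with train.kNN.test. The seed,
# the Gaussian class means and the 70/30 split below are arbitrary assumptions
# chosen only for this example.
set.seed(42)
n <- 100; d <- 2
M <- rbind(matrix(rnorm((n / 2) * d, mean = 0), ncol = d),
           matrix(rnorm((n / 2) * d, mean = 2), ncol = d))
rownames(M) <- paste0("inst", 1:n)
y <- c(rep(1, n / 2), rep(-1, n / 2))        # -1/1 labels
train <- sample(1:n, size = 0.7 * n)         # 70% training instances
test <- setdiff(1:n, train)                  # remaining 30% test instances
# sanity check: the hand-written and the dist-based matrices should coincide
stopifnot(isTRUE(all.equal(d.Eucl.mat(M), d.Eucl.mat2(M))))
best.k <- train.kNN.test(M, y, train, test)  # also writes train_test_err.png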