# function to compute the mean of a given vector. NA values are removed # x: numeric vector containing the values for which mean is to be computed mean.fun <- function(x){ if(any(is.na(x))) x <- x[!is.na(x)] res <- 0; n<-length(x) for(i in 1:n) # sum of vector components res <- res + x[i]; av <- res/n; return(av); } # function to compute the mean of a given vector. NA values are removed. Efficient version # x: numeric vector containing the values for which mean is to be computed mean.fun2 <- function(x){ if(any(is.na(x))) x <- x[!is.na(x)] res <- sum(x); av <- res/length(x); return(av); } # variance function # x: numeric vector containing the values for which variance is to be computed var.fun <- function(x){ if(any(is.na(x))) x <- x[!is.na(x)] mu <- mean.fun2(x) n<-length(x) y <- 0; for(i in 1:n) y <- y + (x[i] - mu)^2; var <- y/(n-1); return(var); } # variance function. Efficient version # x: numeric vector containing the values for which variance is to be computed var.fun2 <- function(x){ if(any(is.na(x))) x <- x[!is.na(x)] y <- 0; mu<-mean.fun2(x) y<-sum((x-mu)^2) var <- y/(length(x)-1); return(var); } # standard deviation function # x: numeric vector containing the values for which standard deviation is to be computed sd.fun <- function(x){ return(sqrt(var.fun2(x))) } # median function # x: numeric vector containing the values for which median is to be computed median.fun <- function(x){ x <- sort(x); # note: if the vector x contains same NA # values, sort function removes it n <- length(x); r <- n%%2; #r==0 se r is even, r==1 otherwise if(r==0){ y <- mean.fun2(c(x[n/2], x[n/2+1])) } else{ y <- x[(n+1)/2] } return(y); } # function to produce the k*100-th percentile # x: numeric vector containing the sample values # k: value in [0,1] for which (k*100)-th percentile of x is to be computed percentile.fun <- function(x, k){ if(k > 1) stop("error, k must be < 1") else{ y <- sort(x); n <- length(x); p <- k*n; y <- y[ceiling(p)]; names(y) <- paste(k*100,"%",sep=""); } return(y); } # absolute frequency function # x: numeric vector containing the values for which absolute frequency is to be computed absolute.frequency <- function(x){ v <- unique(x); v <- sort(v); n<-length(v) y <- rep(0, n); names(y) <- v; for(j in 1:n){ y[j] <- sum(x == v[j]); } return(y); } # relative frequency function # x: numeric vector containing the values for which relative frequency is to be computed relative.frequency <- function(x){ abs.freq <- absolute.frequency(x); return(abs.freq/length(x)); } # mode function # x: numeric vector containing the values for which mode is to be computed mode.fun <- function(x){ abs.freq <- absolute.frequency(x); mod <- abs.freq[abs.freq == max(abs.freq)]; return(mod); } # Gini heterogeneicity index # x: numeric vector containing the values for which gini index is to be computed gini <- function(x){ f <- relative.frequency(x); k <- length(f); if (k == 1) return(0) else{ I <- k * (1 - sum(f^2))/(k - 1); return(I); } } # Shannon Entropy # x: numeric vector containing the values for which Shannon entropy is to be computed entropia <- function(x){ f <- relative.frequency(x); k <- length(f); IA <- log(1/f); # log(1/f) == -log(f) HX <- sum(f*IA)/log(k); return(HX); }