# R functions by (or adapted by) Kyle Gorman

goodman <- function(x, y) {
    # compute Goodman-Kruskal gamma, via Somers' Dxy with pairs tied on x
    # excluded (outx=TRUE)
    require(Hmisc)
    return(as.numeric(rcorr.cens(x, y, outx=TRUE)['Dxy']))
}

standardize <- function(x) {
    # standardize the vector: center it and scale by two standard deviations
    return((x - mean(x)) / (2 * sd(x)))
}

drop <- function(dat) {
    # given a subset of a data frame, remove all the empty factor levels
    # based on code by Florian Jaeger
    dat[] <- lapply(dat, function(x) if (is.factor(x)) factor(x) else x)
    return(dat)
}

fc.switch <- function() {
    # switch coding of unordered factors between sum and treatment coding
    if (getOption('contrasts')[1] == 'contr.treatment') {
        options(contrasts=c('contr.sum', 'contr.poly'))
        print(noquote('unordered factors set to sum (contrast) coding'))
    } else {
        options(contrasts=c('contr.treatment', 'contr.poly'))
        print(noquote('unordered factors set to treatment coding'))
    }
}

degree2rad <- function(x) {
    # convert degrees to radians
    return(x * pi / 180)
}

haversine <- function(lata, lnga, latb, lngb) {
    # compute great-circle distance (in km) between two points on earth,
    # using the spherical law of cosines; based on:
    # http://www.movable-type.co.uk/scripts/latlong.html
    cLng <- cos(degree2rad(lngb) - degree2rad(lnga))
    latA <- degree2rad(lata)
    latB <- degree2rad(latb)
    return(acos(sin(latA) * sin(latB) + cos(latA) * cos(latB) * cLng) * 6371)
}

hz2mel <- function(f) {
    # convert Hertz to mels
    return(1127.01048 * log(1 + f / 700.))
}

mel2hz <- function(m) {
    # convert mels to Hertz
    return(700. * (exp(m / 1127.01048) - 1.))
}

pdf2cdf <- function(x) {
    # make a sorted PDF into a sorted CDF via a running sum
    return(cumsum(x))
}

ran.outliers <- function(model, p=.05) {
    # identify outlying random effects, given a p-criterion, from an
    # lmer/lmer2 object
    require(arm)
    est <- ranef(model)
    se <- se.ranef(model)
    groupings <- names(est)
    i <- 1                                  # index of the current grouping
    for (zest in est) {                     # over groupings
        j <- 1                              # index of the current effect
        for (effect in names(zest)) {       # over effects in grouping
            k <- 1                          # index of the current level
            levels <- row.names(zest[effect])
            print(noquote(paste(effect, groupings[i], sep='|')))
            for (val in zest[effect][, ]) { # over levels
                # two-tailed p-value for this random effect being zero
                pval <- 2 * pnorm(-abs(val), 0, se[[i]][k, j])
                if (pval < p) {             # significant
                    print(noquote(paste(' ', levels[k], val, pval)))
                }
                k <- k + 1
            }
            j <- j + 1
        }
        i <- i + 1
    }
}

tolerance <- function(x, y) {
    # compute Tolerance (after Yang 2005): TRUE iff y exceptions out of
    # N = x + y forms falls under the threshold N / ln(N)
    z <- x + y
    return((z / log(z)) > y)
}

height <- function(f1, f2) {
    # compute height (after Labov 1994, p. 458)
    return(sqrt(abs((f2 * f2) - (2 * f1) ^ 2)))
}
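# A few quick sanity checks for the helpers above (commented out so that
# sourcing this file has no side effects); the coordinates and counts below
# are purely illustrative:
# haversine(39.95, -75.17, 40.71, -74.01)  # Philadelphia to New York, ~130 km
# mel2hz(hz2mel(440))                      # the conversions invert: 440
# tolerance(90, 10)                        # TRUE: 10 < 100 / log(100) ~= 21.7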
peripherality <- function(f1, f2) {
    # compute peripherality (after Labov 1994, p. 458)
    return(sqrt((f2 * f2) + (2 * f1) ^ 2))
}

Zr.nr <- function(r, nr) {
    # compute a smoothed frequency distribution using the Z_r statistic
    zro <- nr
    zro[1] <- zro[1] / (r[2] - r[1])
    L <- length(nr)
    i <- 2
    while (i < L) {
        zro[i] <- 2 * zro[i] / (r[i + 1] - r[i - 1])
        i <- i + 1
    }
    zro[L] <- zro[L] / (r[L] - r[L - 1])
    return(zro)
}

Zr.f <- function(f) {
    # f is a vector of frequencies; returns a data frame you can plot
    q <- rle(sort(f))
    r <- q$values
    nr <- q$lengths
    Zr <- Zr.nr(r, nr)
    return(data.frame(r, nr, Zr))
}

freq2hit <- function(x) {
    # turn a frequency spectrum into a list of frequency hits: repeat each
    # frequency class by its count
    return(rep(seq_along(x), x))
}

spc.frame <- function(observed, expected, m.max=15) {
    # given observed (.spc) and expected (.fzm.spc) frequency spectra,
    # create a data frame suitable for plotting; Vm() is from zipfR
    require(zipfR)
    observed.Vm <- freq2hit(Vm(observed, 1:m.max))
    expected.Vm <- freq2hit(Vm(expected, 1:m.max))
    freq <- c(observed.Vm, expected.Vm)
    Type <- rep('Observed', length(observed.Vm))
    Type <- factor(c(Type, rep('Expected', length(expected.Vm))),
                   levels=c('Observed', 'Expected'))
    return(data.frame(freq, Type))
}

hypot <- function(x, y) {
    # hypotenuse-length function that can never overflow, based on the libc
    # function of the same name
    if (abs(y) > abs(x)) {  # make x the argument larger in magnitude
        temp <- y
        y <- x
        x <- temp
    }
    if (x == 0.) return(0.) # avoid dividing by zero below
    return(abs(x) * sqrt(1. + (y / x) ^ 2))
}

sum.coef <- function(model) {
    # compute log-odds for every factor level, including the level held out
    # of the model matrix; ASSUMES SUM CODING
    if (inherits(model, c('lme', 'mer'))) {
        coefs <- fixef(model)
    } else {
        coefs <- coef(model)
    }
    new.names <- vector()
    new.values <- vector()
    n <- names(coefs)
    h <- gsub('\\d', '', n)  # coefficient names with level digits removed
    for (hh in unique(h)) {
        new.names <- c(new.names, n[h == hh])
        new.values <- c(new.values, (x <- coefs[h == hh]))
        if (hh == '(Intercept)') next
        # the held-out level's coefficient is the negative sum of the others
        new.names <- c(new.names, paste(hh, 0, sep=''))
        new.values <- c(new.values, -sum(x))
    }
    names(new.values) <- new.names
    return(new.values)
}

sf <- function(y) {
    # generate a table for checking the proportional odds assumption,
    # based on: http://www.ats.ucla.edu/stat/r/dae/ologit.htm
    return(c('Y>=0'=qlogis(mean(y >= 0)),
             'Y>=1'=qlogis(mean(y >= 1)),
             'Y>=2'=qlogis(mean(y >= 2))))
}

IR.scores <- function(tab) {
    # print out classic IR scores, given a 2x2 confusion table with the true
    # class in rows and the predicted class in columns
    tp <- tab[2, 2]
    fp <- tab[1, 2]
    fn <- tab[2, 1]
    tn <- tab[1, 1]
    b <- max(sum(tab[1, ]), sum(tab[2, ])) / sum(tab)  # majority-class baseline
    p <- tp / (tp + fp)  # precision
    r <- tp / (tp + fn)  # recall (hit rate)
    f <- fp / (tn + fp)  # false-alarm rate
    acc <- (tp + tn) / (tp + tn + fp + fn)
    F1 <- (2. * p * r) / (p + r)
    cat(paste('baseline = ', format(b, digits=3)), '\n')
    cat(paste('accuracy = ', format(acc, digits=3)), '\n')
    cat(paste('precision = ', format(p, digits=3)), '\n')
    cat(paste('recall = ', format(r, digits=3)), '\n')
    cat(paste('F1 = ', format(F1, digits=3)), '\n')
    cat(paste('MCC = ', format(MCC(tp, tn, fp, fn), digits=3)), '\n')
    cat(paste('Aprime = ', format(a.prime(r, f), digits=3)), '\n')
    cat(paste('Bprime = ', format(b.prime(r, f), digits=3)), '\n')
}

MCC <- function(tp, tn, fp, fn) {
    # compute the Matthews correlation coefficient; the denominator is
    # computed in log space so that the products cannot overflow
    denom <- log2(tp + fp) + log2(tp + fn) + log2(tn + fp) + log2(tn + fn)
    return((tp * tn - fp * fn) / sqrt(2 ^ denom))
}

a.prime <- function(hr, fr) {
    # compute the A' discriminability statistic (after Grier 1970)
    if (fr > hr) {  # swap so that hr >= fr
        temp <- hr
        hr <- fr
        fr <- temp
    }
    return(.5 + ((hr - fr) * (1. + hr - fr) / (4. * hr * (1. - fr))))
}
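# A quick check of the confusion-table functions (commented out; the counts
# below are made up for illustration):
# tab <- matrix(c(50, 10, 5, 35), nrow=2,
#               dimnames=list(truth=c('no', 'yes'), predicted=c('no', 'yes')))
# IR.scores(tab)  # prints baseline, accuracy, precision, recall, F1, MCC, ...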
b.prime <- function(hr, fr) {
    # compute the B' bias statistic (after Donaldson 1992)
    if (fr > hr) {  # swap so that hr >= fr
        temp <- hr
        hr <- fr
        fr <- temp
    }
    prod <- hr * fr
    return(((1. - hr) * (1. - fr) - prod) / ((1. - hr) * (1. - fr) + prod))
}

d.prime <- function(hr, fr) {
    # compute the d' detection statistic (after Macmillan & Creelman 2005)
    if (fr > hr) {  # swap so that hr >= fr
        temp <- hr
        hr <- fr
        fr <- temp
    }
    return(qnorm(hr, 0., 1.) - qnorm(fr, 0., 1.))
}

logx <- function() {
    # log-scale the x-axis of a ggplot2 plot, with 10^i breaks and labels
    require(ggplot2)
    require(scales)
    return(scale_x_continuous(trans=log10_trans(),
               breaks=trans_breaks('log10', function(x) 10 ^ x),
               labels=trans_format('log10', math_format(10 ^ .x))))
}

logy <- function() {
    # log-scale the y-axis of a ggplot2 plot, with 10^i breaks and labels
    require(ggplot2)
    require(scales)
    return(scale_y_continuous(trans=log10_trans(),
               breaks=trans_breaks('log10', function(x) 10 ^ x),
               labels=trans_format('log10', math_format(10 ^ .x))))
}
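# Example usage of the log-scale helpers (commented out; the data frame `d`
# is hypothetical):
# library(ggplot2)
# d <- data.frame(x=10 ^ runif(100, 0, 4), y=10 ^ runif(100, 0, 4))
# ggplot(d, aes(x, y)) + geom_point() + logx() + logy()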