##
# Copyright (c) 2011 Fred Hutchinson Cancer Research Center
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##
# source ("http://youtil.googlecode.com/files/youtil.R")
# author: Youyi Fong, mostly, if not specified otherwise
# some are modified base functions, usually with names such as mypostscript
# some are short convenience functions, naming convention is a mix of the R convention, first.last, and the Java convention, firstLast
# some are useful functions copied from others
# no warranty whatsoever
# bug reports, comments, suggestions: yfong@u.washington.edu
# most useful functions:
# mypostscript
# mytex
# %+% # this operator add up two strings, e.g. "a" %+% "b"
# getFormattedSummary
# most most dangerous functions (commented out):
# "[" is redefined so that the default behavior for drop is FALSE
# all functions are grouped into the following categories
# string functions
# matrix functions
# regression functions
# misc functions
library (xtable) # writen by David Dahl, needed for making latex tables
#########################################################
### string functions
#########################################################
# paste two strings together
# e.g. "a" %+% "b"
# Infix concatenation operator: "a" %+% "b" yields "ab".
# Quirk kept for backward compatibility: when BOTH arguments are numeric
# the operator sums them instead of pasting.
"%+%" <- function (a, b) {
    # && short-circuits; both operands are scalar logicals here, so this
    # matches the original elementwise & exactly
    if (is.numeric(a) && is.numeric(b)) sum(a, b)
    else paste0(a, b)
}
# many code in the packages expect the default behavior, thus it is dangerous to do this
#"[" <- function (x, ...) {
# base::"["(x, ...,drop=F)
#}
# 1-based index of the first occurrence of s2 in s1, or -1 if absent.
# Uses regexpr with fixed=TRUE (literal match, no regex interpretation);
# this also fixes the old descending 1:0 loop when s2 is longer than s1.
firstIndex = function (s1, s2) {
    as.integer(regexpr(s2, s1, fixed = TRUE))
}
# 1-based index of the last occurrence of s2 in s1 (overlaps allowed), or -1.
# Scans candidate positions from the end and stops at the first hit;
# seq_len guards the empty case where s2 is longer than s1.
lastIndex = function (s1, s2) {
    k = nchar(s2)
    for (i in rev(seq_len(nchar(s1) - k + 1))) {
        if (substr(s1, i, i + k - 1) == s2) return(i)
    }
    -1
}
# return TRUE if s1 starts with s2?
# Vectorized over s1: TRUE where the element begins with the string s2.
# NOTE: masks base::startsWith (which post-dates this file); returns a
# named logical vector (names from s1), like the original sapply version.
startsWith = function (s1, s2) {
    prefix.len = nchar(s2)
    sapply(s1, function (s) substring(s, 1, prefix.len) == s2)
}
# return TRUE if s1 contains s2
# TRUE if s1 contains s2 as a literal substring.
# grepl with fixed=TRUE replaces the O(n*k) manual scan and treats s2
# literally (no regex metacharacters).
contain = function (s1, s2) {
    grepl(s2, s1, fixed = TRUE)
}
####################################################
# Matrix functions
####################################################
# serial covariance matrix
# AR(1) (serial) correlation matrix: entry (i,j) is w^|i-j|.
# Vectorized with outer() instead of the old double loop.
AR1 = function (p, w) {
    w ^ abs(outer(seq_len(p), seq_len(p), "-"))
}
# exchangeable covariance matrix
# Exchangeable (compound-symmetry) correlation matrix: 1 on the diagonal,
# rho everywhere else. Vectorized fill replaces the old double loop.
EXCH = function (p, rho) {
    m = matrix(rho, p, p)
    diag(m) = 1
    m
}
# matrix trace
# Matrix trace: sum of the diagonal elements.
# sum(diag(m)) replaces the manual accumulator loop.
tr = function (m) {
    sum(diag(m))
}
# Extract the strict upper triangle of a square matrix in ROW-major order
# (1,2),(1,3),...,(2,3),... — the same order the original double loop produced.
# Optionally apply func to the extracted vector. Also fixes the n=1 case,
# where the old 1:(n-1) loop ran backwards; here it returns an empty vector.
getUpperRight = function (matri, func=NULL) {
    # t()/lower.tri trick: the lower triangle of the transpose, read
    # column-major, is exactly the upper triangle read row-major
    out = t(matri)[lower.tri(matri)]
    if (is.null(func)) out
    else func(out)
}
#repeat a matrix in a block diagonal fashion
# Repeat a square matrix block-diagonally `times` times.
# Identity (x) .matrix via kronecker replaces the manual placement loop.
rep.matrix.block = function (.matrix, times=2) {
    kronecker(diag(times), .matrix)
}
#it does not work on data.frame
# Repeat matrix rows: each row repeated `each` times consecutively, then the
# whole block repeated `times` times (like rep() for vectors). by.row=FALSE
# does the same to columns via a double transpose. Does not work on data.frame.
# Vectorized row indexing replaces the old triple loop and also handles a
# 0-row input (the old 1:nrow loop would error).
rep.matrix = function (.matrix, times=1, each=1, by.row=T) {
    if (by.row) {
        idx = rep(rep(seq_len(nrow(.matrix)), each = each), times = times)
        out = .matrix[idx, , drop = FALSE]
        # original built a fresh matrix(0,...), so it carried no dimnames
        dimnames(out) = NULL
        out
    } else {
        t(rep.matrix(t(.matrix), times, each, by.row = T))
    }
}
# rep.data.frame(chi[21,], 2)
# Append `times` extra copies of a data.frame below itself, so the result
# holds times+1 copies in total (quirky but preserved — callers rely on it).
# seq_len guards times=0: the old 1:times loop ran for i in c(1,0) and
# appended TWO spurious copies; now times=0 returns the input unchanged.
# by.row is accepted but unused, kept for interface compatibility.
rep.data.frame = function (.data.frame, times=1, by.row=T){
    out = .data.frame
    for (i in seq_len(times)) {
        out = rbind(out, .data.frame)
    }
    out
}
##############################################################
# misc functions
##############################################################
#binomial coefficient
# Binomial coefficient n-choose-m. Delegates to base choose(), which equals
# the old prod((n-m+1):n)/prod(1:m) for valid m but also handles m=0
# (the old form evaluated prod(1:0)=0 and blew up) and is overflow-safer.
binom.coef = function(n, m) { choose(n, m) }
# change the default sep
# Shadow base::cat so that the default separator is "" instead of " "
# (this file's printing convention relies on it).
# Delegates to base::cat, which itself handles file="" (stdout), "|cmd"
# pipes, and append — avoiding the original's fragile .Internal() call,
# whose argument list is not a stable API across R versions.
cat = function (..., file = "", sep = "", fill = FALSE, labels = NULL,
    append = FALSE)
{
    base::cat(..., file = file, sep = sep, fill = fill, labels = labels,
        append = append)
}
# Boxplot-style display where the "box" marks mean +/- 1 and +/- 2 sd per group
# rather than quartiles. Returns the group names (from a throwaway boxplot call).
# NOTE(review): splitFormula is not defined in this file — presumably
# nlme::splitFormula; confirm the package is attached before use.
# NOTE(review): subset(data, subset) uses non-standard evaluation on the
# `subset` argument — only works when callers pass an unevaluated condition.
myboxplot=function(formula,data,showNames=T, subset=NULL,... ){
if (!is.null(subset))
data = subset (data, subset)
# side effect: draws a throwaway boxplot just to recover the group names
names = boxplot(formula, data=data)$names
resp = as.character ( formula )[[2]]
# one grouping variable per "+" term on the RHS of the formula
splittedforms = splitFormula( formula , "+")
groupvars = list (length(splittedforms ))
for (i in 1:length(splittedforms ) ) {
name = as.character ( splittedforms [[i]] )[2]
groupvars[[i]] = data [,name]
}
groupeddata = split (data, groupvars)
# stats for bxp: mean-2sd, mean-sd, mean, mean+sd, mean+2sd per group
# NOTE(review): sd(y[resp]) / mean(y[resp]) operate on a one-column
# data.frame — deprecated in later R; y[[resp]] would be safer. Confirm.
summarydata = sapply ( groupeddata , function (y) { sd1=sd(y[resp]); m1=mean(y[resp]); c ( m1-2*sd1, m1-1*sd1, m1, m1+1*sd1, m1+2*sd1 ) } )
ifelse(showNames,
bxp( list(stats= summarydata, names= names ), ...),
bxp( list(stats= summarydata, names=rep("", length (groupeddata ) ) ), ...)
)
names
}
# Exact sign test: two-sided binomial test of the number of positive
# differences among the nonzero ones (zeros and NAs are dropped).
sign.test = function (diff) {
    n.positive = sum(diff > 0, na.rm = TRUE)
    n.nonzero = sum(diff != 0, na.rm = TRUE)
    binom.test(n.positive, n.nonzero, p = 0.5,
        alternative = c("two.sided", "less", "greater"),
        conf.level = 0.95)
}
# if ret.mat is set to true, always return a matrix
# in the output, each row corresponds to one element of X, instead of each column
# Modified base sapply. Differences from base:
# (1) ret.mat=T forces a matrix result even when FUN returns scalars, and the
#     result is TRANSPOSED relative to sapply — each ROW corresponds to one
#     element of X, not each column;
# (2) numeric X without names gets names made from its values (via %+%).
mysapply=function (X, FUN, ..., simplify = TRUE, USE.NAMES = TRUE, ret.mat=T)
{
# label numeric inputs by their own values so the result rows are named
if (is.null(names(X)) & is.numeric(X)) names(X)=X%+%""
FUN <- match.fun(FUN)
answer <- lapply(X, FUN, ...)
if (USE.NAMES && is.character(X) && is.null(names(answer)))
names(answer) <- X
# simplify only when every FUN result has the same length (as in base sapply)
if (simplify && length(answer) && length(common.len <- unique(unlist(lapply(answer,
length)))) == 1) {
if (common.len >= 1)
if (common.len == 1 & !ret.mat)
unlist(answer, recursive = FALSE)
else
# note the outer t(): rows of the result index elements of X
t(array(unlist(answer, recursive = FALSE), dim = c(common.len,
length(X)), dimnames = if (!(is.null(n1 <- names(answer[[1]])) &
is.null(n2 <- names(answer))))
list(n1, n2)))
else t(answer)
}
else t(answer)
}
# test mysapply
# Ad hoc smoke test left in by the author: with ragged results (length 2 vs 3)
# lapply returns a plain list; the second line exercises mysapply on the same
# input. Both lines just overwrite the throwaway variable `answer`.
answer=lapply(1:3, function (i) if (i==2) rep(NA,2) else 1:3 )
answer=mysapply(1:3, function (i) if (i==2) rep(NA,2) else 1:3 )
# This function process all columns of x together instead of processing them one at a time
# FUN can return an array or a list. It does not have to return a scalar. This
# saves from having to redo grouping for every col that has to be returned
# also this eliminates the necessity to process a column of x at a time
# Modified base aggregate: FUN receives ALL columns of each group at once
# (via tapply2) instead of one column at a time, and FUN may return a vector
# or list rather than a scalar. Output columns are named new.col.name.
# NOTE(review): tapply2 is not defined in this file — presumably defined
# elsewhere in the author's environment; confirm before sourcing standalone.
myaggregate = function (x, by, FUN, new.col.name="aggregate.value", ...)
{
if (!is.data.frame(x))
x <- as.data.frame(x)
if (!is.list(by))
stop(paste(sQuote("by"), "must be a list"))
# default group names Group.1, Group.2, ... for unnamed by-variables
if (is.null(names(by)))
names(by) <- paste("Group", seq(along = by), sep = ".")
else {
nam <- names(by)
ind <- which(nchar(nam) == 0)
names(by)[ind] <- paste("Group", ind, sep = ".")
}
#original
#y <- lapply(x, tapply, by, FUN, ..., simplify = FALSE)
#modified
z=tapply2(x, by, FUN, ...)
#original
#if (any(sapply(unlist(y, recursive = FALSE), length) > 1))
# stop(paste(sQuote("FUN"), "must always return a scalar"))
#z <- y[[1]]
# expand the dimnames of the result array z into a grid of group labels w
d <- dim(z)
w <- NULL
for (i in seq(along = d)) {
j <- rep.int(rep.int(seq(1:d[i]), prod(d[seq(length = i -
1)]) * rep.int(1, d[i])), prod(d[seq(from = i + 1,
length = length(d) - i)]))
w <- cbind(w, dimnames(z)[[i]][j])
}
# drop group combinations with no data (NULL cells in z)
w <- w[which(!unlist(lapply(z, is.null))), , drop = FALSE]
#original
#y <- data.frame(w, lapply(y, unlist, use.names = FALSE))
#modified
y <- data.frame(w, matrix(unlist(z), nrow=nrow(w), byrow=T))
#original
#names(y) <- c(names(by), names(x))
#modified
names(y) <- c(names(by), new.col.name)
y
}
# This function can handle X being matrix instead of just a vector
# Modified base tapply: X may be a matrix or data.frame instead of just a
# vector (nx is then the row count, so INDEX entries are matched per row).
# NOTE(review): split() on a data.frame splits by rows, but on a plain matrix
# it splits by ELEMENT — the matrix/data.frame support looks intended for
# data.frame X; confirm before passing a matrix.
mytapply = function (X, INDEX, FUN = NULL, ..., simplify = TRUE)
{
FUN <- if (!is.null(FUN))
match.fun(FUN)
if (!is.list(INDEX))
INDEX <- list(INDEX)
nI <- length(INDEX)
namelist <- vector("list", nI)
names(namelist) <- names(INDEX)
extent <- integer(nI)
#original
#nx <- length(X)
#modified
nx = ifelse(!(is.data.frame(X) | is.matrix(X)), length(X), length(X[,1]) )
one <- as.integer(1)
group <- rep.int(one, nx)
ngroup <- one
# encode the cross-classification of all INDEX factors into a single
# integer group id (mixed-radix encoding, as in base tapply)
for (i in seq(INDEX)) {
index <- as.factor(INDEX[[i]])
if (length(index) != nx)
stop("arguments must have same length")
namelist[[i]] <- levels(index)
extent[i] <- nlevels(index)
group <- group + ngroup * (as.integer(index) - one)
ngroup <- ngroup * nlevels(index)
}
if (is.null(FUN))
return(group)
ans <- lapply(split(X, group), FUN, ...)
index <- as.numeric(names(ans))
if (simplify && all(unlist(lapply(ans, length)) == 1)) {
ansmat <- array(dim = extent, dimnames = namelist)
ans <- unlist(ans, recursive = FALSE)
}
else {
# non-scalar results are stored cell-wise in a list-valued array
ansmat <- array(vector("list", prod(extent)), dim = extent,
dimnames = namelist)
}
names(ans) <- NULL
ansmat[index] <- ans
ansmat
}
# Inverse logit. plogis is the numerically stable base implementation:
# the old exp(x)/(1+exp(x)) returned NaN for x > ~710 where exp overflows.
expit = function (x) { plogis(x) }
# Log odds. qlogis (logistic quantile function) equals log(x/(1-x))
# but is more accurate near the endpoints 0 and 1.
logit = function (x) { qlogis(x) }
# Shadow base::q so quitting defaults to save="no" (no workspace prompt).
# Delegates to base::quit instead of calling .Internal directly, which is
# not a stable API across R versions.
q = function (save = "no", status = 0, runLast = TRUE) base::quit(save, status, runLast)
# set the classes for columns of a dataframe
# Coerce each column of a data.frame to the class named in colClasses
# (one entry per column: "factor", "Date", "integer", "numeric",
# "character" or "logical"; anything else leaves the column unchanged).
# NOTE: the "Date" branch uses as.date from the 'date' package, not base.
SetColType = function (df, colClasses) {
    for (i in seq_len(length(names(df)))) {
        df[,i] = switch(colClasses[i],
            "factor"    = as.factor(df[,i]),
            "Date"      = as.date(as.character(df[,i])),
            "integer"   = as.integer(df[,i]),
            "numeric"   = as.numeric(df[,i]),
            "character" = as.character(df[,i]),
            "logical"   = as.logical(df[,i]),
            df[,i])  # default: unknown class string, keep column as-is
    }
    df
}
# Two-sample t STATISTIC only (no p-value): pooled-variance form when
# var.equal, otherwise the Welch (unequal variance) form.
quick.t.test = function (x, y, var.equal=F) {
    m = length(x)
    n = length(y)
    dx = x - mean(x)
    dy = y - mean(y)
    diff.means = mean(x) - mean(y)
    if (var.equal) {
        # pooled variance on m+n-2 degrees of freedom
        diff.means / sqrt((sum(dx^2) + sum(dy^2)) * (1/m + 1/n) / (m + n - 2))
    } else {
        diff.means / sqrt(sum(dx^2)/(m-1)/m + sum(dy^2)/(n-1)/n)
    }
}
# Absolute t statistic comparing two means given their sample variances and
# a common sample size n. Strips names so the result is a bare number.
vector.t.test = function (mean.x, mean.y, var.x, var.y, n) {
    combined.var = (var.x + var.y) / n
    unname(abs(mean.x - mean.y) / sqrt(combined.var))
}
# Tukey multiple comparison of m group means.
# mu: vector of group means; ms: mean square error; n: per-group sample size.
# Prints the pairwise t statistics and lists which pairs exceed the
# studentized-range cutoff (alpha = .01). Output only; returns nothing useful.
tukey.mtest = function (mu, ms, n) {
    m = length(mu)
    # studentized range critical value, rescaled to the t scale by sqrt(2)
    cutoff = qtukey(p=.01, nmeans=m, df=(n-1)*(m-1), nranges = 1, lower.tail = F, log.p = FALSE)/sqrt(2)
    t = matrix(0, m, m)
    for (i in 1:m) {
        for (j in i:m) {
            t[i,j] = (mu[i]-mu[j]) / sqrt( 2/n * ms )
        }
    }
    cat ("The t statistics for Tukey method are calculated below:\n")
    print (signif(t,3))
    cat ("\n")
    cat ("By comparing the t values with ", signif (cutoff,3), ", Tukey method declares that the following pairs are significantly different: ")
    for (i in 1:m) {
        for (j in i:m) {
            if (abs(t[i,j]) > cutoff) {
                if (t[i,j]>0) cat (i, "&", j)
                else cat (j, "&", i)
                # BUG FIX: original tested j==n (the SAMPLE SIZE) to decide the
                # sentence terminator; the last group index is m, not n
                if (i==m & j==m) cat (". ")
                else cat (", ")
            }
        }
    }
    cat ("In other words, the following pairs are not significantly different: ")
    for (i in 1:m) {
        for (j in i:m) {
            if (abs(t[i,j]) <= cutoff & i!=j) {
                cat (i, "&", j)
                if (i==m & j==m) cat (". ")
                else cat (", ")
            }
        }
    }
    cat("\n")
}
# Digits of n in the given base (default binary), most-significant first;
# r=TRUE reverses the order (least-significant first). Returns NULL for n=0.
as.binary <- function(n, base=2, r=FALSE)
{
    digits <- NULL
    while (n > 0) {
        digit <- n %% base
        digits <- if (r) c(digits, digit) else c(digit, digits)
        n <- n %/% base
    }
    return(digits)
}
############################################################
# output functions
############################################################
# print a matrix/table or a list of them to a latex file as xtable
# note file.name can not have space in it
# e.g. mytex(matrix(0,2,2));
# e.g. mytex(matrix(0,2,2), digits=4);
# e.g. mytex(list(matrix(0,2,2), c(1,1)));
# Write a matrix/table (or a list of them) to a standalone .tex file as
# xtable output. dat may also contain character strings, which are written
# verbatim as paragraphs. append=T re-opens an existing file and drops its
# final \end{document} line before appending.
# NOTE(review): append mode counts lines via system("wc -l ...") — unix only.
# NOTE(review): if a global `tablePath` exists the file goes there instead
# of the working directory.
mytex=function(dat=NULL, file.name="temp.tex", digits=NULL, display=NULL, align="r", append=F, preamble="", keep.row.names=TRUE, ...) {
if(exists("tablePath") && file.exists(tablePath)) {
file.name=tablePath%+%"/"%+%file.name%+%".tex"
} else {
file.name=file.name%+%".tex"
}
# normalize dat to a list of tables
if(is.data.frame(dat)) dat=list(dat)
if (!is.list(dat)) dat=list(dat)
if (!append) {
cat ("\\documentclass{article}\n", file=file.name, append=F)
cat (preamble, file=file.name, append=T)
cat("\n\\begin{document}\n", file=file.name, append=T)
} else {
fil1 = file(file.name, "r")
# copy content to a new file until hit \end{document}, assuming that is the last line
n=as.numeric(strsplit(system ("wc -l "%+%file.name, intern=T), " ")[[1]][1])
tmp = readLines(fil1, n-1)
close(fil1)
cat (concatList(tmp, "\n"), file=file.name, append=F)
}
if (length(dat)>0) {
for (i in 1:length(dat)) {
dat1 = dat[[i]]
# .ncol drives the width of the digits/display/align vectors below
.ncol=ncol(dat1)
if (is.null(.ncol)) {
if (is.null(nrow(dat1))) .ncol=1
else .ncol=nrow(dat1)
}
if (!is.matrix(dat1) & is.character(dat1)) {
# plain strings are written as-is (e.g. section text between tables)
cat (dat1%+%"\n\n\n", file=file.name, append=T)
} else {
if (is.vector(dat1)) dat1=as.matrix(dat1)
cat (names(dat)[i]%+%"\n\n", file=file.name, append=T)
if (!is.null(dat1)) {
if (!keep.row.names) rownames(dat1)=1:nrow(dat1) # there is no way to not print rownames by xtable, here we make it not so distracting
# NOTE(review): ... is forwarded to BOTH print() and xtable() here
print(..., xtable(dat1,
digits=(if(is.null(digits)) rep(3, .ncol+1) else digits), # cannot use ifelse here!!!
display=(if(is.null(display)) rep("f", .ncol+1) else display), # or here
align=rep(align,.ncol+1), ...),
type = "latex", file = file.name, append = T, floating = F )
}
cat ("\n", file=file.name, append=T)
}
}
}
cat ("\n\\end{document}", file=file.name, append=T)
print ("data saved to "%+%getwd()%+%"/"%+%file.name)
}
# write a table that contains mean and sd to temp.tex in the current working directory, getwd()
# models can be a list of models, or a single model
# Write a latex table of "estimate (se)" strings for one model or a list of
# models, via getFixedEf() and mytex(). row.major=T puts one model per row.
# round.digits gives per-PARAMETER rounding (one entry per coefficient).
make.latex.coef.table = function (models, model.names=NULL, row.major=F, round.digits=NULL) {
# e.g.: models=list(gam1, gam2); round.digits= c(3,3,3,3,3); model.names=c("gam1", "gam2"); row.major=T
if (! ("list" %in% class (models) ) ) {models=list(models)}
numParams = nrow (getFixedEf(models[[1]]))
numModels = length (models)
if (is.null (model.names)) {model.names=rep("",numModels)}
if (is.null(round.digits)) round.digits=rep(3,numParams)
coef.table = mysapply (1:numModels, function (i.model) {
# first two columns of getFixedEf are assumed to be estimate and se
temp = getFixedEf(models[[i.model]]) [,1:2,drop=FALSE]
for (i.param in 1:numParams) {
temp[i.param,] = round (temp[i.param,], round.digits[i.param])
}
temp2 = paste (format(temp[,1]), "(", format(temp[,2]), ")")
names (temp2) = dimnames(temp)[[1]]
temp2
})
dimnames (coef.table)[[1]] = model.names
if (row.major) mytex ( coef.table, align="r" )
else mytex (t(coef.table), align="r")
}
# convert a factor to integer using its value, e.g. 1 to 1, 2 to 2
# Convert a factor whose LEVELS are numbers to those numeric values.
# (as.integer(f) alone would return the internal level codes, not the values.)
ftoi = function (f) {
    as.integer(levels(f))[as.integer(f)]
}
#returns day of year from a date
# Day-of-year (1..366) for a date from the 'date' package.
# NOTE(review): date.mdy and as.date come from the 'date' package, not base.
# NOTE(review): paste() here uses the default sep=" ", producing "1/1/ YYYY";
# presumably as.date tolerates the embedded space — confirm.
day.of.year = function (date1) {
temp = date.mdy (date1)
date.new.year = as.date(paste("1/1/",as.character(temp$year)))
date1-date.new.year + 1
}
# Vectorized inclusive range test: TRUE where a <= x <= b.
between = function (x, a, b){
    # De Morgan form of (x >= a & x <= b); NA propagates identically
    !(x < a | x > b)
}
# convert temp from f to c
# Convert a temperature from Fahrenheit to Celsius.
f2c = function (f) {
    ((f - 32) * 5) / 9
}
# like lag, move vector to the right/left by given number of steps
# x is a vector
# Shift a vector right by k positions, padding the front with NA
# (like a k-step lag). Result always has length(x) elements — the old
# x[1:(length(x)-k)] form produced a wrong-length result when k >= length(x).
shift.right = function (x, k=1) {
    c(rep(NA, k), x)[seq_along(x)]
}
# Shift a vector left by k positions, padding the end with NA
# (like a k-step lead). Result always has length(x) elements — the old
# x[(k+1):length(x)] form misbehaved when k >= length(x) (descending index).
shift.left = function (x, k=1) {
    c(x, rep(NA, k))[k + seq_along(x)]
}
# return a subset of data that is 1 row every thin.factor rows
# Keep one row out of every thin.factor rows (rows thin.factor, 2*thin.factor, ...).
# Uses integer division so that nrow(dat) < thin.factor now returns an empty
# selection — the old 1:(NumRows/thin.factor) form selected a nonexistent
# row (all-NA) in that case.
ThinRows = function (dat, thin.factor=10) {
    keep = seq_len(nrow(dat) %/% thin.factor) * thin.factor
    dat[keep, ]
}
thin.rows = ThinRows
#mix two arrays in an interlacing way
# Interlace two equal-length vectors: mix(a, b) = a1, b1, a2, b2, ...
# Vectorized placement replaces the element loop; seq_along also fixes the
# empty-input case, where the old 1:length(a) loop wrote past the end.
mix = function (a, b) {
    if (length(a) != length(b)) print ("Length of two arguments to mix function not equal.")
    out = rep(a, each = 2)
    out[2 * seq_along(a)] = b
    out
}
# this is written before I know about matplot {graphics}
#note that the first column must be the time axis
#plot several series in one plot
#dat is a matrix, first column is x, second is y1, third is y2, ...
#it is hard to change color to blue and red
# e.g. plotSeries (cbind(1:10, 2:11, 3:12))
# Plot several series against a common x axis (predates the author learning
# about graphics::matplot). dat is a matrix: column 1 is x, columns 2..k are
# the series. Each series gets its own color/linetype; an optional legend is
# drawn at legend.x.
plotSeries = function(dat, main="", legend=NULL,ylim=NULL,col=NULL,lty=NULL,ylab=NULL,type="b"
,legend.cex=1,legend.x="topleft",legend.inset=0,legend.bty="n",pch=NULL, ...) {
# defaults: y range over all series, colors 1,2,..., solid lines
if (is.null (ylim)) ylim=range(dat[,2:ncol(dat)], na.rm=T)
if (is.null (col)) col=1:(ncol(dat)-1)
if (is.null (lty)) lty=rep(1,(ncol(dat)-1))
if (is.null (ylab)) ylab=""
# empty frame first, then one lines() call per series
plot (dat[,1],rep(0,nrow(dat)),type="n",ylim=ylim, ylab=ylab, main=main, ...)
for (i in 2:ncol(dat)) {
lines (dat[,1], dat[,i], type=type, col=col[i-1], lty=lty[i-1], pch=pch, ...)
}
# legend keys use pch when points were drawn, otherwise line types
if (!is.null(legend))
if (is.null(pch))
legend(x=legend.x, col=col, legend=legend, lty=lty, cex=legend.cex,inset=legend.inset,bty=legend.bty)
else
legend(x=legend.x, col=col, legend=legend, pch=pch, cex=legend.cex,inset=legend.inset,bty=legend.bty, pt.cex=2)
}
#generating rejective sample
# Rejective (conditional Poisson) sampling: draw with replacement using
# inclusion weights pik, and reject any draw containing duplicates until a
# sample of n distinct units from 1..N is obtained.
rejective.sampling = function (N, n, pik) {
    repeat {
        draw = sample(N, n, replace = T, prob = pik)
        if (length(unique(draw)) == n) return(draw)
    }
}
#mysystem can call any exe file
# Run any executable through the Windows shell (%COMSPEC% /c cmd).
# NOTE(review): Windows-specific — COMSPEC is empty on unix, and the
# `invisible` argument of system() exists only on Windows builds of R.
mysystem = function (cmd, ...) {
system ( paste(Sys.getenv("COMSPEC")," /c ",cmd) , invisible =T, intern=F, ...)
}
# Mode of a locfit density fit: the evaluation point with the largest fitted
# value. rank(tmp$fit) == length(tmp$fit) selects the maximum.
# NOTE(review): preplot/xev come from the locfit package — confirm attached.
getModeFromLocfit=function (fit) {
tmp=preplot(fit)
tmp$xev[[1]][rank (tmp$fit) == length(tmp$fit)]
}
# TRUE if the last character of s1 equals c1.
endWith = function (s1, c1) {
    final.char = substring(s1, nchar(s1))
    final.char == c1
}
# Choose an mfrow layout c(nrow, ncol) for `len` plots; NULL when len > 25.
# len 1..3 map to a single row; larger counts use the smallest canned grid
# whose capacity is >= len.
getMfrow = function (len) {
    if (len == 1) return(c(1, 1))
    if (len == 2) return(c(1, 2))
    if (len == 3) return(c(1, 3))
    capacities = c(4, 6, 9, 12, 16, 20, 25)
    layouts = list(c(2, 2), c(2, 3), c(3, 3), c(3, 4),
                   c(4, 4), c(4, 5), c(5, 5))
    fits = which(len <= capacities)
    if (length(fits) > 0) layouts[[fits[1]]] else NULL
}
# Concatenate the elements of a list/vector into one value, separated by sep.
# Built on the %+% operator defined above, so numeric edge cases (summing)
# behave exactly as elsewhere in this file.
concatList = function (lis, sep=""){
    out = lis[[1]]
    for (i in seq_along(lis)[-1]) {
        out = out %+% sep %+% lis[[i]]
    }
    out
}
# Escape underscores for LaTeX: "a_b" -> "a\_b".
# BUG FIX: without fixed=TRUE, the replacement string "\\_" (i.e. \_) is
# interpreted by the regex engine and collapses to a plain "_", making the
# function a no-op. fixed=TRUE makes both pattern and replacement literal.
escapeUnderline = function (name) {
    gsub("_", "\\_", name, fixed = TRUE)
}
#Usage eg: sendEmail("ying@gmail.com", "re: simulation", "body 1")
# Send a short email. On unix, pipes the body to /usr/bin/mail; on Windows,
# shells out to a hard-coded personal SendEmail.exe path (will fail on any
# other machine). Relies on the unix() helper defined later in this file.
#Usage eg: sendEmail("ying@gmail.com", "re: simulation", "body 1")
sendEmail=function (to, subject, body) {
if (!unix())
# NOTE(review): machine-specific absolute path; `show.output.on.console`
# is a Windows-only system() argument
system ("C:/1Youyisstuff/3Software/C++/SendEmail/Release/SendEmail.exe "
%+% to %+%" \""
%+% subject %+%"\" \""
%+% body %+% "\"",
wait=FALSE, show.output.on.console=FALSE, intern=TRUE)
else {
cmd="echo '"%+%body%+%"' | /usr/bin/mail -s '"%+%subject%+%"' " %+% to
system (cmd)
}
}
# default values for the control parameters have been tuned so that the eps figure is suitable for a latex file
# example: mypostscript(); plot(1:10); dev.off()
# (Deprecated; superseded by mypostscript below.) Open an eps/pdf device
# sized and par()-tuned for inclusion in a latex document, then set mfrow/
# mfcol and scaled margins/fonts. Caller is responsible for dev.off().
mypostscript.old=function (file="temp", mfrow=c(1,1), mfcol=NULL, width=NULL, height=NULL, my.oma=c(0,0,0,0),
my.mar=c(3,3,2.5,1.5), type="eps", ...) {
# if(exists("figurePath") && file.exists(figurePath)) {
# file=figurePath%+%"/"%+%file
# } else {
# file=file
# }
print(paste(getwd(),"/",file,sep=""))
# mfcol, when given, takes precedence over mfrow
if (!is.null(mfcol)) {
nrow=mfcol[1]; ncol=mfcol[2]
} else {
nrow=mfrow[1]; ncol=mfrow[2]
}
if (nrow>4) warning ("nrow > 4 will not fit a page without making the figures hard to see")
# default width scales with the number of panel columns
if(is.null(width)) {
if (ncol>=3) width=6.5
else if (ncol==2) width=5
else width=4
}
if(is.null(height)) {
if (ncol>=4) height=width/ncol*nrow * 1
else height=width/ncol*nrow
}
if (type=="pdf") pdf (paper="special", file=file%+%"."%+%type, width=width, height=height, ...)
else postscript (paper="special", horizontal=F, file=file%+%"."%+%type, width=width, height=height, ...)
if (!is.null(mfcol)) par(mfcol=mfcol)
else par(mfrow=mfrow)
# cex scaling tuned by panel count so labels stay legible when shrunk in latex
my.mgp=c(1.6,.5,0);
if (ncol>=4) par (oma=my.oma, mar=my.mar, mgp=my.mgp, cex=.5, cex.main=1, cex.axis=1, cex.lab=1)
else if (ncol==3) par (oma=my.oma, mar=my.mar, mgp=my.mgp, cex=.75, cex.main=.75, cex.axis=.75, cex.lab=.85)
else if (ncol==2) par (oma=my.oma, mar=my.mar, mgp=my.mgp, cex=.75, cex.main=.75, cex.axis=.75, cex.lab=.85)
else par (oma=my.oma, mar=my.mar, mgp=my.mgp, cex=1, cex.main=.75, cex.axis=.75, cex.lab=.85)
}
# it is best to resize a figure in latex, because the proportions won't look right, if we change the wide and height
# is.prezo controls whether or not it is used for a presentation
# Open an eps (default) or pdf device for a latex figure, choosing page
# dimensions from a table of supported mfrow/mfcol layouts, then set the
# panel layout (and optionally oma / extra top margin for an outer title).
# Caller is responsible for dev.off(). is.prezo is accepted but unused here.
mypostscript=function (file="temp", mfrow=c(1,1), mfcol=NULL, width=NULL, height=NULL, ext="eps", is.prezo=FALSE, oma=NULL, main.outer=FALSE, ...) {
print(paste(getwd(),"/",file,sep=""))
# mfcol, when given, takes precedence over mfrow
if (!is.null(mfcol)) {
nrow=mfcol[1]; ncol=mfcol[2]
} else {
nrow=mfrow[1]; ncol=mfrow[2]
}
if (nrow>4) warning ("nrow > 4 will not fit a page without making the figures hard to see")
# sca controls how much to scale down for use in a paper
# hand-tuned page sizes per layout; unsupported layouts are an error
if(is.null(width) | is.null(height)) {
if (nrow==1 & ncol==1) {width=6.7; height=6.7
} else if (nrow==1 & ncol==2) {width=9.7; height=5.2
} else if (nrow==1 & ncol==3) {width=9.7; height=3.4
} else if (nrow==2 & ncol==3) {width=9.7; height=6.7
} else if (nrow==3 & ncol==4) {width=9.7; height=8
} else if (nrow==4 & ncol==5) {width=9.7; height=8.3
} else if (nrow==2 & ncol==2) {width=8; height=8.5
} else if (nrow==3 & ncol==3) {width=9.7; height=10.3
} else if (nrow==4 & ncol==4) {width=9.7; height=10.3
} else if (nrow==5 & ncol==5) {width=9.7; height=10.3
} else if (nrow==2 & ncol==1) {width=6.7; height=9.7
} else if (nrow==3 & ncol==2) {width=6.7; height=10.3
} else if (nrow==4 & ncol==3) {width=7; height=10.3
} else stop ("nrow ncol not supported")
}
if (ext=="pdf") pdf (paper="special", file=file%+%"."%+%ext, width=width, height=height, ...)
else postscript (paper="special", horizontal=F, file=file%+%"."%+%ext, width=width, height=height, ...)
if (!is.null(mfcol)) par(mfcol=mfcol)
else par(mfrow=mfrow)
if (!is.null(oma)) par(oma=oma)
# reserve one extra outer-margin line for a title drawn with mtext(outer=T)
if (main.outer) {
tmp=par()$oma
tmp[3]=tmp[3]+1
par(oma=tmp)
}
}
# Convenience wrapper: mypostscript with a pdf device instead of eps.
mypdf = function (...) {
    mypostscript(ext = "pdf", ...)
}
##test
#mypdf(mfrow=c(1,1),file="test1x1");plot(1:10,main="LUMX",xlab="t",ylab="y");dev.off()
#mypdf(mfrow=c(1,2),file="test1x2");plot(1:10,main="LUMX",xlab="t",ylab="y");plot(1:10);dev.off()
#mypdf(mfrow=c(2,2),file="test2x2");plot(1:10,main="LUMX",xlab="t",ylab="y");plot(1:10);plot(1:10);plot(1:10);plot(1:10);dev.off()
#mypdf(mfrow=c(1,3),file="test1x3");plot(1:10,main="LUMX",xlab="t",ylab="y");plot(1:10);plot(1:10);dev.off()
#mypdf(mfrow=c(2,3),file="test2x3");plot(1:10,main="LUMX",xlab="t",ylab="y");plot(1:10);plot(1:10);plot(1:10);plot(1:10);plot(1:10);plot(1:10);dev.off()
#mypdf(mfrow=c(4,4),file="test4x4");plot(1:10,main="LUMX",xlab="t",ylab="y");plot(1:10);plot(1:10);plot(1:10);plot(1:10);plot(1:10);plot(1:10);plot(1:10);plot(1:10);plot(1:10);plot(1:10);plot(1:10);plot(1:10);plot(1:10);plot(1:10);plot(1:10,main="Luminex");dev.off()
# Rescale a vector so that its elements sum to 1.
normalize = function (a) {
    total = sum(a)
    a / total
}
# returns binary representation of an integer
# Binary representation of a non-negative integer as a string ("" for 0).
# BUG FIX: the original recursed into an undefined function `bb`; the
# recursive call must be to binary itself.
binary <- function(i) if (i) paste(binary(i %/% 2), i %% 2, sep="") else ""
# returns binary representatin of an integer with leading 0, the length of string is n
# Binary representation of integer(s) i as n-character strings with leading
# zeros; vectorized over i.
binary2 <- function(i, n) {
    place.value <- 2^((n-1):0)
    next.place <- 2 * place.value
    # bit at each place: (x mod 2^(p+1)) >= 2^p
    sapply(i, function(x) paste(as.integer((x %% next.place) >= place.value), collapse=""))
}
##-- Stirling numbers of the 2nd kind
##-- (Abramowitz/Stegun: 24,1,4 (p. 824-5 ; Table 24.4, p.835)
##> S^{(m)}_n = number of ways of partitioning a set of $n$ elements into $m$
##> non-empty subsets
## Stirling numbers of the 2nd kind (Abramowitz/Stegun 24,1,4, p.824-5):
## S^{(m)}_n = number of ways of partitioning a set of n elements into m
## non-empty subsets. Adapted from Martin Maechler's closed-form version.
Stirling2 <- function(n,m)
{
    if (0 > m || m > n) stop("'m' must be in 0..n !")
    k <- 0:m
    # alternating signs (-1)^(m-k), built as a recycled +/- pattern
    signs <- rep(c(1,-1)*(-1)^m, length = m+1)
    ## summing k^n/(k!(m-k)!) directly gives rounding errors for (25,5),
    ## hence the gamma-based denominators and the final round()
    gam <- gamma(k+1)
    round(sum(signs * k^n / (gam * rev(gam))))
}
# S3 generic: labelled debug printing ("name = value; ..."); see myprint.default.
myprint <- function(object, ...) UseMethod("myprint")
# Print each argument as "name = value; " using the unevaluated argument
# expression as the label (so myprint(a) prints "a = <value of a>; ").
# Literal strings (expressions containing quotes or backslashes) are printed
# bare, without a label. Relies on the contain() helper defined above.
myprint.default = function (..., local.verbose=T, newline=T) {
# capture the unevaluated argument expressions for use as labels
object <- as.list(substitute(list(...)))[-1]
x=list(...)
for (i in 1:length(x)) {
tmpname <- deparse(object[[i]])[1]
#str(tmpname)
#str(gsub("\\\\","\\",gsub("\"", "", tmpname)))
#str(x[[i]])
#if (gsub("\\\\","\\",gsub("\"", "", tmpname))!=x[[i]]) {
# a quote or backslash in the deparsed text means the arg was a literal
if (contain(tmpname, "\"") | contain(tmpname, "\\")) {
cat (x[[i]])
} else {
cat (tmpname %+% " = ")
cat (x[[i]])
cat ("; ")
}
}
if (newline) cat("\n")
}
#a="hello"; b="you"; myprint (a, b); myprint ("test"); myprint.default ("\t")
# Set a Marsaglia-Multicarry RNG with seed 1 so simulation results can be
# compared with a companion C program using the same generator.
# Side effects: changes the GLOBAL RNG kind and seed, and prints a check value.
setSeedAsInR=function() {
# choose a rng so that results can be compared with that from C program
RNGkind(kind = "Marsaglia-Multicarry", normal.kind = NULL)
set.seed(1, kind = "Marsaglia-Multicarry")
myprint(runif(1))
myprint("the number above should be 0.006153224")
}
# Draw one sample from a mixture of Dirichlet distributions: pick a component
# by inverse-CDF on mixtureCoef, then sample from that component's Dirichlet.
# Each row of mAlpha is one component's parameter vector.
# NOTE(review): rdirichlet is not defined in this file — presumably
# MCMCpack::rdirichlet (ddirichlet below was copied from MCMCpack); confirm.
rmdirichlet=function(mAlpha, mixtureCoef) {
p=runif(1)
k = min(which (p<cumsum (mixtureCoef)))
# above implementation is similar to this, but use rng differently: k = which(rmultinom(1, 1, mixtureCoef)==1)
rdirichlet(1, mAlpha[k,])
}
# each row of mAlpha is a parameter of Dirichlet
# Density of a mixture of Dirichlet distributions at x: the mixture-weighted
# sum of the component densities. Each row of mAlpha parameterizes one
# component; uses the ddirichlet() defined later in this file.
dmdirichlet = function(x, mAlpha, mixtureCoef) {
    density.total = 0
    for (component in seq_len(nrow(mAlpha))) {
        density.total = density.total +
            mixtureCoef[component] * ddirichlet(x, mAlpha[component, ])
    }
    density.total
}
# Log of the multivariate beta function of the vector x:
# sum(lgamma(x_i)) - lgamma(sum(x_i)). Generalizes base lbeta to k arguments.
my.lbeta = function (x) {
    log.gammas = lgamma(x)
    sum(log.gammas) - lgamma(sum(x))
}
# Print a uniformly random integer between min and max (inclusive),
# with a short explanatory message. Output only; nothing useful returned.
lotto = function(max, min=1) {
    winner = floor(runif(1, min, max + 1))
    cat (" ")
    cat (winner)
    cat(" : a random number from "%+%min%+%" to "%+%max)
    cat ("\n")
}
# copied from MCMCpack
# Dirichlet density, copied/adapted from MCMCpack. x may be a single point
# (vector) or a matrix with one point per row; alpha is recycled row-wise
# when given as a vector. Rows outside the simplex get density 0.
ddirichlet = function (x, alpha)
{
    # density at a single point (both arguments plain vectors)
    point.density <- function(xx, aa) {
        log.norm <- sum(lgamma(aa)) - lgamma(sum(aa))
        log.kernel <- sum((aa - 1) * log(xx))
        exp(sum(log.kernel) - log.norm)
    }
    if (!is.matrix(x)) {
        if (is.data.frame(x)) x <- as.matrix(x)
        else x <- t(x)           # a vector becomes a 1-row matrix
    }
    if (!is.matrix(alpha)) {
        alpha <- matrix(alpha, ncol = length(alpha), nrow = nrow(x), byrow = TRUE)
    }
    if (any(dim(x) != dim(alpha)))
        stop("Mismatch between dimensions of x and alpha in ddirichlet().\n")
    pd <- numeric(nrow(x))
    for (i in seq_len(nrow(x))) pd[i] <- point.density(x[i, ], alpha[i, ])
    # zero out rows with components outside [0,1] or not summing to 1
    pd[apply(x, 1, function(z) any(z < 0 | z > 1))] <- 0
    pd[apply(x, 1, function(z) all.equal(sum(z), 1) != TRUE)] <- 0
    return(pd)
}
# the input is a list, typically the output from a sapply call that should be matrix, but have different length
# Pad a jagged list (typically a sapply result whose elements have unequal
# lengths) with NA so every element reaches the longest length, then bind
# into a matrix via mysapply (one ROW per list element).
fill.jagged.array = function(a) {
    # don't check is.matrix, because for some reason, it will return true
    target.len = max(sapply(a, length))
    mysapply(a, function (e) c(e, rep(NA, target.len - length(e))))
}
# don't have to transpose x
# write() a vector/matrix/jagged list without the caller having to transpose:
# matrices are written row-wise, with one matrix row per output line.
mywrite = function(x, ...){
    if (is.list(x)) x = fill.jagged.array(x)
    values.per.line = if (is.null(ncol(x))) length(x) else ncol(x)
    write(t(x), ncol = values.per.line, ...)
}
# Open a completely blank plotting frame (no points, axes, labels, or box),
# e.g. as a placeholder panel in a multi-panel layout.
empty.plot = function () {
    plot(1, 1, xlab = "", ylab = "", xaxt = "n", yaxt = "n", bty = "n", type = "n")
}
# Stable log(sum(exp(logx))): logMeanExp with divisor B = 1.
logSumExp = function (logx){
    logMeanExp(logx, B = 1)
}
# Stable log(exp(logx) + exp(logy)) for two scalars: factor out the larger
# term; when the gap exceeds 300 the smaller term underflows to 0 anyway,
# so just return the larger value.
logSumExpFor2 = function (logx, logy){
    larger = max(logx, logy)
    gap = abs(logx - logy)
    if (gap > 300) return (larger)
    larger + log(exp(logx - larger) + exp(logy - larger))
}
# log( exp(logx1) - exp(logx2) ), computed stably by factoring out exp(logx1);
# expects logx1 > logx2.
# NOTE: this definition is overwritten later in the file by a version that
# also warns when logx1 < logx2.
logDiffExp = function (logx1,logx2){
    logx1 + log(1 - exp(logx2 - logx1))
}
# Stable log(sum(exp(logx)) / B) for a vector of (typically large negative)
# log values: subtract the max before exponentiating to avoid underflow.
# B defaults to length(logx), i.e. the log of the plain mean.
logMeanExp = function (logx, B=NULL){
    if (is.null(B)) B = length(logx)
    shift = max(logx)
    shift + log(sum(exp(logx - shift)) / B)
}
# logMeanExp(log(1:5), 5) # test, should return log(3)
# Stable log( exp(logx1) - exp(logx2) ); expects logx1 >= logx2 and
# prints a warning then returns NaN otherwise.
logDiffExp = function (logx1, logx2){
    if (logx1 < logx2) {
        cat("\nlWarning [logDiffExp]: first argument smaller than second, return NaN.\n\n")
        return (NaN)
    }
    shift = max(logx1, logx2)
    log(exp(logx1 - shift) - exp(logx2 - shift)) + shift
}
# logDiffExp(log(2), log(1)) # test, should return 0
# from combinat package
# All permutations of x, copied from the combinat package. If x is a single
# positive integer it is expanded to seq(x). Returns a list of length
# factorial(n) holding each permutation (or fun applied to it).
# NOTE(review): copied verbatim; the index bookkeeping below implements a
# minimal-change enumeration (adjacent transpositions) — not re-derived here.
permn=function (x, fun = NULL, ...)
{
# a scalar count is shorthand for 1:x
if (is.numeric(x) && length(x) == 1 && x > 0 && trunc(x) ==
x)
x <- seq(x)
n <- length(x)
nofun <- is.null(fun)
# gamma(n+1) == n! slots preallocated
out <- vector("list", gamma(n + 1))
p <- ip <- seqn <- 1:n
d <- rep(-1, n)
d[1] <- 0
m <- n + 1
p <- c(m, p, m)  # sentinels at both ends
i <- 1
use <- -c(1, n + 2)
while (m != 1) {
out[[i]] <- if (nofun)
x[p[use]]
else fun(x[p[use]], ...)
i <- i + 1
m <- n
chk <- (p[ip + d + 1] > seqn)
m <- max(seqn[!chk])
if (m < n)
d[(m + 1):n] <- -d[(m + 1):n]
index1 <- ip[m] + 1
index2 <- p[index1] <- p[index1 + d[m]]
p[index1 + d[m]] <- m
tmp <- ip[index2]
ip[index2] <- ip[m]
ip[m] <- tmp
}
out
}
# Last element of x. A character x is treated as a FILE NAME: the last n
# lines of that file are returned. For vectors/arrays the final element is
# returned (n is ignored); note plain lists hit the is.vector branch first,
# so they return a length-1 sublist, as the original did.
last = function (x, n=1, ...) {
    if (is.character(x)) return(tail(readLines(x), n=n, ...))  # read file
    if (is.vector(x)) return(x[length(x)])
    if (is.array(x)) return(x[length(x)])
    if (is.list(x)) return(x[[length(x)]])
    stop ("last(): x not supported")
}
# dat is all positive and we want density to have positive support as well
# Density estimate for strictly positive data with a positive-support
# correction: estimate on the log scale.
# NOTE(review): appears unfinished — it returns (invisibly, via the final
# assignment) the density of log(dat) WITHOUT transforming back to the
# original scale; confirm intended use before relying on the result.
get.density.boundary.corrected=function(dat) {
# first log transform dat to real line
dat1=log(dat)
tmp=density(dat1)
}
# TRUE when running on a unix-like system, detected by R_HOME being an
# absolute path starting with "/".
unix = function (){
    r.home = Sys.getenv("R_HOME")
    substring(r.home, 1, 1) == "/"
}
#############################################################################
# regression functions or functions written for 570, 571
#############################################################################
# S3 generic: extract model coefficients in this file's preferred format.
mycoef <- function(object, ...) UseMethod("mycoef")
# Coefficients of a drc (dose-response) fit. Newer fits store them in
# $coefficients (names shortened to their first letter); older fits keep
# the optimizer output in $fit$par, labelled by parNames[[2]].
"mycoef.drc" <-
function(object, ...)
{
    cf <- object$"coefficients"
    if (!is.null(cf)) {
        names(cf) <- substr(names(cf), 1, 1)
        return(cf)
    }
    raw <- object$fit$par
    names(raw) <- object$parNames[[2]]
    return(raw)
}
# return a matrix, first column coef, second column se,
# S3 generic: fixed-effect summary matrix (first column = estimate,
# second column = se) from a fitted model object.
getFixedEf <- function(object, ...) UseMethod("getFixedEf")
# Estimates and standard errors from a multiple-imputation result
# (presumably mitools::MIresult — confirm; coef/vcov methods must exist).
getFixedEf.MIresult=function(mir) {
cbind(coef(mir), sqrt(diag(vcov(mir))))
}
# Coefficient table from an ltm (latent trait model) fit.
# NOTE(review): indexes the fit object itself with [-1, 1:2] — presumably
# relies on an ltm indexing/coercion method; confirm against the ltm package.
getFixedEf.ltm=function (fit) {
fit[-1,1:2]
}
#add CI to summary
# S3 generic: like summary() but with confidence intervals added.
mysummary <- function(object, ...) UseMethod("mysummary")
#add prediction interval to summary
# S3 generic: like predict() but with prediction intervals added.
mypredict <- function(object, ...) UseMethod("mypredict")
# returns sandwich estimator of variance matrix
# from Thomas Lumley
# Infinitesimal-jackknife (sandwich) variance estimate for a glm with
# observations clustered by `groups` (from Thomas Lumley). Bread is the
# unscaled covariance, meat is the outer product of per-cluster score sums.
infjack.glm <- function(glm.obj, groups){
    scores <- estfun.glm(glm.obj)
    cluster.sums <- rowsum(scores, groups, reorder = F)
    bread <- summary(glm.obj)$cov.unscaled
    bread %*% (t(cluster.sums) %*% cluster.sums) %*% bread
}
# from Thomas Lumley
# Jackknife variance estimate for a glm with observations clustered by
# `groups` (from Thomas Lumley): outer product of per-cluster jackknife
# coefficient changes, with the usual (n-1)/n scaling.
jack.glm <- function(glm.obj, groups){
    jack.deltas <- jackvalues(glm.obj)
    cluster.sums <- rowsum(jack.deltas, groups, reorder = F)
    scale.factor <- (nrow(jack.deltas) - 1) / nrow(jack.deltas)
    t(cluster.sums) %*% cluster.sums * scale.factor
}
# from Thomas Lumley
# Centered leave-one-out coefficient changes for a glm (from Thomas Lumley):
# lm.influence()$coef per observation, with column means subtracted.
jackvalues <- function(glm.obj){
    delta.beta <- lm.influence(glm.obj)$coef
    centered <- t(t(delta.beta) - apply(delta.beta, 2, mean))
    centered
}
# from Thomas Lumley
# Per-observation score (estimating-function) contributions for a glm
# (from Thomas Lumley): working residual * working weight * model matrix.
# Uses the stored $x when the fit kept it, otherwise rebuilds the model matrix.
estfun.glm <- function(glm.obj){
    if (is.matrix(glm.obj$x)) {
        design <- glm.obj$x
    } else {
        frame <- model.frame(glm.obj)
        design <- model.matrix(terms(glm.obj), frame)
    }
    residuals(glm.obj, "working") * glm.obj$weights * design
}
# S3 coef method for geepack::geese fits: first column of the mean-model
# summary, relabelled with the coefficient names from $beta.
coef.geese = function (geese1, ...) {
tmp = summary(geese1)$mean[,1]
names (tmp)=names (geese1$beta)
tmp
}
# S3 vcov method for geepack::geese fits: the $vbeta matrix with row and
# column names taken from the coefficient names in $beta.
vcov.geese = function (geese1, ...) {
    vc = geese1$vbeta
    coef.names = names (geese1$beta)
    dimnames (vc) = list (coef.names, coef.names)
    vc
}
# Raw residuals y - X beta for a geese (GEE) fit; y and the design matrix x
# must be supplied by the caller (the fit object does not store them).
residuals.geese = function (geese1, y, x) {
    linear.pred = x %*% geese1$beta
    y - linear.pred
}
# Linear predictor X beta for a geese (GEE) fit at new design matrix x.
predict.geese = function (geese1, x) {
    coef.vec = geese1$beta
    x %*% coef.vec
}
#get estimates, variances, sd from lmer fit
# Fixed effects from an lme4 mer fit: estimate, se, Wald z, and two-sided
# normal p-value (one row per fixed effect).
# NOTE(review): useScale is an argument of older lme4 vcov methods; confirm
# against the installed lme4 version.
getFixedEf.mer = function (lmerfit) {
Vcov <- lme4::vcov(lmerfit, useScale = FALSE)
betas <- lme4::fixef(lmerfit)
se <- sqrt(diag(Vcov))
zval <- betas / se
pval <- 2 * pnorm(abs(zval), lower.tail = FALSE)
cbind("Estimate"=betas, se, zval, pval)
}
#get estimates, variances, sd from lme fit
# fixed-effect table for an nlme fit: estimate, SE, Wald z and p-value;
# only needs the $coef$fixed vector and the $varFix matrix
getFixedEf.lme = function (lme1) {
    betas <- lme1$coef$fixed
    se <- sqrt(diag(lme1$varFix))
    zval <- betas/se
    pval <- 2 * pnorm(abs(zval), lower.tail = FALSE)
    cbind(betas = betas, se = se, zval = zval, pval = pval)
}
getFixedEf.geese = function (geese1) {
# mean-model coefficient table from the geese summary
# NOTE(review): relies on summary(geese1) having an element matching $mean
summary(geese1)$mean
}
getFixedEf.glm = function (glm1) {
    # coefficient table (estimate, se, z/t, p-value) from the glm summary
    coef(summary(glm1))
}
getFixedEf.gam = function (gam1) {
    # parametric coefficients of a gam fit paired with their standard errors
    # (the leading entries of the summary's se vector)
    s = summary(gam1)
    n.para = length(s$p.coef)
    cbind (s$p.coef, s$se[1:n.para])
}
getFixedEf.lm = function (lm1) {
    # coefficient table (estimate, se, t, p-value) from the lm summary
    coef(summary(lm1))
}
getFixedEf.gee = function (gee1) {
# coefficient table from the gee summary
# ($coef partial-matches the summary's $coefficients element)
summary(gee1)$coef
}
# fixed-effect summary table from an INLA fit, with the intercept row moved
# to the top and the first column renamed "Estimate"
getFixedEf.inla = function (inlafit) {
tmp = summary(inlafit)$fixed
n=nrow(tmp)
tmp.name = row.names(tmp)[n]
# move intercept to the top
# NOTE(review): only triggers when the LAST row is literally named
# "intercept" -- confirm this matches the INLA naming convention in use
if (tmp.name=="intercept") {
# prepend the last row, then keep the first n rows (i.e. rotate it to row 1)
tmp = rbind (tmp[n,],tmp)[1:n,,drop=F]
dimnames (tmp)[[1]][1] = tmp.name
}
# rename first column
dimnames (tmp)[[2]][1] = "Estimate"
tmp
}
getFixedEf.coxph=function (fit){
    # columns 1 (coef) and 3 (se(coef)) of the coxph summary coefficient table
    summary(fit)$coef[, c(1, 3)]
}
# used to get mean and sd from a jags or winbugs sample
# each column of samples is a variable
# NOTE(review): a second getFixedEf.matrix (with quantile columns) is defined
# further down in this file and shadows this one when the file is sourced
# top to bottom -- consider removing one of the two definitions.
getFixedEf.matrix=function (samples) {
# one row per column of `samples`: posterior mean and sd
t(apply(samples, 2, function (x) c("Estimate"=mean(x), "sd"=sd(x))))
}
# add CI
# augment a getFixedEf() table with normal-approximation 95% confidence
# bounds computed from columns 1 (estimate) and 2 (standard error)
getFixedEf2 = function (object, ...) {
    est.table = getFixedEf (object, ...)
    half.width = 1.96 * est.table[, 2]
    cbind(est.table,
        "lower bound" = est.table[, 1] - half.width,
        "upper bound" = est.table[, 1] + half.width)
}
# modified copy of base summary.lm that appends 95% confidence bounds
# ("CI left"/"CI right", using qt(.975, rdf)) to the coefficient table;
# otherwise follows the base implementation statement for statement
mysummary.lm = function (object, correlation = FALSE, symbolic.cor = FALSE, ...)
{
z <- object
p <- z$rank
# degenerate fit with no estimated coefficients: build an empty summary
if (p == 0) {
r <- z$residuals
n <- length(r)
w <- z$weights
if (is.null(w)) {
rss <- sum(r^2)
}
else {
rss <- sum(w * r^2)
r <- sqrt(w) * r
}
resvar <- rss/(n - p)
ans <- z[c("call", "terms")]
class(ans) <- "summary.lm"
ans$aliased <- is.na(coef(object))
ans$residuals <- r
ans$df <- c(0, n, length(ans$aliased))
ans$coefficients <- matrix(NA, 0, 6)
dimnames(ans$coefficients) <- list(NULL, c("Estimate",
"Std. Error", "lower CI", "higher CI", "t value", "Pr(>|t|)"))
ans$sigma <- sqrt(resvar)
ans$r.squared <- ans$adj.r.squared <- 0
return(ans)
}
Qr <- object$qr
if (is.null(z$terms) || is.null(Qr))
stop("invalid 'lm' object: no terms nor qr component")
n <- NROW(Qr$qr)
rdf <- n - p
if (rdf != z$df.residual)
warning("inconsistent residual degrees of freedom. -- please report!")
p1 <- 1:p
r <- z$residuals
f <- z$fitted
w <- z$weights
if (is.null(w)) {
mss <- if (attr(z$terms, "intercept"))
sum((f - mean(f))^2)
else sum(f^2)
rss <- sum(r^2)
}
else {
mss <- if (attr(z$terms, "intercept")) {
m <- sum(w * f/sum(w))
sum(w * (f - m)^2)
}
else sum(w * f^2)
rss <- sum(w * r^2)
r <- sqrt(w) * r
}
resvar <- rss/rdf
R <- chol2inv(Qr$qr[p1, p1, drop = FALSE])
se <- sqrt(diag(R) * resvar)
est <- z$coefficients[Qr$pivot[p1]]
#youyi
# the added 95% CI columns (level is hard-coded via qt(.975, rdf))
ci.l = est - qt(.975, rdf, lower.tail=T)*se
ci.r = est + qt(.975, rdf, lower.tail=T)*se
tval <- est/se
ans <- z[c("call", "terms")]
ans$residuals <- r
ans$coefficients <- cbind(est, se, tval, 2 * pt(abs(tval),
rdf, lower.tail = FALSE), ci.l, ci.r)
dimnames(ans$coefficients) <- list(names(z$coefficients)[Qr$pivot[p1]],
c("Estimate", "Std. Error", "t value", "Pr(>|t|)", "CI left", "CI right"))
ans$aliased <- is.na(coef(object))
ans$sigma <- sqrt(resvar)
ans$df <- c(p, rdf, NCOL(Qr$qr))
if (p != attr(z$terms, "intercept")) {
df.int <- if (attr(z$terms, "intercept"))
1
else 0
ans$r.squared <- mss/(mss + rss)
ans$adj.r.squared <- 1 - (1 - ans$r.squared) * ((n -
df.int)/rdf)
ans$fstatistic <- c(value = (mss/(p - df.int))/resvar,
numdf = p - df.int, dendf = rdf)
}
else ans$r.squared <- ans$adj.r.squared <- 0
ans$cov.unscaled <- R
dimnames(ans$cov.unscaled) <- dimnames(ans$coefficients)[c(1,
1)]
if (correlation) {
ans$correlation <- (R * resvar)/outer(se, se)
dimnames(ans$correlation) <- dimnames(ans$cov.unscaled)
ans$symbolic.cor <- symbolic.cor
}
class(ans) <- "summary.lm"
ans
}
# modified copy of Design/rms print.ols that appends 95% confidence bounds
# ("CI left"/"CI right") to the printed coefficient table; prints to the
# console and returns nothing (invisible())
mysummary.ols=function (x, digits = 4, long = FALSE, ...)
{
oldopt <- options(digits = digits)
on.exit(options(oldopt))
cat("\n")
cat("Linear Regression Model\n\n")
dput(x$call)
cat("\n")
if (!is.null(z <- x$na.action))
naprint(z)
stats <- x$stats
if (lst <- length(stats)) {
# NOTE(review): `.R.` is a legacy Hmisc/Design global flag (TRUE under R,
# FALSE under S-Plus); it is undefined in modern R sessions without Hmisc
if (.R.) {
cstats <- character(lst)
names(cstats) <- names(stats)
for (i in 1:lst) cstats[i] <- format(stats[i])
print(cstats, quote = FALSE)
}
else print(x$stats)
cat("\n")
}
pen <- length(x$penalty.matrix) > 0
resid <- x$residuals
n <- length(resid)
p <- length(x$coef) - (names(x$coef)[1] == "Intercept")
if (length(x$stats) == 0)
cat("n=", n, " p=", p, "\n\n", sep = "")
ndf <- x$stats["d.f."]
df <- c(ndf, n - ndf - 1, ndf)
r2 <- x$stats["R2"]
sigma <- x$stats["Sigma"]
rdf <- df[2]
if (rdf > 5) {
cat("Residuals:\n")
if (length(dim(resid)) == 2) {
rq <- apply(t(resid), 1, quantile)
dimnames(rq) <- list(c("Min", "1Q", "Median", "3Q",
"Max"), dimnames(resid)[[2]])
}
else {
rq <- quantile(resid)
names(rq) <- c("Min", "1Q", "Median", "3Q", "Max")
}
print(rq, digits = digits, ...)
}
else if (rdf > 0) {
cat("Residuals:\n")
print(resid, digits = digits, ...)
}
if (nsingular <- df[3] - df[1])
cat("\nCoefficients: (", nsingular, " not defined because of singularities)\n",
sep = "")
else cat("\nCoefficients:\n")
se <- sqrt(diag(x$var))
z <- x$coefficients/se
P <- 2 * (1 - pt(abs(z), rdf))
# the added 95% CI columns (level hard-coded via qt(.975, rdf))
ci.l = x$coefficients - qt(.975, rdf, lower.tail=T)*se
ci.r = x$coefficients + qt(.975, rdf, lower.tail=T)*se
co <- cbind(x$coefficients, se, z, P, ci.l, ci.r)
dimnames(co) <- list(names(x$coefficients), c("Value", "Std. Error",
"t", "Pr(>|t|)", "CI left", "CI right"))
print(co)
if (pen)
cat("\n")
else cat("\nResidual standard error:", format(signif(sigma,
digits)), "on", rdf, "degrees of freedom\n")
rsqa <- 1 - (1 - r2) * (n - 1)/rdf
if (length(x$stats) == 0)
cat("Multiple R-Squared:", format(signif(r2, digits)),
" ")
cat("Adjusted R-Squared:", format(signif(rsqa, digits)),
"\n")
if (!pen) {
if (long && p > 0) {
correl <- diag(1/se) %*% x$var %*% diag(1/se)
dimnames(correl) <- dimnames(x$var)
cat("\nCorrelation of Coefficients:\n")
ll <- lower.tri(correl)
correl[ll] <- format(round(correl[ll], digits), ...)
correl[!ll] <- ""
print(correl[-1, -(p + 1), drop = FALSE], quote = FALSE,
digits = digits, ...)
}
}
cat("\n")
invisible()
}
# S3 summary method for "ols" fits: just print the object.
# FIX: added `...` so the signature is compatible with the summary() generic
# (extra arguments are now ignored instead of raising "unused argument").
# NOTE(review): this masks any summary.ols provided by the Design/rms package.
summary.ols = function (ols1, ...) {
    print(ols1)
}
# modified copy of base summary.glm that appends 95% confidence bounds
# ("lower CI"/"upper CI", t-based via qt(.975, df.r)) to the coefficient
# table; otherwise follows the base implementation
mysummary.glm = function (object, dispersion = NULL, correlation = FALSE, symbolic.cor = FALSE, ...)
{
est.disp <- FALSE
df.r <- object$df.residual
# dispersion: fixed at 1 for poisson/binomial, otherwise estimated (or Inf
# with no residual df)
if (is.null(dispersion))
dispersion <- if (any(object$family$family == c("poisson",
"binomial")))
1
else if (df.r > 0) {
est.disp <- TRUE
if (any(object$weights == 0))
warning("observations with zero weight ", "not used for calculating dispersion")
sum(object$weights * object$residuals^2)/df.r
}
else Inf
p <- object$rank
if (p > 0) {
p1 <- 1:p
Qr <- object$qr
aliased <- is.na(coef(object))
coef.p <- object$coefficients[Qr$pivot[p1]]
covmat.unscaled <- chol2inv(Qr$qr[p1, p1, drop = FALSE])
dimnames(covmat.unscaled) <- list(names(coef.p), names(coef.p))
covmat <- dispersion * covmat.unscaled
var.cf <- diag(covmat)
s.err <- sqrt(var.cf)
tvalue <- coef.p/s.err
#youyi
# the added CI columns; note they use the t quantile even in the
# known-dispersion (z statistic) branch below
ci.l = coef.p - qt(.975, df.r, lower.tail=T)*s.err
ci.r = coef.p + qt(.975, df.r, lower.tail=T)*s.err
dn <- c("Estimate", "Std. Error")
if (!est.disp) {
pvalue <- 2 * pnorm(-abs(tvalue))
coef.table <- cbind(coef.p, s.err, tvalue, pvalue, ci.l, ci.r)
dimnames(coef.table) <- list(names(coef.p), c(dn,
"z value", "Pr(>|z|)", "lower CI", "upper CI"))
}
else if (df.r > 0) {
pvalue <- 2 * pt(-abs(tvalue), df.r)
coef.table <- cbind(coef.p, s.err, tvalue, pvalue, ci.l, ci.r)
dimnames(coef.table) <- list(names(coef.p), c(dn,
"t value", "Pr(>|t|)", "lower CI", "upper CI"))
}
else {
coef.table <- cbind(coef.p, Inf)
dimnames(coef.table) <- list(names(coef.p), dn)
}
df.f <- NCOL(Qr$qr)
}
else {
# rank-0 fit: empty coefficient table (kept at 4 columns, as in base R)
coef.table <- matrix(, 0, 4)
dimnames(coef.table) <- list(NULL, c("Estimate", "Std. Error",
"t value", "Pr(>|t|)"))
covmat.unscaled <- covmat <- matrix(, 0, 0)
aliased <- is.na(coef(object))
df.f <- length(aliased)
}
ans <- c(object[c("call", "terms", "family", "deviance",
"aic", "contrasts", "df.residual", "null.deviance", "df.null",
"iter")], list(deviance.resid = residuals(object, type = "deviance"),
coefficients = coef.table, aliased = aliased, dispersion = dispersion,
df = c(object$rank, df.r, df.f), cov.unscaled = covmat.unscaled,
cov.scaled = covmat))
if (correlation && p > 0) {
dd <- sqrt(diag(covmat.unscaled))
ans$correlation <- covmat.unscaled/outer(dd, dd)
ans$symbolic.cor <- symbolic.cor
}
class(ans) <- "summary.glm"
return(ans)
}
# modified copy of base predict.glm; the only change is that the se.fit
# branch asks predict.lm for a confidence interval as well (see the
# "youyi adds interval here" comment below)
mypredict.glm = function (object, newdata = NULL, type = c("link", "response",
"terms"), se.fit = FALSE, dispersion = NULL, terms = NULL,
na.action = na.pass, ...)
{
type <- match.arg(type)
na.act <- object$na.action
object$na.action <- NULL
if (!se.fit) {
# fast path: no standard errors requested
if (missing(newdata)) {
pred <- switch(type, link = object$linear.predictors,
response = object$fitted, terms = predict.lm(object,
se.fit = se.fit, scale = 1, type = "terms",
terms = terms))
if (!is.null(na.act))
pred <- napredict(na.act, pred)
}
else {
pred <- predict.lm(object, newdata, se.fit, scale = 1,
type = ifelse(type == "link", "response", type),
terms = terms, na.action = na.action)
switch(type, response = {
pred <- family(object)$linkinv(pred)
}, link = , terms = )
}
}
else {
if (inherits(object, "survreg"))
dispersion <- 1
if (is.null(dispersion) || dispersion == 0)
dispersion <- summary(object, dispersion = dispersion)$dispersion
residual.scale <- as.vector(sqrt(dispersion))
pred <- predict.lm(object, newdata, se.fit, scale = residual.scale,
type = ifelse(type == "link", "response", type),
terms = terms, na.action = na.action, interval="confidence")#youyi adds interval here
fit <- pred$fit
se.fit <- pred$se.fit
# delta-method scaling of the SEs when predictions are on the
# response scale
switch(type, response = {
se.fit <- se.fit * abs(family(object)$mu.eta(fit))
fit <- family(object)$linkinv(fit)
}, link = , terms = )
if (missing(newdata) && !is.null(na.act)) {
fit <- napredict(na.act, fit)
se.fit <- napredict(na.act, se.fit)
}
pred <- list(fit = fit, se.fit = se.fit, residual.scale = residual.scale)
}
pred
}
# modified copy of base predict.lm: prediction with optional standard errors
# and confidence/prediction intervals; for type="terms" the interval bounds
# are returned as separate lwr/upr components of the result list
mypredict.lm=function (object, newdata, se.fit = FALSE, scale = NULL, df = Inf,
interval = c("none", "confidence", "prediction"), level = 0.95,
type = c("response", "terms"), terms = NULL, na.action = na.pass,
...)
{
tt <- terms(object)
if (missing(newdata) || is.null(newdata)) {
# predict at the training data: reuse the stored model matrix
mm <- X <- model.matrix(object)
mmDone <- TRUE
offset <- object$offset
}
else {
# rebuild the design matrix for the new data
Terms <- delete.response(tt)
m <- model.frame(Terms, newdata, na.action = na.action,
xlev = object$xlevels)
if (!is.null(cl <- attr(Terms, "dataClasses")))
.checkMFClasses(cl, m)
X <- model.matrix(Terms, m, contrasts = object$contrasts)
offset <- if (!is.null(off.num <- attr(tt, "offset")))
eval(attr(tt, "variables")[[off.num + 1]], newdata)
else if (!is.null(object$offset))
eval(object$call$offset, newdata)
mmDone <- FALSE
}
n <- length(object$residuals)
p <- object$rank
p1 <- seq(len = p)
piv <- object$qr$pivot[p1]
if (p < ncol(X) && !(missing(newdata) || is.null(newdata)))
warning("prediction from a rank-deficient fit may be misleading")
beta <- object$coefficients
predictor <- drop(X[, piv, drop = FALSE] %*% beta[piv])
if (!is.null(offset))
predictor <- predictor + offset
interval <- match.arg(interval)
type <- match.arg(type)
if (se.fit || interval != "none") {
# residual variance: from the fit unless an explicit scale is given
res.var <- if (is.null(scale)) {
r <- object$residuals
w <- object$weights
rss <- sum(if (is.null(w))
r^2
else r^2 * w)
df <- n - p
rss/df
}
else scale^2
if (type != "terms") {
if (p > 0) {
XRinv <- if (missing(newdata) && is.null(w))
qr.Q(object$qr)[, p1, drop = FALSE]
else X[, piv] %*% qr.solve(qr.R(object$qr)[p1,
p1])
ip <- drop(XRinv^2 %*% rep(res.var, p))
}
else ip <- rep(0, n)
}
}
if (type == "terms") {
# per-term contributions to the prediction
if (!mmDone) {
mm <- model.matrix(object)
mmDone <- TRUE
}
aa <- attr(mm, "assign")
ll <- attr(tt, "term.labels")
if (attr(tt, "intercept") > 0)
ll <- c("(Intercept)", ll)
aaa <- factor(aa, labels = ll)
asgn <- split(order(aa), aaa)
hasintercept <- attr(tt, "intercept") > 0
if (hasintercept) {
asgn$"(Intercept)" <- NULL
if (!mmDone) {
mm <- model.matrix(object)
mmDone <- TRUE
}
avx <- colMeans(mm)
termsconst <- sum(avx[piv] * beta[piv])
}
nterms <- length(asgn)
if (nterms > 0) {
predictor <- matrix(ncol = nterms, nrow = NROW(X))
dimnames(predictor) <- list(rownames(X), names(asgn))
if (se.fit || interval != "none") {
ip <- matrix(ncol = nterms, nrow = NROW(X))
dimnames(ip) <- list(rownames(X), names(asgn))
Rinv <- qr.solve(qr.R(object$qr)[p1, p1])
}
if (hasintercept)
X <- sweep(X, 2, avx)
unpiv <- rep.int(0, NCOL(X))
unpiv[piv] <- p1
for (i in seq(1, nterms, length = nterms)) {
iipiv <- asgn[[i]]
ii <- unpiv[iipiv]
iipiv[ii == 0] <- 0
# NOTE(review): `any(iipiv) > 0` (parenthesized this way) is a
# quirk inherited from old base-R predict.lm; it evaluates to the
# same truth value as any(iipiv > 0) for non-negative iipiv
predictor[, i] <- if (any(iipiv) > 0)
X[, iipiv, drop = FALSE] %*% beta[iipiv]
else 0
if (se.fit || interval != "none")
ip[, i] <- if (any(iipiv) > 0)
as.matrix(X[, iipiv, drop = FALSE] %*% Rinv[ii,
, drop = FALSE])^2 %*% rep.int(res.var,
p)
else 0
}
if (!is.null(terms)) {
predictor <- predictor[, terms, drop = FALSE]
if (se.fit)
ip <- ip[, terms, drop = FALSE]
}
}
else {
predictor <- ip <- matrix(0, n, 0)
}
attr(predictor, "constant") <- if (hasintercept)
termsconst
else 0
}
if (interval != "none") {
# tfrac is negative (lower-tail quantile), so "+ hwid" is the lower bound
tfrac <- qt((1 - level)/2, df)
hwid <- tfrac * switch(interval, confidence = sqrt(ip),
prediction = sqrt(ip + res.var))
if (type != "terms") {
predictor <- cbind(predictor, predictor + hwid %o%
c(1, -1))
colnames(predictor) <- c("fit", "lwr", "upr")
}
else {
lwr <- predictor + hwid
upr <- predictor - hwid
}
}
if (se.fit || interval != "none")
se <- sqrt(ip)
if (missing(newdata) && !is.null(na.act <- object$na.action)) {
predictor <- napredict(na.act, predictor)
if (se.fit)
se <- napredict(na.act, se)
}
if (type == "terms" && interval != "none") {
if (missing(newdata) && !is.null(na.act)) {
lwr <- napredict(na.act, lwr)
upr <- napredict(na.act, upr)
}
list(fit = predictor, se.fit = se, lwr = lwr, upr = upr,
df = df, residual.scale = sqrt(res.var))
}
else if (se.fit)
list(fit = predictor, se.fit = se, df = df, residual.scale = sqrt(res.var))
else predictor
}
# predict.gam returns a list of fit and se.fit. This function transforms it to
# a matrix: 1st col is fit, 2nd col is LI (fit - 2*se), 3rd col is UI
# (fit + 2*se), 4th col is se.fit.
# BUG FIX: the column names were supplied as a list(), which is an invalid
# dimnames component (each element must be a character vector or NULL) and
# made the dimnames<- assignment fail; use c() instead.
mypredict.gam = function (...) {
    pred = predict(...)
    x = matrix (0, length (pred$fit), 4)
    x[, 1] = pred$fit
    x[, 2] = pred$fit - 2 * pred$se.fit
    x[, 3] = pred$fit + 2 * pred$se.fit
    x[, 4] = pred$se.fit
    dimnames (x) = list (NULL, c("prediction", "LI", "UI", "se"))
    x
}
# midpoints of adjacent entries: (x[i] + x[i+1]) / 2 for i = 1..n-1
# (note: the 2:length(x) subscript gives an odd result for length-1 input)
getMidPoints=function(x){
    pair.sums = c(0, x) + c(x, 0)
    (pair.sums/2)[2:length(x)]
}
# from a list of fits, say lmer, inla fits, return formatted summary controlled by "type"
# for a matrix, return Monte Carlo variance
# random=TRUE returns variance components
# type=1: est
# type=2: est (se)
# type=3: est (2.5%, 97.5%)
# type=4: est se
# BUG FIX: the lmer check used class(fit)=="mer" with the elementwise `&`;
# class() may return a vector, so this could warn or pick the wrong branch.
# Use inherits() with the scalar && instead. Also corrected the function name
# in the unsupported-type error message.
getFormattedSummary=function(fits, type=1, est.digits=2, se.digits=2, random=FALSE){
    # should not use mysapply here
    t(mysapply(fits, function (fit) {
        if (random) {
            tmp = getVarComponent (fit)
            if (inherits(fit, "mer") && type!=1) {
                warning ("only point estimate is available for variance components from lmer fit, forcing type to 1")
                type=1
            }
        } else {
            tmp = getFixedEf (fit)
        }
        if (type==1)
            # this is the best way to format: first round, then nsmall
            format(round(tmp[,1], est.digits), nsmall=est.digits, scientific=FALSE)
        else if (type==2)
            format(tmp[,1], digits=1, nsmall=est.digits, scientific=FALSE) %+% " (" %+%
            format(tmp[,2], digits=1, nsmall=se.digits, scientific=FALSE) %+% ")"
        else if (type==3)
            format(tmp[,1], digits=1, nsmall=est.digits, scientific=FALSE) %+% " (" %+%
            format(tmp[,3], digits=1, nsmall=est.digits, scientific=FALSE) %+% ", " %+%
            format(tmp[,4], digits=1, nsmall=est.digits, scientific=FALSE) %+% ")"
        else if (type==4)
            # a space is inserted between est and se, they could be post-processed in swp
            format(tmp[,1], digits=1, nsmall=est.digits, scientific=FALSE) %+% " " %+%
            format(tmp[,2], digits=1, nsmall=se.digits, scientific=FALSE)
        else
            stop ("getFormattedSummary(). type not supported: "%+%type)
    }))
}
# S3 generic: extract random-effect / variance components from a fitted model
getVarComponent <- function(object, ...) UseMethod("getVarComponent")
getVarComponent.mer = function (lmer1) {
    # random-effect standard deviations of an (old-style) lme4 fit,
    # one entry per variance-component term
    vc <- lme4::VarCorr(lmer1)
    mysapply(vc, function (one.term) attr(one.term, "stddev"))
}
getVarComponent.lme = function (lme.fit) {
# variance/correlation components of an nlme fit, as returned by VarCorr
VarCorr(lme.fit)
}
# used to get mean and sd from a jags or winbugs sample; getVarComponent.matrix
# and getFixedEf.matrix do the same thing
# each column of samples is a variable; returns one row per variable with the
# posterior mean, sd and 2.5%/97.5% quantiles
getVarComponent.matrix = function (samples) {
    summarize.col = function (draws) {
        c("Estimate" = mean(draws), "sd" = sd(draws),
          "2.5%" = quantile(draws, .025), "97.5" = quantile(draws, .975))
    }
    t(apply(samples, 2, summarize.col))
}
# posterior mean, sd and 2.5%/97.5% quantiles for each column of an MCMC
# sample matrix (one row per variable)
# NOTE(review): this redefines the getFixedEf.matrix given earlier in the
# file (which had only Estimate/sd columns); this later definition wins when
# the file is sourced -- consider removing the earlier one.
getFixedEf.matrix = function (samples) {
t(apply(samples, 2, function (x) c("Estimate"=mean(x), "sd"=sd(x), "2.5%"=quantile(x,.025), "97.5"=quantile(x,.975))))
}
# returns estimate of standard deviation and the estimated sd of that estimate
# for each hyperparameter marginal of an INLA hyperpar object: precision
# ("Prec...") marginals are transformed to the sd scale via getMeanSd,
# correlation ("Rho...") entries are taken from the summary table directly
getVarComponent.hyperpar.inla = function (hyper1, transformation=NULL) {
marginals = hyper1$marginals
out = mysapply(1:length(marginals), function (i) {
# this is a little precarious, but hey
# (dispatch on the marginal's name prefix)
if (startsWith(names(marginals)[i],"Prec")) {
if (is.null (transformation)) {
# precision -> sd via 1/sqrt(.)
getMeanSd(marginals[[i]],"inversesqrt")
} else {
getMeanSd(marginals[[i]],transformation)
}
} else if (startsWith(names(marginals)[i],"Rho")) {
# columns: mean, sd, 2.5% and 97.5% quantiles of the summary
hyper1$summary[i, c(1,2,3,5)]
} else {
stop ("don't know what to do with this names(marginals)[i]: "%+% names(marginals)[i] )
}
})
dimnames (out)[[1]]="sigma.or.rho."%+%dimnames (out)[[1]]
out
}
###########################################
# for use with INLA
###########################################
# dat is all positive and we want density to have positive support as well
# remember to transform the density!
# Kernel density estimate with a positivity correction: if the data are all
# non-negative, estimate the density of log(dat) and back-transform by the
# change-of-variables formula; otherwise fall back to a plain density().
get.density.boundary.corrected=function(dat) {
    if (min(dat) < 0) {
        return (density(dat))
    }
    log.dens = density(log(dat))
    back = exp(log.dens$x)
    list (x = back, y = log.dens$y/back)
}
# return the mean, sd, CI of the transformed variable
# `marginal` is a 2-column matrix: values of the parameter (tao) and their
# density. The marginal is spline-interpolated on the log scale, converted to
# a discrete pmf on a fine grid, and moments of func(x) = f(exp(x)) are
# accumulated numerically.
# NOTE(review): the 2.5%/97.5% slots of the result are hard-coded to 0; the
# boundary indices are computed below but deliberately not used (see the
# commented-out alternative) because of boundary problems.
getMeanSd=function (marginal, f="identity") {
# interpolations suggested by Havard: do it on the original scale
# change of variables: density of log(tao) is density(tao) * tao
logtao=log(marginal[,1]); p.logtao=marginal[,2]*marginal[,1]
fun = splinefun(logtao, log(p.logtao))
h=0.001
x = seq(min(logtao),max(logtao),by=h)
# discrete pmf on the grid (density * step width)
pmf = exp(fun(x))*h
# sum (pmf) # Prob
# x = seq(min(logtao)-sd(logtao)/2,max(logtao)+sd(logtao)/2,by=h)
# pmf = exp(fun(x))*h
# sum (pmf) # Prob
# x = seq(min(logtao)-sd(logtao),max(logtao)+sd(logtao),by=h)
# pmf = exp(fun(x))*h
# sum (pmf) # Prob
# x = seq(min(logtao)-sd(logtao)*2,max(logtao)+sd(logtao)*2,by=h)
# pmf = exp(fun(x))*h
# sum (pmf) # Prob
# grid indices where the cumulative pmf crosses 2.5% and 97.5%
lower.boundary = rle(cumsum(pmf)>.025)$lengths[1]+1
upper.boundary = rle(cumsum(pmf)>.975)$lengths[1]+1
# if (pmf[lower.boundary]>.04) {
# #stop ("getMeanSd(): pmf too large at lower boundary: "%+%pmf[lower.boundary])
# return (rep(NA, 4))
# }
# if (pmf[upper.boundary]>.04) {
# stop ("getMeanSd(): pmf too large at upper boundary"%+%pmf[upper.boundary])
# return (rep(NA, 4))
# }
# transformation applied to the parameter before taking moments
if (f=="identity") {
func=function(x) { exp(x) }
} else if (f=="inverse") {
func=function(x) { exp(x)**-1 }
} else if (f=="inversesqrt") {
func=function(x) { exp(x)**-.5 }
} else
stop ("getMeanSd(): function not supported "%+%f)
mu = sum( pmf * func(x))
stdev = (sum( pmf * func(x)**2 ) - mu**2) ** .5
out = c("mean"=mu, "stddev"=stdev, 0,0 ) # we may run into problems with the boundary
#out = c("mean"=mu, "stddev"=stdev, sort(func(x)[c(lower.boundary, upper.boundary)]) );
names(out)[3]="2.5%"; names(out)[4]="97.5%";
out
}
# flatten a prior specification (distribution name plus its parameters) into
# the numeric parameter vector expected by INLA's f()
get.inla.f.param=function(arg) {
    switch(arg$distr,
        "Gamma" = c(arg$shape, arg$rate),
        "2DWishart" = c(arg$r, arg$R[1,1], arg$R[2,2], arg$R[1,2]),
        "3DWishart" = c(arg$r, arg$R[1,1], arg$R[2,2], arg$R[3,3], arg$R[1,2], arg$R[1,3], arg$R[2,3]),
        stop ("get.inla.f.param(). distr not supported: "%+%arg$distr)
    )
}
# S4 class bundling a dataset (real or simulated) with everything needed to
# fit it by several engines (JAGS/BUGS Gibbs sampling, INLA, glm, lme, lmer,
# glmmPQL): model specifications, call expressions, and fit/sample slots.
setClass("data.model", representation(
# attributes
name="character"
, dat="data.frame"
, is.sim="logical"
, data.sim.function="function"
, data.sim.function.default.call="expression" # useful for running jags
# for BUGS and JAGS
, gibbs.priors="list"
, gibbs.additional.data="list"
, gibbs.init="list"
, gibbs.chains="numeric"
, gibbs.n.iter="numeric"
, gibbs.watch.list="array"
, gibbs.samples="list"
# for INLA
, inla.model="ANY" #if formula is specified here, then it cannot be NULL, same goes for all models
, inla.pre.call="expression"
, inla.call="expression"
, inla.fit="ANY"
, hyper.fit="ANY"
# for GLM
, glm.model="ANY"
, glm.call="expression"
, glm.fit = "ANY"
# for LME
, lme.model="ANY"
, lme.call="expression"
, lme.fit="ANY"
# for LMER
, lmer.model="ANY"
, lmer.call="expression"
, lmer.fit="ANY"
# for glmmPQL
, glmmPQL.call="expression"
, glmmPQL.fit="ANY"
)
, prototype = list(
# default values
gibbs.chains=3
, is.sim=FALSE
, inla.pre.call=expression(print("default inla.pre.call, do nothing"))
, gibbs.samples=list()
)
)
# build the path of a saved JAGS sample file:
# <model>/<model>_prior<prior>_chain<chain>[_seed<seed>].jags
# BUG FIX: the original used ifelse(is.null(seed) | is.na(seed), ...), which
# collapses to logical(0) when seed is NULL (is.na(NULL) has length 0);
# use a scalar if/else with the short-circuiting || instead.
getSamplesFileName=function (data.model,seed,chain,prior){
    seedstr = if (is.null(seed) || is.na(seed)) "" else "_seed"%+%seed
    data.model%+%"/"%+%data.model%+%"_prior"%+%prior%+%"_chain"%+%chain%+%seedstr%+%".jags"
}
# plot marginal distribution from jags and inla
# Overlays, for each watched variance-component parameter, the MCMC-based
# marginal density (black) and the INLA hyperpar marginal (green), one panel
# per parameter, written to an eps file via mypostscript.
plot.marginals=function (.data.model, seed=NA) {
# first combine samples
# NOTE(review): rbind() is called with a single argument, so `samples` is
# OVERWRITTEN each iteration and only the last chain is used; this was
# presumably meant to be samples=rbind(samples, .data.model@gibbs.samples[[i]])
samples = NULL
for (i in 1:.data.model@gibbs.chains) samples=rbind(.data.model@gibbs.samples[[i]])
seedstr=""
if (.data.model@is.sim) seedstr="_seed"%+%seed
# NOTE(review): `prior` is a free variable here (taken from the calling
# environment / global workspace), not a parameter -- confirm intended
mypostscript(file=.data.model@name%+%"_prior"%+%prior%+%seedstr%+%".eps" )
for (i in 1:length(.data.model@gibbs.watch.list)) {
# this is a little precarious, but hey
# (dispatch on the watched parameter's name prefix)
if (startsWith(.data.model@gibbs.watch.list[i],"sigma")) {
# sigma samples are converted to the precision scale 1/sigma^2
jags.marginal = get.density.boundary.corrected ( 1/samples[,i]**2 )
} else if (startsWith(.data.model@gibbs.watch.list[i],"rho")) {
jags.marginal = get.density.boundary.corrected ( samples[,i] )
} else if (startsWith(.data.model@gibbs.watch.list[i],"log")) {
jags.marginal = get.density.boundary.corrected ( samples[,i] )
} else if (.data.model@gibbs.watch.list[i]=="z") {
jags.marginal = get.density.boundary.corrected ( samples[,i] )
} else {
next; # only do variance component parameters
}
hyper.marginal=.data.model@hyper.fit$marginals[[i]]
# sometimes it is necessary to do transformation
if (startsWith(.data.model@gibbs.watch.list[i],"log")) {
# log transform with the change-of-variables density adjustment
hyper.marginal = cbind(log(hyper.marginal[,1]), hyper.marginal[,2]*hyper.marginal[,1])
} else if (.data.model@gibbs.watch.list[i]=="z") {
# NOTE(review): `logit` is not defined in this file -- presumably from
# an attached package; confirm
hyper.marginal = cbind(logit(.5+hyper.marginal[,1]/2), hyper.marginal[,2]*(.5-hyper.marginal[,1]**2/2))
} else {
hyper.marginal = hyper.marginal
}
plot(jags.marginal$x, jags.marginal$y, type="l", xlab="", ylab="density", ylim=c(0,max(jags.marginal$y,hyper.marginal[,2])))
#, xlim=range(c(marginal2[,1])) )
lines(hyper.marginal[,1], hyper.marginal[,2], col=3)
# plot(hyper.marginal[,1], hyper.marginal[,2], col=3, type="l", xlab="", ylab="density")
# lines(jags.marginal$x, jags.marginal$y )
legend(legend=c("mcmc","hyperpar"), col=c(1,3),x="topright", inset=0, bty="o", lty=1, cex=.6)
title(main=.data.model@gibbs.watch.list[i])
#title(main="n="%+%a$n%+%", T="%+%a$T%+%", tao="%+%(1/a$sigma2)%+%", prior=Ga("%+%inla.f.param[1]%+%", rate="%+%inla.f.param[2]%+%")")
}
dev.off()
}
# load saved JAGS sample files (one per chain) into the data.model's
# gibbs.samples slot and print a variance-component summary of each chain;
# returns the updated data.model (S4 objects are copied, so reassignment is
# required by the caller)
load.jags.samples=function (.data.model, prior,seed=NA) {
for (i in 1:.data.model@gibbs.chains) {
# load() is expected to create `jags.samples.tmp` in this environment
load(getSamplesFileName(.data.model@name,seed,i,prior))
.data.model@gibbs.samples[[i]] = jags.samples.tmp[,.data.model@gibbs.watch.list,drop=FALSE]
print(getVarComponent(.data.model@gibbs.samples[[i]]))
}
.data.model
}
# calculate entropy (Shannon entropy, in bits)
# p can be count vector or probability vector, but not a vector of membership indicator
H=function (p) {
    if (sum(p) != 1) p = p/sum(p)   # counts are normalized to probabilities
    p = p[p != 0]                   # 0*log(0) terms are treated as 0
    -sum(p * log2(p))
}
# each row is one observation in euclidean space
# total within-group sum of squared distances from the centroid
sum.of.square<-withinss<-function(x) {
    centroid = colMeans(x)
    row.ss = apply(x, 1, function(row) sum((row - centroid)**2))
    sum(row.ss)
}
# remove file extension from file name (everything from the last "." on)
rmExt = function(name){
    dot.pos = lastIndex(name, ".")
    substr(name, 1, dot.pos - 1)
}
# return the file extension (text after the last ".")
getExt = function(name){
    dot.pos = lastIndex(name, ".")
    substr(name, dot.pos + 1, nchar(name))
}
########################################
# protein sequence functions
########################################
# Read a FASTA file into a named list of sequence strings. Names come from the
# header lines (">name ..."): the leading ">" is stripped and the name is
# truncated at the first `sep`.
# BUG FIX: the original else-branch printed "sth wrong" without advancing to
# the next line, so any non-header line at the top level caused an infinite
# loop; we now skip such lines. Also close the connection via on.exit so it
# is released even on error.
readFastaFile = function (fileName, sep=" "){
    fastaFile = file(fileName, "r")
    on.exit(close(fastaFile))
    sequences = list()
    line.text = readLines(fastaFile, 1)
    name = NULL
    while (length(line.text) > 0) {
        if (substr(line.text, 1, 1) == ">") {
            name = substr(strsplit(line.text, sep)[[1]][1], 2, 1000)
            temp.seq = ""
            line.text = readLines(fastaFile, 1)
            # accumulate sequence lines until the next header or EOF
            while (length(line.text) > 0) {
                if (substr(line.text, 1, 1) == ">") break
                temp.seq = paste(temp.seq, line.text, sep = "")
                line.text = readLines(fastaFile, 1)
            }
            sequences[[name]] = temp.seq
        } else {
            print ("sth wrong")
            line.text = readLines(fastaFile, 1)
        }
    }
    sequences
}
# Write a named list of sequence strings to `fileName` in FASTA format
# (">name" line followed by the sequence line).
# FIX: iterate with seq_along instead of 1:length, so an empty list writes
# nothing instead of producing a bogus ">NA" record.
writeFastaFile=function (seqList, fileName) {
    outFile = file (fileName, open="w")
    on.exit(close(outFile))
    for (i in seq_along(seqList)) {
        write (file=outFile, paste(">", names(seqList)[i], sep=""), append=TRUE)
        write (file=outFile, seqList[[i]], append=TRUE)
    }
}
# the 20 standard amino-acid one-letter codes, plus "-" (gap) at index 21;
# positions in this vector are the "arabic" codes used throughout this section
aaList=c("A","C","D","E","F","G","H","I","K","L","M","N","P","Q","R","S","T","V","W","Y","-")
# seq1 is a string of amino acids
# map each character to its 1-based index in aaList (case-insensitive);
# anything not in aaList is coded 21 (the gap code)
aa2arabic=function (seq1) {
    chars = strsplit(seq1, "|")[[1]]   # "|" regex splits into single characters
    codes = sapply(chars, function (ch) {
        hit = which (aaList == toupper(ch))
        ifelse(length(hit) == 0, 21, hit)
    })
    unname(codes)
}
string2arabic=function (stringList) {
# fasta is a list of aa sequences, returns a matrix of arabic numbers representing aa
# if out is not what you expect, maybe the strings don't have the same length
# (mysapply is an in-file helper; presumably an sapply variant that always
# returns a matrix with one row per input sequence -- confirm)
out=mysapply(stringList, aa2arabic)
dimnames(out)[[2]]=1:ncol(out)
out
}
# remove gap from a seq: drop every "-" (code 21) and return the remaining
# amino acids re-assembled into a single string
removeGap = function (seq) {
    codes = aa2arabic(seq)
    kept = codes[codes != 21]
    concatList(aaList[kept])
}
# convert a FASTA file into the 0-based "arabic" matrix file format;
# removeGapMajor drops columns that are mostly gaps before writing
fastaFile2arabicFile=function(fastaFile, arabicFile, removeGapMajor=F){
stringList = readFastaFile(fastaFile)
stringList2arabicFile(stringList, arabicFile, removeGapMajor)
}
# convert a SELEX alignment file into the 0-based "arabic" matrix file format;
# removeGapMajor drops columns that are mostly gaps before writing
selexFile2arabicFile=function(selexFile, arabicFile, removeGapMajor=F){
stringList = readSelexFile(selexFile)
stringList2arabicFile(stringList, arabicFile, removeGapMajor)
}
# encode a list of aligned sequence strings as arabic codes and write them
# out in the arabic file format
stringList2arabicFile=function (stringList, arabicFile, removeGapMajor=F) {
alignment=string2arabic(stringList)
# remove columns that are predominantly gaps
# NOTE(review): "predominantly" here is an absolute count -- columns with
# fewer than 10 gap characters are KEPT -- confirm this threshold is intended
if (removeGapMajor) {
tmpCount = alignment2count(alignment, level=21)
alignment=alignment[,tmpCount[,21]<10]
}
arabic2arabicFile (alignment, arabicFile)
}
# alignment is a matrix of arabic representation of sequences (1 based)
# writes the arabic file format: a header line "n p m" (rows, columns, max
# 0-based code) followed by one line per sequence of 0-based codes;
# invisibly returns the (1-based) alignment
arabic2arabicFile=function (alignment, arabicFile) {
    # adjust to 0 based index before writing to file
    zero.based = alignment - 1
    header = c(nrow(zero.based), ncol(zero.based), max(zero.based))
    write (header, file=arabicFile)
    write (t(zero.based), file=arabicFile, ncolumns=ncol(zero.based), append=TRUE)
    invisible(zero.based + 1)
}
# return a list of strings
# parse a SELEX alignment file: leading "#" comment lines and blank lines are
# skipped, then each line contributes one entry named after its first field,
# holding the line's last whitespace-separated field (the sequence)
readSelexFile=function (fileName) {
    con = file(fileName, open = "r")
    line1 = readLines(con, 1)
    while (startsWith(line1, "#") | line1 == "") line1 = readLines(con, 1)
    seqs = list()
    while (length(line1) > 0) {
        fields = strsplit(line1, " ")[[1]]
        seqs[[fields[1]]] = fields[length(fields)]
        line1 = readLines(con, 1)
    }
    close (con)
    seqs
}
# read a SELEX alignment as a character matrix: one row per sequence (rownames
# are the sequence names), one column per alignment position
readSelexAsMatrix=function (fileName="SETpfamseed/seq/SETpfamseed_aligned.selex") {
    con = file(fileName, open = "r")
    seq.rows = NULL
    seq.names = NULL
    repeat {
        one.name = scan (con, what="character", n=1)
        one.seq = scan (con, what="character", n=1)
        if (length(one.name) == 0) break   # EOF
        seq.rows = rbind(seq.rows, strsplit(one.seq, "")[[1]])
        seq.names = c(seq.names, one.name)
    }
    close (con)
    dimnames(seq.rows) = list(seq.names, NULL)
    seq.rows
}
# alignment is a matrix of n row and p column arabic numbers representing amino acids
# write it back out as a FASTA file, using rownames (or row numbers when the
# matrix is unnamed) as the sequence names
arabic2fastaFile=function (alignment, fileName){
    outFile = file (fileName, open="w")
    seq.names = rownames(alignment)
    if (is.null(seq.names)) seq.names = 1:nrow(alignment)
    for (i in 1:nrow(alignment)){
        write (file=outFile, ">" %+% seq.names[i], append=T)
        write (file=outFile, concatList(aaList[alignment[i,]]), append=T)
    }
    close(outFile)
}
# mY is a n by T matrix
# return T by 20 matrix
# (optionally weighted) residue counts per alignment column: result[t, a] is
# the total weight of sequences carrying code `a` at position t; codes larger
# than `level` are ignored
alignment2count=function (mY, level=20, weight=rep(1,nrow(mY))){
    #t(sapply(1:ncol(mY), function (i) table(factor(mY[,i], levels=1:level)) ))
    n.seq = nrow(mY)
    n.pos = ncol(mY)
    counts = matrix(0, n.pos, level)
    for (pos in 1:n.pos) {
        for (s in 1:n.seq) {
            code = mY[s, pos]
            if (code <= level) counts[pos, code] = counts[pos, code] + weight[s]
        }
    }
    counts
}
# return a T by 4 matrix, each row is the count of MM, MD, DM, DD for each position
# (M = match, i.e. non-gap; D = delete, i.e. gap code 21; row t counts the
# transition INTO position t, with position 1 counting just M/D occupancy)
# BUG FIX: the default weight was rep(1, nrow(mY)), but the parameter is named
# `alignment` -- omitting `weight` raised "object 'mY' not found".
alignment2trancount=function (alignment, weight=rep(1,nrow(alignment))){
    n = nrow(alignment)
    counts = matrix(0, ncol(alignment), 4)
    for (i in 1:n) {
        x = alignment[i,,drop=F]
        # position 1 has no predecessor: only M (col 1) or D (col 2)
        if (x[1] != 21) counts[1,1] = counts[1,1] + weight[i]
        else counts[1,2] = counts[1,2] + weight[i]
        if (ncol(alignment) > 1) {
            for (t in 2:ncol(alignment)) {
                if (x[t-1] != 21) {
                    if (x[t] != 21) counts[t,1] = counts[t,1] + weight[i]  # MM
                    else counts[t,2] = counts[t,2] + weight[i]             # MD
                } else {
                    if (x[t] != 21) counts[t,3] = counts[t,3] + weight[i]  # DM
                    else counts[t,4] = counts[t,4] + weight[i]             # DD
                }
            }
        }
    }
    counts
}
# return a matrix of nxp alignment
# reads the arabic file format written by arabic2arabicFile: a header of
# n (sequences), p (positions), m (max code), then one space-separated line
# of 0-based codes per sequence; returns the codes converted back to 1-based
readArabicFile = function (fileName) {
infile = file (fileName, open="r")
n=scan (infile, n=1, quiet=T)
p=scan (infile, n=1, quiet=T)
# m is read to advance past the header but is otherwise unused
m=scan (infile, n=1, quiet=T)
out = mysapply (1:n, function (i) as.numeric( strsplit(readLines(infile, n=1), " ")[[1]] ) )
close(infile)
invisible (out+1)
}
# percent identity between aligned sequences (arabic-coded rows).
# For two rows, returns a single percentage (or 100 - identity when
# dissimilarity=TRUE); removeGap drops columns that are gaps (21) in BOTH
# sequences first. For more than two rows, returns the full pairwise matrix
# by recursing on every pair.
calcPairwiseIdentity = function (alignment, dissimilarity, removeGap) {
    n = nrow(alignment)
    if (n > 2) {
        result = matrix(0, nrow=n, ncol=n)
        for (i in 1:n) {
            for (j in 1:n) {
                result[i,j] = calcPairwiseIdentity(alignment[c(i,j),], dissimilarity, removeGap)
            }
        }
        return (result)
    }
    pair = alignment
    if (removeGap) {
        # remove positions that are both gaps
        pair = pair[, !(pair[1,]==21 & pair[2,]==21)]
    }
    pct = 100 * mean(pair[1,] == pair[2,])
    if (dissimilarity) 100 - pct else pct
}
# read a "block" file: three header numbers n (rows), p, M, followed by an
# n-row matrix of 0-based codes; returns the codes converted to 1-based
readBlockFile=function (fileName) {
    con = file (fileName, "r")
    n = scan (con, n=1, quiet=TRUE)
    p = scan (con, n=1, quiet=TRUE)   # read past the header; unused below
    M = scan (con, n=1, quiet=TRUE)   # likewise unused
    codes = matrix(scan (con, quiet=TRUE), nrow=n, byrow=TRUE) + 1
    close(con)
    codes
}
# generate random number from (generalized) Bernoulli dist
# if generalized, output is 1 based, if not, output is 1 or 0
# prob of length 1 -> ordinary Bernoulli (0/1 draws);
# prob of length k>1 -> categorical draws in 1..k
rbern=function(n, prob) {
    generalized = length(prob) > 1
    if (!generalized) prob = c(1-prob, prob)
    draws = rmultinom(n, 1, prob)
    picks = apply(draws, 2, function(col) which(col == 1))
    if (generalized) picks else picks - 1
}
# Bernoulli density: prob for x==1, 1-prob otherwise; log=TRUE returns the
# log density. Vectorized over x (and prob, by recycling).
# BUG FIX: the original returned ifelse(log, log(out), out); since `log` is a
# scalar flag, ifelse() truncated the result to length 1 for vector x.
dbern=function(x,prob,log=FALSE){
    out=ifelse(x==1, prob, 1-prob)
    if (log) log(out) else out
}
# correlated Bernoulli
# joint density of a Markov chain of Bernoulli variables: each success
# probability is pulled toward the previous outcome, p + a*(x[i-1] - p)
dcorbern=function(x, p, a, log=FALSE){
    out = dbern(x[1], p)
    for (i in seq_along(x)[-1]) {
        out = out * dbern(x[i], p + a*(x[i-1] - p))
    }
    ifelse(log, log(out), out)
}
# Breslow-Thomas calibration-weighted Cox regression for a two-phase design:
# 1) impute the phase-2 covariate for all subjects via a survey glm,
# 2) fit the Cox model of interest on the augmented data and take dfbeta
#    influence values as auxiliary calibration variables,
# 3) calibrate the two-phase weights on those variables and refit by IPW.
# Returns the calibrated svycoxph fit (invisibly, via the final assignment).
Breslow.Thomas2 = function (dat, imputation.model, interest.model, strata.formula, subset) {
require(survival)
require(survey)
# if we try to use interest.model directly, something goes wrong with the resid call below
tmp = as.character(interest.model)
interest.model.str = paste(tmp[2],"~",tmp[3])
#step 1 predict missing covariates
dstrat<-twophase(id=list(~1,~1),strata=list(NULL,strata.formula),subset=subset,data=dat)
fit.step1 = svyglm(imputation.model, design=dstrat)
dat.step1 = dat
# imputed value is stored in column "s"; the interest model is expected to use it
dat.step1$s <- predict(fit.step1,type="response",newdata=dat,se=F)
# step 2 fit augmented dataset with risk model to get auxiliary variable: dfbeta
calmodel<-coxph(as.formula(interest.model.str), data=dat.step1 )
# NOTE(review): the "+1" shifts each dfbeta column so colSums(db) below
# includes the sample size in the calibration totals -- confirm against the
# published Breslow-Thomas code
db = resid(calmodel,"dfbeta", data=dat.step1)+1 # this step needs the risk.model from last line to be "inline",
# otherwise it has trouble finding dat.step1
colnames(db)<-paste("db",1:ncol(db),sep="")
datDB = cbind(dat, db)
dstrt<-twophase(id=list(~1,~1),strata=list(NULL,strata.formula),subset=subset,data=datDB)
# step 3 IPW fitting
dcal<-calibrate(dstrt,formula=make.formula(colnames(db)),pop=c(`(Intercept)`=nrow(dat),colSums(db)),calfun="raking",eps=0.0001)
cal<-svycoxph(as.formula(interest.model.str), design=dcal)
}
## example for using the above function
#dat=read.table("
http://youtil.googlecode.com/files/cwcoxexample.txt", header=T)
#
## unweighted, wrong
#cox.fit = coxph (Surv(X,d) ~ z + s + z:s, dat)
#coef(cox.fit)
#
## simple weighted, less efficient
#dstrat<-twophase(id=list(~1,~1),strata=list(NULL,~d),subset=~indicators,data=dat)
#wcox.fit = svycoxph(Surv(X,d) ~ z + s + z:s, design=dstrat)
#wcox.fit
#
## calibration weighted, more efficient
#bt.fit = Breslow.Thomas2 (dat, imputation.model=s~w, interest.model=Surv(X,d) ~ z + s + z:s, strata.formula=~d, subset=dat$indicators)
#bt.fit
# Round `value` to `digits` decimal places and format it as text, always
# showing exactly `digits` digits after the decimal point (never scientific).
roundup=function (value, digits) {
    rounded <- round(value, digits)
    format(rounded, nsmall = digits, scientific = FALSE)
}
# from Cai Tianxi
## count how many YY's are smaller or equal to yy
N.L.E <- function(yy, YY) ## sum I(YY <= yy[i])
{
    # Rank of each (slightly up-perturbed) yy in the pooled sample, minus its
    # rank within yy alone, counts the YY values <= that yy. The perturbation
    # breaks ties so equal YY values still count as "<=".
    pooled.rank <- rank(c(yy + 1e-8, YY))[seq_along(yy)]
    pooled.rank - rank(yy)
}
N.L <- function(yy, YY) ## sum I(YY < yy[i])
{
    # Same rank trick as N.L.E but perturbing yy downward, so equal YY values
    # do NOT count: strict "less than".
    pooled.rank <- rank(c(yy - 1e-8, YY))[seq_along(yy)]
    pooled.rank - rank(yy)
}
N.G.E <- function(yy, YY) ## sum I(YY >= yy[i])
{
    # Complement of N.L: total YY count minus #{YY < yy[i]}.
    strictly.below <- rank(c(yy - 1e-8, YY))[seq_along(yy)] - rank(yy)
    length(YY) - strictly.below
}
### modified from Lumley
# Empirical ROC curve: sensitivity and specificity at -Inf and every observed
# marker value. TT: numeric marker; D: disease indicator (1/TRUE = diseased).
# Observations with missing TT are dropped (together with their D) first.
computeROC<-function(TT,D){
    keep <- !is.na(TT)
    D <- D[keep]
    TT <- TT[keep]
    cuts <- c(-Inf, sort(unique(TT)))
    sens <- vapply(cuts, function(ci) mean(TT > ci & D) / mean(D), numeric(1))
    spec <- vapply(cuts, function(ci) mean(TT <= ci & !D) / mean(!D), numeric(1))
    list(sensitivity = sens, specificity = spec)
}
# Plot an ROC curve (1-specificity on x, sensitivity on y) from a
# computeROC() result; ... is forwarded to plot().
plotROC<-function(stuff,...){
    fpr <- 1 - stuff$specificity
    plot(fpr, stuff$sensitivity, ...)
}
# Add an ROC curve to an existing plot; ... is forwarded to lines().
addROC<-function(stuff,...){
    fpr <- 1 - stuff$specificity
    lines(fpr, stuff$sensitivity, ...)
}
#### mine
calAUC<-function(stuff){
    # Trapezoidal-rule AUC from a computeROC() result.
    #### TPR and FPR must be in decreasing order (as computeROC produces).
    # Returns NA for a degenerate single-NA curve.
    TPR<-stuff$sensitivity
    FPR<-1-stuff$specificity
    # && (scalar, short-circuiting) rather than & for an if() condition
    if (length(TPR)==1 && is.na(TPR[1])) {
        out<-NA
    } else {
        len<-length(TPR)
        # average of consecutive TPRs times the FPR step; steps are negative,
        # hence the division by -2
        out<-sum((TPR[-len]+TPR[-1])*diff(FPR)/(-2))
    }
    return(out)
}
# next function copied from DiagnosisMed, had to modify it
# ROC analysis of a numeric index test against a binary gold standard.
# gold: reference result, numeric 0/1 or factor with levels "negative"/"positive".
# test: numeric index-test values. CL: confidence level. Cost: cost weight
# used in the misclassification cost term (MCT). Prevalence: population
# prevalence (0 means use the sample prevalence). Plot/Print/Full control
# output; Plot.point/p.cex are passed to the plot method.
# Returns (invisibly) a list of class "ROC".
# NOTE(review): depends on binom.wilson() (epitools in the original
# DiagnosisMed code) and on print.ROC/plot.ROC defined elsewhere -- confirm
# these are available before use.
ROC=function (gold, test, CL = 0.95, Cost = 1, Prevalence = 0, Plot = TRUE,
Plot.point = "Min.ROC.Dist", p.cex = 1, Full = FALSE, Print = TRUE)
{
if (any(is.na(test) | is.na(gold))) {
stop("It seems there are NAs either in the index test or in the reference test. Consider imputing or removing NAs!")
}
# cross-tabulate every observed test value against gold status
test.table <- table(test, gold)
if (dim(test.table)[2] != 2) {
stop("It seems that your gold standard has more than 2 categories")
}
# NOTE(review): self-assignment is a no-op kept from the original
CL <- CL
cost <- Cost
sample.size <- sum(test.table)
# column 2 of the table is the "diseased" column
sample.prevalence <- (sum(test.table[, 2])/sample.size)
if (Prevalence == 0) {
pop.prevalence <- sample.prevalence
}
if (Prevalence > 0) {
(pop.prevalence <- Prevalence)
}
# Mann-Whitney (rank-sum) AUC; numeric gold is assumed coded 0/1
if (is.numeric(gold) == TRUE) {
X <- sort(test[gold == 0])
Y <- sort(test[gold == 1])
AUC <- ((as.double(length(test[gold == 0]))) * (as.double(length(test[gold ==
1]))) + ((as.double(length(test[gold == 0]))) * ((as.double(length(test[gold ==
0]))) + 1))/2 - sum(rank(test, ties.method = "average")[gold ==
0]))/((as.double(length(test[gold == 0]))) * (as.double(length(test[gold ==
1]))))
# flip if the test is negatively oriented
AUC[AUC < 0.5] <- 1 - AUC
}
# same computation when gold is a factor with levels "negative"/"positive"
if (is.factor(gold) == TRUE) {
X <- sort(test[gold == "negative"])
Y <- sort(test[gold == "positive"])
AUC <- ((as.double(length(test[gold == "negative"]))) *
(as.double(length(test[gold == "positive"]))) + ((as.double(length(test[gold ==
"negative"]))) * ((as.double(length(test[gold ==
"negative"]))) + 1))/2 - sum(rank(test, ties.method = "average")[gold ==
"negative"]))/((as.double(length(test[gold == "negative"]))) *
(as.double(length(test[gold == "positive"]))))
AUC[AUC < 0.5] <- 1 - AUC
}
m <- as.double(length(X))
n <- as.double(length(Y))
# summary of the test distribution: overall, non-diseased (X), diseased (Y)
test.summary <- round(c(summary(test), sd(test)), digits = 5)
test.summary <- rbind(test.summary, round(c(summary(X), sd(X)),
digits = 5))
test.summary <- rbind(test.summary, round(c(summary(Y), sd(Y)),
digits = 5))
colnames(test.summary) <- c("Min.", "1st Qu.", "Median",
"Mean", "3rd Qu.", "Max.", "SD")
rownames(test.summary) <- c("Overall summary", "Without disease",
"With disease")
# placement-value components for the AUC variance (Hanley-McNeil style)
D10X <- function(Xi) {
(1/n) * sum(Y >= Xi[1])
}
D01Y <- function(Yi) {
(1/m) * sum(Yi[1] >= X)
}
VAR.AUC <- sum((tapply(X, X, "D10X") - AUC)^2)/(m * (m -
1)) + sum((tapply(Y, Y, "D01Y") - AUC)^2)/(n * (n - 1))
SD.AUC <- sqrt(VAR.AUC)
alpha <- 1 - CL
# normal-approximation CI around the AUC: (lower, point, upper)
AUC.summary <- c(AUC - qnorm(1 - alpha/2) * SD.AUC, AUC,
AUC + qnorm(1 - alpha/2) * SD.AUC)
D <- sum(test.table[, 2])
ND <- sum(test.table[, 1])
test.values <- (as.numeric(rownames(unclass(test.table))))
test.diag.table <- as.data.frame(test.values)
# cumulative counts treating each observed test value as the positivity
# threshold (>= value is "positive"); note 1:i - 1 is (1:i)-1 = 0:(i-1),
# and the 0 index is silently dropped, so row i sums rows 1..(i-1)
for (i in 1:nrow(test.diag.table)) {
test.diag.table$TP[i] <- sum(test.table[i:nrow(test.table),
2])
test.diag.table$FN[i] <- sum(test.table[1:i - 1, 2])
test.diag.table$FP[i] <- sum(test.table[i:nrow(test.table),
1])
test.diag.table$TN[i] <- sum(test.table[1:i - 1, 1])
}
# per-threshold diagnostics with Wilson CIs
test.diag.table$Sensitivity <- round(test.diag.table$TP/D,
digits = 4)
test.diag.table$Se.inf.cl <- round(binom.wilson(test.diag.table$TP,
D, conf.level = CL)[4]$lower, digits = 4)
test.diag.table$Se.sup.cl <- round(binom.wilson(test.diag.table$TP,
D, conf.level = CL)[5]$upper, digits = 4)
test.diag.table$Specificity <- round(test.diag.table$TN/ND,
digits = 4)
test.diag.table$Sp.inf.cl <- round(binom.wilson(test.diag.table$TN,
ND, conf.level = CL)[4]$lower, digits = 4)
test.diag.table$Sp.sup.cl <- round(binom.wilson(test.diag.table$TN,
ND, conf.level = CL)[5]$upper, digits = 4)
test.diag.table$PPV <- round(test.diag.table$TP/(test.diag.table$TP +
test.diag.table$FP), digits = 4)
# NOTE(review): denominators of the PPV confidence limits look wrong --
# the lower limit uses TP+TP and the upper uses TP+FN, where PPV's
# denominator is TP+FP. Kept byte-identical to the original here.
test.diag.table$PPV.inf.cl <- round(binom.wilson(test.diag.table$TP,
(test.diag.table$TP + test.diag.table$TP), conf.level = CL)[4]$lower,
digits = 4)
test.diag.table$PPV.sup.cl <- round(binom.wilson(test.diag.table$TP,
(test.diag.table$TP + test.diag.table$FN), conf.level = CL)[5]$upper,
digits = 4)
test.diag.table$NPV <- round(test.diag.table$TN/(test.diag.table$TN +
test.diag.table$FN), digits = 4)
test.diag.table$NPV.inf.cl <- round(binom.wilson(test.diag.table$TN,
(test.diag.table$TN + test.diag.table$FN), conf.level = CL)[4]$lower,
digits = 4)
test.diag.table$NPV.sup.cl <- round(binom.wilson(test.diag.table$TN,
(test.diag.table$TN + test.diag.table$FN), conf.level = CL)[5]$upper,
digits = 4)
# likelihood ratios with log-normal confidence limits
test.diag.table$PLR <- round(test.diag.table$Sensitivity/(1 -
test.diag.table$Specificity), digits = 2)
test.diag.table$PLR.inf.cl <- round(exp(log(test.diag.table$PLR) -
(qnorm(1 - ((1 - CL)/2), mean = 0, sd = 1)) * sqrt((1 -
test.diag.table$Sensitivity)/((D) * test.diag.table$Specificity) +
(test.diag.table$Specificity)/((ND) * (1 - test.diag.table$Specificity)))),
digits = 2)
test.diag.table$PLR.sup.cl <- round(exp(log(test.diag.table$PLR) +
(qnorm(1 - ((1 - CL)/2), mean = 0, sd = 1)) * sqrt((1 -
test.diag.table$Sensitivity)/((D) * test.diag.table$Specificity) +
(test.diag.table$Specificity)/((ND) * (1 - test.diag.table$Specificity)))),
digits = 2)
test.diag.table$NLR <- round((1 - test.diag.table$Sensitivity)/test.diag.table$Specificity,
digits = 2)
test.diag.table$NLR.inf.cl <- round(exp(log(test.diag.table$NLR) -
(qnorm(1 - ((1 - CL)/2), mean = 0, sd = 1)) * sqrt((test.diag.table$Sensitivity)/((D) *
(1 - test.diag.table$Sensitivity)) + (1 - test.diag.table$Specificity)/((ND) *
(test.diag.table$Specificity)))), digits = 2)
test.diag.table$NLR.sup.cl <- round(exp(log(test.diag.table$NLR) +
(qnorm(1 - ((1 - CL)/2), mean = 0, sd = 1)) * sqrt((test.diag.table$Sensitivity)/((D) *
(1 - test.diag.table$Sensitivity)) + (1 - test.diag.table$Specificity)/((ND) *
(test.diag.table$Specificity)))), digits = 2)
# accuracy-style summary measures per threshold
test.diag.table$Accuracy <- (test.diag.table$TN + test.diag.table$TP)/sample.size
test.diag.table$DOR <- ((test.diag.table$TN) * (test.diag.table$TP))/((test.diag.table$FP) *
(test.diag.table$FN))
test.diag.table$DOR <- ifelse(test.diag.table$DOR == Inf,
NA, test.diag.table$DOR)
test.diag.table$Error.rate <- ((test.diag.table$FP) + (test.diag.table$FN))/sample.size
test.diag.table$Accuracy.area <- ((test.diag.table$TP) *
(test.diag.table$TN))/(D * ND)
test.diag.table$Max.Se.Sp <- test.diag.table$Sensitivity +
test.diag.table$Specificity
test.diag.table$Youden <- test.diag.table$Sensitivity + test.diag.table$Specificity -
1
test.diag.table$Se.equals.Sp <- abs(test.diag.table$Specificity -
test.diag.table$Sensitivity)
# squared distance from the (0,1) corner of the ROC plot
test.diag.table$MinRocDist <- (test.diag.table$Specificity -
1)^2 + (1 - test.diag.table$Sensitivity)^2
test.diag.table$Efficiency <- (test.diag.table$Sensitivity *
(pop.prevalence)) + ((1 - (pop.prevalence)) * test.diag.table$Specificity)
# misclassification cost term, weighting false negatives by `cost`
test.diag.table$MCT <- (1 - (pop.prevalence)) * (1 - test.diag.table$Specificity) +
(cost * (pop.prevalence)) * (1 - test.diag.table$Sensitivity)
# best cutoff under each of ten optimality criteria (one row per criterion)
test.best.cutoff <- as.data.frame(rbind(test.diag.table[which.max(test.diag.table$Accuracy),
c(1, 6:11, 18:20)], test.diag.table[which.max(test.diag.table$DOR),
c(1, 6:11, 18:20)], test.diag.table[which.min(test.diag.table$Error.rate),
c(1, 6:11, 18:20)], test.diag.table[which.max(test.diag.table$Accuracy.area),
c(1, 6:11, 18:20)], test.diag.table[which.max(test.diag.table$Max.Se.Sp),
c(1, 6:11, 18:20)], test.diag.table[which.max(test.diag.table$Youden),
c(1, 6:11, 18:20)], test.diag.table[which.min(test.diag.table$Se.equals.Sp),
c(1, 6:11, 18:20)], test.diag.table[which.min(test.diag.table$MinRocDist),
c(1, 6:11, 18:20)], test.diag.table[which.max(test.diag.table$Efficiency),
c(1, 6:11, 18:20)], test.diag.table[which.min(test.diag.table$MCT),
c(1, 6:11, 18:20)]))
# rownames(test.best.cutoff) <- c("Max. Accuracy", "Max. DOR",
# "Min. Error rate", "Max. Accuracy area", "Max. Sens+Spec",
# "Max. Youden", "Se=Sp", "Min. ROC distance", "Max. Efficiency",
# "Min. MCT")
reteval <- list(pop.prevalence = pop.prevalence, sample.size = sample.size,
sample.prevalence = sample.prevalence, test.summary = test.summary,
AUC.summary = AUC.summary, test.table = test.table, test.best.cutoff = test.best.cutoff,
test.diag.table = test.diag.table, CL = CL, cost = cost)
class(reteval) <- "ROC"
if (Print == TRUE) {
if (Full == TRUE) {
print(reteval, Full = TRUE)
}
else {
print(reteval)
}
}
if (Plot == TRUE) {
plot(reteval, Plot.point = Plot.point, p.cex = p.cex)
}
invisible(reteval)
}
# dat is a n x k matrix
myplot1=function(dat, ylab="", group.lab, main="", point=FALSE, cex=.5){
    # Profile plot of an n x k matrix: one polyline per row (subject) across
    # the k columns (groups), drawn over an invisible boxplot frame that sets
    # up the axes. Segments are light gray when the value does not decrease
    # left-to-right, light pink otherwise.
    # group.lab: x-axis labels, one per column. point: overlay one letter
    # symbol per subject (c(dat) is column-major, so pch recycles by row).
    k=ncol(dat)
    n=nrow(dat)
    x=rep(1:k, each=n)
    pch=letters[1:n]
    # white border makes the boxes invisible; we only want the frame/limits
    boxplot(c(dat)~x, range=0, ylab=ylab, border="white", xlab="", xaxt="n")
    # seq_len() instead of 1:(k-1) so a single-column matrix draws no segments
    # rather than erroring on the backwards sequence 1:0
    for (i in seq_len(n)) {
        for (j in seq_len(k-1))
            lines (j:(j+1), dat[i,j:(j+1)], lwd=.25, col=ifelse(dat[i,j]<=dat[i,j+1],"lightgray","lightpink"))
    }
    axis(side=1, at=1:k, labels=group.lab)
    title(main=main)
    if (point) points(x, c(dat), pch=pch, cex=cex)
}
# both dat and dat2 need to be matrix
# if dat2 is null: dat is matrix with two columns. each row is one subject, the columns will be plotted side by side, with lines connecting values from one ptid
# if dat2 is not null, dat has one column, same for dat2
# if add is true, no plot function will be called
# Paired "interaction" plot at two x positions (1 and 2, shifted by x.ori).
# If dat2 is NULL: dat has two columns, one row per subject; the columns are
#   plotted side by side with a line connecting each subject's pair (red when
#   the value decreases, black otherwise, unless lcol is given).
# If dat2 is given: dat and dat2 each have one column; their points are drawn
#   at positions 1 and 2 with no connecting lines.
# add: if TRUE, draw into the existing plot instead of starting a new one.
# pcol/lcol: optional per-point / per-line color matrices (same shape as dat).
my.interaction.plot=function(dat, dat2=NULL, x.ori=0, xaxislabels=rep("",2), cex.axis=1, add=F, xlab="", ylab="", pcol=NULL, lcol=NULL, ...){
if (!add) plot(0,0,type="n",xlim=c(1,2),ylim=range(dat), ylab=ylab, xlab=xlab, xaxt="n", ...)
cex=.25; pch=19
if (is.null(dat2)) {
if (is.null(lcol)) lcol=ifelse(dat[,1]>dat[,2],"red","black")
for (i in 1:nrow(dat)) {
# note: relies on ifelse() not evaluating pcol[i,1] when pcol is NULL
# (the unused branch is only indexed for the FALSE positions)
points (1+x.ori, dat[i,1], cex=cex, pch=pch, col=ifelse(is.null(pcol), 1, pcol[i,1]))
points (2+x.ori, dat[i,2], cex=cex, pch=pch, col=ifelse(is.null(pcol), 1, pcol[i,2]))
lines (1:2+x.ori, dat[i,], lwd=.25, col=lcol[i])
}
} else {
points (rep(1+x.ori, nrow(dat)), dat[,1], cex=cex, pch=pch)
points (rep(2+x.ori, nrow(dat2)), dat2[,1], cex=cex, pch=pch)
}
axis(side=1, at=1:2+x.ori, labels=xaxislabels, cex.axis=cex.axis)
}
# make a boxplot without boxes
# this is a widget, that is plot has to be called first
# dat is a data.frame
noboxplot=function(formula, data, cex=.5, ylab="", xlab=""){
    # Boxplot frame with invisible (white-bordered) boxes, overlaid with the
    # jittered raw points. Column 1 of the model frame is the response,
    # column 2 the grouping variable.
    boxplot(formula, data, range=0, border="white", ylab=ylab, xlab=xlab)
    mf <- model.frame(formula, data)
    grp <- as.numeric(as.factor(mf[, 2]))
    points(jitter(grp), mf[, 1], cex=cex)
}
# called butterfly.plot, because it is meant to plot two treatment arms at two time points, the two arms are plotted in a mirror fashion, see "by analyte.pdf" for an example
# if dat2 is null: dat is matrix with four columns. each row is one subject, the columns will be plotted side by side, with lines connecting values from one ptid
# if dat2 is not null, dat has two columns, which are plotted side by side with lines connecting them, same for dat2
# if add is true, no plot function will be called
# Mirror ("butterfly") plot of two arms at two time points, at x positions 1-4
# (shifted by x.ori).
# If dat2 is NULL: dat has 4 columns, one row per subject; columns 1-2 are arm
#   one (red segment when increasing), 2-3 a light-green bridge, and 3-4 arm
#   two drawn with the mirrored color convention (red when decreasing).
# If dat2 is given: dat columns 1-2 are drawn at positions 1-2 and dat2
#   columns 1-2 at positions 3-4, with no bridge.
# add: if TRUE, draw into the existing plot instead of starting a new one.
butterfly.plot=function (dat, dat2=NULL, add=FALSE, xaxislabels=rep("",4), x.ori=0, xlab="", ylab="", cex.axis=1, ...){
if (!add) plot(0,0,type="n",xlim=c(1,4),ylim=range(dat), xaxt="n", xlab=xlab, ylab=ylab, ...)
for (i in 1:nrow(dat)) {
lines (1:2+x.ori, dat[i,1:2], lwd=.25, col=ifelse(dat[i,1]<=dat[i,2],"red","black"))
if (is.null(dat2)) {
lines (2:3+x.ori, dat[i,2:3], lwd=.25, col="lightgreen")
# mirrored color convention for the second arm
lines (3:4+x.ori, dat[i,3:4], lwd=.25, col=ifelse(dat[i,3]<=dat[i,4],"black","red"))
}
}
if (!is.null(dat2)) {
for (i in 1:nrow(dat2)) {
lines (3:4+x.ori, dat2[i,1:2], lwd=.25, col=ifelse(dat2[i,1]<=dat2[i,2],"black","red"))
}
}
axis(side=1, at=1:4+x.ori, labels=xaxislabels, cex.axis=cex.axis)
}
# dat is a matrix of two columns
# Merge overlapping/adjacent intervals. dat is a two-column matrix of
# (start, end) rows, assumed sorted by start with nondecreasing ends; an
# interval is merged into the previous one when its start is within 1 of the
# running end. Returns a two-column matrix of merged intervals.
merge.overlap=function(dat) {
    out=NULL
    cursor=1
    j=0
    for (i in 1:nrow(dat)) {
        # j>0 guard fixes a crash in the original: on the first row with
        # dat[1,1] <= 2, out was still NULL and out[0,2] errored
        if (j>0 && dat[i,1]<=cursor+1) {
            # overlapping: extend the current interval
            out[j,2]=dat[i,2]
            cursor=dat[i,2]
        } else {
            # new epitope (new disjoint interval)
            out=rbind (out, c(dat[i,1], dat[i,2]))
            cursor=dat[i,2]
            j=j+1
        }
    }
    out
}
## From the R-help e-mail by Ted Harding:
## http://tolstoy.newcastle.edu.au/R/e2/help/07/03/12853.html
## See also
## http://tolstoy.newcastle.edu.au/R/help/05/05/4254.html
pava <- function(x, wt = rep(1, length(x)))
{
    # Pool Adjacent Violators Algorithm: weighted isotone (nondecreasing)
    # regression. Repeatedly merges the first decreasing pair of blocks,
    # replacing their values by the block's weighted mean, until monotone.
    n <- length(x)
    if (n <= 1) return(x)
    blocks <- 1:n
    repeat {
        decreasing <- (as.vector(diff(x)) < 0)
        if (!any(decreasing)) break
        first.bad <- min(which(decreasing))
        # merge the two blocks containing positions first.bad, first.bad+1
        merged <- blocks %in% blocks[c(first.bad, first.bad + 1)]
        x[merged] <- sum(x[merged] * wt[merged]) / sum(wt[merged])
        blocks[merged] <- blocks[first.bad]
    }
    x
}
# the integratd density of a normal random variable, whose mean and precision follow a normal gamma distribution. It is a three-parameter t distribution.
# keywords: normgamma
# when x has length greater than 1 and same.distr is true, the data are considered to be from the same mean and variance
dnorm.norm.gamma = function(x, p, same.distr=FALSE, log=FALSE) {
    # Marginal density of normal data whose mean and precision follow a
    # normal-gamma prior p = c(mu.0, lambda, a, beta); a three-parameter t
    # density. If same.distr, all elements of x share one (mean, precision)
    # draw; otherwise each x[i] gets an independent draw.
    mu.0=p[1]; lambda=p[2]; a=p[3]; beta=p[4]
    n=length(x)
    if (!same.distr) {
        if (n>1) {
            mu.0=rep(mu.0, n)
            lambda=rep(lambda, n)
            a=rep(a, n)
            beta=rep(beta, n)
        }
        ll= 1/2*log(lambda/(2*pi*(1+lambda))) + log(gamma(a+1/2)/gamma(a)) + a*log(beta) - (a+1/2)*log(beta+lambda*(x-mu.0)**2/(2*(1+lambda)))
    } else{
        s2=var(x)
        ll= 1/2*log(lambda/((2*pi)**n*(n+lambda))) + log(gamma(a+n/2)/gamma(a)) + a*log(beta) - (a+n/2)*log(beta + n*s2/2 +n*lambda*(mean(x)-mu.0)**2/(2*(1+lambda)))
    }
    ll=unname(ll)
    # if(), not ifelse(): `log` is a scalar flag, and ifelse(log, ...) would
    # truncate a vector ll to length 1 (the original bug for vector x)
    if (log) ll else exp(ll)
}
# simulation samples from a normal random variable, whose mean and precision follow a normal gamma distribution
rnorm.norm.gamma = function(n, mu.0, lambda, alpha, beta) {
    # Draw n samples from the normal / normal-gamma compound:
    # tau ~ Gamma(alpha, beta); mu | tau ~ N(mu.0, 1/(lambda*tau));
    # x | mu, tau ~ N(mu, 1/tau).
    precision <- rgamma(n, shape=alpha, rate=beta)
    centers <- rnorm(n, mean=mu.0, sd=sqrt(1/(lambda*precision)))
    rnorm(n, mean=centers, sd=precision**-.5)
}
# simulate correlated normal random variables, correlation is sqrt(alpha)
rnorm.cor = function (n, mu, sd, alpha) {
    # AR(1)-style sequence of n normals with lag-1 correlation sqrt(alpha):
    # each draw mixes the previous value with a fresh N(mu, sd) draw.
    draws <- numeric(n)
    draws[1] <- rnorm(1, mu, sd)
    if (n > 1) {
        carry <- sqrt(alpha)
        innov <- sqrt(1 - alpha)
        for (k in 2:n) {
            draws[k] <- carry * draws[k-1] + innov * rnorm(1, mu, sd)
        }
    }
    draws
}
# copied from pairs help page
## put (absolute) correlations on the upper panels,
## with size proportional to the correlations.
panel.cor <- function(x, y, digits=2, prefix="", cex.cor, method="pearson", ...)
{
    # pairs() upper panel: print |cor(x, y)| centered in the panel, with text
    # size proportional to the correlation magnitude.
    old.usr <- par("usr"); on.exit(par(old.usr))
    par(usr = c(0, 1, 0, 1))
    corr <- abs(cor(x, y, method=method))
    # format alongside a many-digit constant so significant digits are stable
    label <- paste(prefix, format(c(corr, 0.123456789), digits=digits)[1], sep="")
    if (missing(cex.cor)) cex.cor <- 0.8/strwidth(label)
    text(0.5, 0.5, label, cex = cex.cor * corr)
}
panel.hist <- function(x, ...)
{
    # pairs() diagonal panel: histogram of x rescaled to the panel height.
    old.usr <- par("usr"); on.exit(par(old.usr))
    par(usr = c(old.usr[1:2], 0, 1.5) )
    histo <- hist(x, plot = FALSE)
    brk <- histo$breaks
    heights <- histo$counts / max(histo$counts)
    rect(brk[-length(brk)], 0, brk[-1], heights, col="cyan", ...)
}
mypairs=function(dat, method="pearson"){
    # Scatterplot matrix: smoothed scatter below the diagonal, |correlation|
    # (via panel.cor, using `method`) above, histograms on the diagonal.
    pairs(dat,
          lower.panel=panel.smooth,
          upper.panel=panel.cor,
          diag.panel=panel.hist,
          method=method)
}
# if lty is specified, a line will be drawn
mylegend=function(legend, x, lty=1, ...) {
    # Borderless legend; x is a position index 1..9, keypad-style from
    # top-left ("topleft") through bottom-right ("bottomright").
    pos <- switch(x,
        "topleft", "top", "topright",
        "left", "center", "right",
        "bottomleft", "bottom", "bottomright")
    legend(x=pos, legend=legend, lty=lty, bty="n", ...)
}
# exp.z.beta is a vector
rltm=function (n, model, exp.z.beta=1, la0=1) {
    # Simulate n event times from a linear transformation model by inverting
    # the survival function at uniform draws.
    # model: "PH" (proportional hazards), "PO" (proportional odds), or "P2".
    # exp.z.beta: exp(z'beta), may be a vector; la0: baseline rate.
    u=runif(n)
    switch(model,
        PH = -log(1-u) / (exp.z.beta*la0),
        PO = log(1 + u/(1-u)/exp.z.beta) / la0,
        P2 = log(1 + (1/(1-u)^2 - 1)/exp.z.beta) / (2*la0),
        stop ("model not supported"))
}
# mixture normal density funtion
# mix.p: proportion of first component
dmixnorm=function (x, mix.p, sd1, sd2, log=FALSE){
    # Density of a two-component zero-mean normal mixture:
    # mix.p * N(0, sd1^2) + (1-mix.p) * N(0, sd2^2); optionally on log scale.
    dens <- mix.p * dnorm(x, 0, sd1) + (1 - mix.p) * dnorm(x, 0, sd2)
    if (log) dens <- log(dens)
    dens
}
getFileStem=function(file.name){
    # Portion of file.name before the first "."; only the first element of
    # file.name is used (strsplit semantics preserved deliberately).
    pieces <- strsplit(file.name, "\\.")[[1]]
    pieces[1]
}
# get first prinipal component
pr.1=function(x){
    # First principal component scores of the column-scaled data, with the
    # loading vector attached as attribute "rotation".
    # Note the PC sign is arbitrary (depends on the underlying SVD).
    z <- scale(x)
    loading <- prcomp(z)$rotation[, 1]
    scores <- c(z %*% loading)
    attr(scores, "rotation") <- loading
    scores
}