################################################################ get_medpolish_res <- function(target.pm,ps.n=11){ require(preprocessCore) ##require(aroma.light) tmp.pm.ps <- matrix(t(target.pm), nrow = ncol(target.pm) * ps.n)#one probeset per row tmpdim <- dimnames(target.pm) rm(target.pm);gc(verbose=FALSE) pm.medpolishres <- lapply(1:ncol(tmp.pm.ps),function(x){ cat("\r",x) tmpres <- rcModelMedianPolish(matrix(tmp.pm.ps[,x],ncol=ps.n))$Residuals ##medianPolish.matrix(matrix(tmp.pm.ps[,x],ncol=ps.n))$res c(tmpres, recursive=T) }) cat("\n");rm(tmp.pm.ps);gc(verbose=FALSE) pm.medpolishres <- unlist(pm.medpolishres, recursive = TRUE, use.names = FALSE) pm.medpolishres <- matrix(pm.medpolishres,nrow=length(tmpdim[[1]]),byrow=T) dimnames(pm.medpolishres) <- tmpdim pm.medpolishres } ################################################## ## Idea from: Cui X, Xu J, Asghar R, Condamine P, Svensson JT, Wanamaker S, Stein N, Roose M, Close TJ: Detecting single-feature polymorphisms using oligonucleotide arrays and robustified projection pursuit. Bioinformatics 2005, 21(20):3852-3858. ################################################## get_SFP_score <- function(medpolishres,cl=substring(colnames(medpolishres),1,1),ps.n=16){ cl <- as.numeric(as.factor(cl)) xscore <- c(sapply(seq(1,nrow(medpolishres),ps.n),function(intv){ cat("\r",(intv-1)/ps.n) tmpres <- medpolishres[intv:(intv+ps.n-1),] x <- abs(rowMeans(tmpres[,cl==1])-rowMeans(tmpres[,cl==2])) sapply(1:ps.n,function(n){ abs(x[n]-mean(x[-n]))/sd(x[-n]) }) }),recursive = TRUE) cat("\n") names(xscore) <- rownames(medpolishres) xscore } ################################################## ## Reference: West MA, van Leeuwen H, Kozik A, Kliebenstein DJ, Doerge RW, St Clair DA, Michelmore RW: High-density haplotyping with microarray-based expression and single feature polymorphism markers in Arabidopsis. Genome Res 2006, 16(6):787-795. ################################################## get_west_sfpdev <- function(target.pm,ps.n=16){ tmp.pm.ps <- matrix(t(target.pm), nrow = ncol(target.pm) * ps.n)#one probeset per row west.sfpdev <- apply(tmp.pm.ps,2,function(x){ x <- matrix(x,ncol=ps.n) sapply(1:ncol(x),function(y){ abs(x[,y]-rowMeans(x[,-y]))/x[,y] }) }) west.sfpdev <- matrix(west.sfpdev,nrow=nrow(target.pm),byrow=T) dimnames(west.sfpdev) <- dimnames(target.pm) west.sfpdev } do_limma <- function(pm.medpolishres,design,contrast.matrix,...){ require(limma) fit <- lmFit(pm.medpolishres,design) fit <- contrasts.fit(fit,contrast.matrix) eBayes(fit) } do_sam <- function(pm.medpolishres,cl=factor(substring(colnames(pm.medpolishres),1,1)),rand=123,B=500,...){ require(siggenes) sam(pm.medpolishres,cl,rand = rand,B=B,...) } get_ROC_data <- function(target.pm=NULL,ps.n=16,check.data,pm.medpolishres=NULL,one_side='less',return_res=FALSE,return_fit=FALSE){ require(limma) if(is.null(pm.medpolishres)){ cat("\rGet residuals of medpolish...");pm.medpolishres <- get_medpolish_res(target.pm,ps.n=ps.n); cat("Done.") } ##pm.medpolishres <- get_west_sfpdev(target.pm,ps.n=ps.n) psubset <- intersect(rownames(pm.medpolishres),check.data$Probename) check.data <- check.data[match(psubset,check.data$Probename,nomatch=0),] psubset.order <- match(psubset,rownames(pm.medpolishres),nomatch=0) cl <- factor(substring(colnames(pm.medpolishres),1,1)) design <- model.matrix(~0+cl);colnames(design) <- c('G1','G2');contrast.matrix <- makeContrasts(G2-G1,levels=design) cat("\nDo limma...");fit <- do_limma(pm.medpolishres,design,contrast.matrix); cat("Done.",col="\n") mysfp<- data.frame(check.data,pq.value=p.adjust(fit$p.value[,1],"fdr")[psubset.order],t=fit$t[,1][psubset.order],lfc=fit$coefficients[,1][psubset.order]) if (one_side=="less") mysfp$pq.value[mysfp$t>0] <- 1 else if (one_side=="more") mysfp$pq.value[mysfp$t<0] <- 1 mydata <- list(mysfp=mysfp) if(return_res) mydata[['medpolishres']] <- pm.medpolishres if(return_fit) mydata[['fit']] <- fit if(!(return_res|return_fit)) mysfp else mydata } draw_ROC <- function(mysfp,check.data,xlim=c(0,1),ylim=c(0,1),cols=1:length(mysfp),by=0.01,add=F,legend.draw=T,legend.auto=T,...){ if(!add) plot(NA,xlim=xlim,ylim=ylim,main="Receiver Operating Characteristic (ROC) Curves",xlab="False Positives",ylab="True Positives",...) proc <- lapply((1:length(mysfp)),function(x){ sfp <- mysfp[[x]] ps <- t(sapply(seq(0,1,by),function(qthr){ c(1-sum(sfp$pq.value>qthr&sfp$SFP==0)/sum(sfp$SFP==0),sum(sfp$pq.value<=qthr&sfp$SFP!=0)/sum(sfp$SFP!=0)) })) lines(ps,col=cols[x],...) ps }) if(!legend.draw) return(proc) if (legend.auto) legend((par("usr")[1]+par("usr")[2])*0.7,(par("usr")[3]+par("usr")[4])*0.9,legend=names(mysfp),fill=cols,col=cols,...) else legend(locator(1),legend=names(mysfp),fill=cols,col=cols,...) proc } get_fl_SNP <- function(SNPdata,fl_interval=0,fl_len=10){ SNPdata <- SNPdata[abs(SNPdata$Pos_forward-13)>=(13+fl_interval)&abs(SNPdata$Pos_forward-13)<(13+fl_interval+fl_len),] fl_SNP.info <- t(sapply(tapply(1:nrow(SNPdata),SNPdata$Probe_name,c),function(x){ snp.subset <- SNPdata[x,] minPPos <- NA ##minimus polymorphism site closed to the probe center item_in_probe_fl_region <- paste(sort(snp.subset$Pos_forward),collapse =',') snp_in_probe_fl_region <- NA if(sum(filter.snp <- snp.subset$MH63!=snp.subset$Zhen)!=0){ snp.item <- snp.subset[filter.snp,] snp.item <- snp.item[order(snp.item$Pos_forward),] snp_in_probe_fl_region <- paste(snp.item$Pos_forward,ifelse(snp.item$Nip=='N','X',ifelse(snp.item$Zhen==snp.item$Nip,'M','Z')),collapse =',',sep='') minPPos <- min(abs(snp.item$Pos_forward-13)) } c(item_in_probe_fl_region,snp_in_probe_fl_region,minPPos) })) tmp <- rownames(fl_SNP.info);rownames(fl_SNP.info) <- NULL data.frame(Probename=I(tmp),item_in_probe_fl_region=I(fl_SNP.info[,1]),snp_in_probe_fl_region=I(fl_SNP.info[,2]),minPPos=as.numeric(fl_SNP.info[,3])) } mylegend <- function (x, y = NULL, legend, fill = NULL, col = par("col"),box.border="black", lty, lwd, pch, angle = 45, density = NULL, bty = "o", bg = par("bg"), box.lwd = par("lwd"), box.lty = par("lty"), pt.bg = NA, cex = 1, pt.cex = cex, pt.lwd = lwd, xjust = 0, yjust = 1, x.intersp = 1, y.intersp = 1, adj = c(0, 0.5), text.width = NULL, text.col = par("col"), merge = do.lines && has.pch, trace = FALSE, plot = TRUE, ncol = 1, horiz = FALSE, title = NULL, inset = 0) { if (missing(legend) && !missing(y) && (is.character(y) || is.expression(y))) { legend <- y y <- NULL } mfill <- !missing(fill) || !missing(density) title <- as.graphicsAnnot(title) if (length(title) > 1) stop("invalid title") legend <- as.graphicsAnnot(legend) n.leg <- if (is.call(legend)) 1 else length(legend) if (n.leg == 0) stop("'legend' is of length 0") auto <- if (is.character(x)) match.arg(x, c("bottomright", "bottom", "bottomleft", "left", "topleft", "top", "topright", "right", "center")) else NA if (is.na(auto)) { xy <- xy.coords(x, y) x <- xy$x y <- xy$y nx <- length(x) if (nx < 1 || nx > 2) stop("invalid coordinate lengths") } else nx <- 0 xlog <- par("xlog") ylog <- par("ylog") rect2 <- function(left, top, dx, dy, density = NULL, angle, ...) { r <- left + dx if (xlog) { left <- 10^left r <- 10^r } b <- top - dy if (ylog) { top <- 10^top b <- 10^b } rect(left, top, r, b, angle = angle, density = density, ...) } segments2 <- function(x1, y1, dx, dy, ...) { x2 <- x1 + dx if (xlog) { x1 <- 10^x1 x2 <- 10^x2 } y2 <- y1 + dy if (ylog) { y1 <- 10^y1 y2 <- 10^y2 } segments(x1, y1, x2, y2, ...) } points2 <- function(x, y, ...) { if (xlog) x <- 10^x if (ylog) y <- 10^y points(x, y, ...) } text2 <- function(x, y, ...) { if (xlog) x <- 10^x if (ylog) y <- 10^y text(x, y, ...) } if (trace) catn <- function(...) do.call("cat", c(lapply(list(...), formatC), list("\n"))) cin <- par("cin") Cex <- cex * par("cex") if (is.null(text.width)) text.width <- max(abs(strwidth(legend, units = "user", cex = cex))) else if (!is.numeric(text.width) || text.width < 0) stop("'text.width' must be numeric, >= 0") xc <- Cex * xinch(cin[1], warn.log = FALSE) yc <- Cex * yinch(cin[2], warn.log = FALSE) if (xc < 0) text.width <- -text.width xchar <- xc xextra <- 0 yextra <- yc * (y.intersp - 1) ymax <- yc * max(1, strheight(legend, units = "user", cex = cex)/yc) ychar <- yextra + ymax if (trace) catn(" xchar=", xchar, "; (yextra,ychar)=", c(yextra, ychar)) if (mfill) { xbox <- xc * 0.8 ybox <- yc * 0.5 dx.fill <- xbox } do.lines <- (!missing(lty) && (is.character(lty) || any(lty > 0))) || !missing(lwd) n.legpercol <- if (horiz) { if (ncol != 1) warning("horizontal specification overrides: Number of columns := ", n.leg) ncol <- n.leg 1 } else ceiling(n.leg/ncol) if (has.pch <- !missing(pch) && length(pch) > 0) { if (is.character(pch) && !is.na(pch[1]) && nchar(pch[1], type = "c") > 1) { if (length(pch) > 1) warning("not using pch[2..] since pch[1] has multiple chars") np <- nchar(pch[1], type = "c") pch <- substr(rep.int(pch[1], np), 1:np, 1:np) } if (!merge) dx.pch <- x.intersp/2 * xchar } x.off <- if (merge) -0.7 else 0 if (is.na(auto)) { if (xlog) x <- log10(x) if (ylog) y <- log10(y) } if (nx == 2) { x <- sort(x) y <- sort(y) left <- x[1] top <- y[2] w <- diff(x) h <- diff(y) w0 <- w/ncol x <- mean(x) y <- mean(y) if (missing(xjust)) xjust <- 0.5 if (missing(yjust)) yjust <- 0.5 } else { h <- (n.legpercol + (!is.null(title))) * ychar + yc w0 <- text.width + (x.intersp + 1) * xchar if (mfill) w0 <- w0 + dx.fill if (has.pch && !merge) w0 <- w0 + dx.pch if (do.lines) w0 <- w0 + (2 + x.off) * xchar w <- ncol * w0 + 0.5 * xchar if (!is.null(title) && (tw <- strwidth(title, units = "user", cex = cex) + 0.5 * xchar) > w) { xextra <- (tw - w)/2 w <- tw } if (is.na(auto)) { left <- x - xjust * w top <- y + (1 - yjust) * h } else { usr <- par("usr") inset <- rep(inset, length.out = 2) insetx <- inset[1] * (usr[2] - usr[1]) left <- switch(auto, bottomright = , topright = , right = usr[2] - w - insetx, bottomleft = , left = , topleft = usr[1] + insetx, bottom = , top = , center = (usr[1] + usr[2] - w)/2) insety <- inset[2] * (usr[4] - usr[3]) top <- switch(auto, bottomright = , bottom = , bottomleft = usr[3] + h + insety, topleft = , top = , topright = usr[4] - insety, left = , right = , center = (usr[3] + usr[4] + h)/2) } } if (plot && bty != "n") { if (trace) catn(" rect2(", left, ",", top, ", w=", w, ", h=", h, ", ...)", sep = "") rect2(left, top, dx = w, dy = h, col = bg, density = NULL, lwd = box.lwd, lty = box.lty) } xt <- left + xchar + xextra + (w0 * rep.int(0:(ncol - 1), rep.int(n.legpercol, ncol)))[1:n.leg] yt <- top - 0.5 * yextra - ymax - (rep.int(1:n.legpercol, ncol)[1:n.leg] - 1 + (!is.null(title))) * ychar if (mfill) { if (plot) { fill <- rep(fill, length.out = n.leg) rect2(left = xt, top = yt + ybox/2, dx = xbox, dy = ybox, col = fill, density = density, angle = angle, border = box.border) } xt <- xt + dx.fill } if (plot && (has.pch || do.lines)) col <- rep(col, length.out = n.leg) if (missing(lwd)) lwd <- par("lwd") if (do.lines) { seg.len <- 2 if (missing(lty)) lty <- 1 lty <- rep(lty, length.out = n.leg) lwd <- rep(lwd, length.out = n.leg) ok.l <- !is.na(lty) & (is.character(lty) | lty > 0) if (trace) catn(" segments2(", xt[ok.l] + x.off * xchar, ",", yt[ok.l], ", dx=", seg.len * xchar, ", dy=0, ...)") if (plot) segments2(xt[ok.l] + x.off * xchar, yt[ok.l], dx = seg.len * xchar, dy = 0, lty = lty[ok.l], lwd = lwd[ok.l], col = col[ok.l]) xt <- xt + (seg.len + x.off) * xchar } if (has.pch) { pch <- rep(pch, length.out = n.leg) pt.bg <- rep(pt.bg, length.out = n.leg) pt.cex <- rep(pt.cex, length.out = n.leg) pt.lwd <- rep(pt.lwd, length.out = n.leg) ok <- !is.na(pch) & (is.character(pch) | pch >= 0) x1 <- (if (merge) xt - (seg.len/2) * xchar else xt)[ok] y1 <- yt[ok] if (trace) catn(" points2(", x1, ",", y1, ", pch=", pch[ok], ", ...)") if (plot) points2(x1, y1, pch = pch[ok], col = col[ok], cex = pt.cex[ok], bg = pt.bg[ok], lwd = pt.lwd[ok]) if (!merge) xt <- xt + dx.pch } xt <- xt + x.intersp * xchar if (plot) { if (!is.null(title)) text2(left + w/2, top - ymax, labels = title, adj = c(0.5, 0), cex = cex, col = text.col) text2(xt, yt, labels = legend, adj = adj, cex = cex, col = text.col) } invisible(list(rect = list(w = w, h = h, left = left, top = top), text = list(x = xt, y = yt))) } myplot.histogram <- function (x, freq = equidist,percent=FALSE, density = NULL, angle = 45, col = NULL, border = par("fg"), lty = NULL, main = paste("Histogram of", paste(x$xname, collapse = "\n")), sub = NULL, xlab = x$xname, ylab, xlim = range(x$breaks), ylim = NULL, axes = TRUE, labels = FALSE, add = FALSE, ...) { equidist <- if (is.logical(x$equidist)) x$equidist else { h <- diff(x$breaks) diff(range(h)) < 1e-07 * mean(h) } if (freq && !equidist) warning("the AREAS in the plot are wrong -- rather use freq=FALSE") y <- if (freq){ if (percent) 100 * x$counts/sum(x$counts) else x$counts } else { y <- x$density if (is.null(y)) x$intensities else y } nB <- length(x$breaks) if (is.null(y) || 0 == nB) stop("'x' is wrongly structured") if (!add) { if (is.null(ylim)) ylim <- range(y, 0) if (missing(ylab)) ylab <- if (percent) "Percent" else if (!freq) "Density" else "Frequency" plot.new() plot.window(xlim, ylim, "") title(main = main, sub = sub, xlab = xlab, ylab = ylab, ...) if (axes) { axis(1, ...) axis(2, ...) } } rect(x$breaks[-nB], 0, x$breaks[-1], y, col = col, border = border, angle = angle, density = density, lty = lty) if ((logl <- is.logical(labels) && labels) || is.character(labels)) text(x$mids, y, labels = if (logl) { if (freq) x$counts else round(x$density, 3) } else labels, adj = c(0.5, -0.5)) invisible() }