library(affy) library(limma) library(siggenes) library(genefilter) setwd("/usr/local/data/supplements/sfp/data/yeast") source("../SFP_function.R") load("yeast_pmIndex.RData") set.seed(123) ################################################################ library(limma) filenames <- list.celfiles("CEL",full=T) all.pm <- pm(normalize.AffyBatch.quantiles(bg.correct.rma(ReadAffy(filenames=filenames)))) colnames(all.pm) <- sub("(.*/)?(.*?)(.CEL)(.gz)?","\\2",colnames(all.pm)) rownames(all.pm) <- pmIndex$probename save(all.pm,file="all.pm.RData",compress=T) ################################################################ table(table(pmIndex$probeset)) ## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 ## 28 27 18 23 25 25 21 1093 20 24 31 22 32 26 25 7835 ## 20 21 49 50 51 ## 46 5 1 4 4 str(probeset16 <- names(which(table(pmIndex$probeset)==16))) yeast.pm <- log(all.pm[match(pmIndex$probeset,probeset16,nomatch=0)!=0,],2) rm(all.pm) ##save(yeast.pm,file="yeast.pm.RData",compress=T) load("yeast.pm.RData") system.time(pm.medpolishres <- get_medpolish_res(yeast.pm,ps.n=16)) ## user system elapsed ## 4.470 0.310 4.784 (seconds) ##save(pm.medpolishres,file="pm.medpolishres.RData",compress=T) (cl <- factor(substring(colnames(pm.medpolishres),1,1))) design <- model.matrix(~0+cl);colnames(design) <- sub('cl','',colnames(design));contrast.matrix <- makeContrasts(R-B,levels=design) fit <- do_limma(pm.medpolishres,design,contrast.matrix) ##save(fit,file="fit.RData") summary(fit.data <- decideTests(fit,p=0.001));table(filter <- fit.data!=0) ##save(fit.data,file="fit.data.RData") ##fixnum: to get the same number of SFPs (fixnum <- sum(fit.data!=0)) str(mysfp <- data.frame(Probename=I(rownames(fit.data)),p.value=fit$p.value[,1],pq.value=p.adjust(fit$p.value[,1],"fdr"),t=fit$t[,1],lfc=fit$coe[,1])[filter,]) ##write.table(mysfp,file="yeast_sfp.xls",row.name=F,quote=F,sep="\t") ###############To test Rostocks' method#################################################x load("yeast.pm.RData") library(limma) tyeast.pm <- matrix(t(yeast.pm), nrow = ncol(yeast.pm) * 16, ncol = nrow(yeast.pm) / 16) colnames(tyeast.pm) <- probeset16 fprobe <- factor(rep(paste("p",1:16,sep=""),each = ncol(yeast.pm))) ftissue <- factor(rep(substr(colnames(yeast.pm),3,3), time=16)) fgenotype <- factor(rep(substr(colnames(yeast.pm),1,1), 16)) tmpdim <- dimnames(yeast.pm) ## Codes from Rostoks # fit everything except the SFP probe:genotype interaction ## set up full model as a test i <- 1 #now record and remove the main effect of gene,tissue,genotype save residuals res.mat <- matrix(nrow = nrow(tyeast.pm), ncol = ncol(tyeast.pm)) t1 <- proc.time()[3] for (i in 1:ncol(tyeast.pm)){ # fit everything except the SFP probe:genotype interaction fit <- summary(lm(tyeast.pm[,i] ~ fprobe + ftissue + fgenotype + ftissue:fgenotype)) res.mat[,i] <- fit$residuals cat("\r",i) } t2 <- proc.time()[3] (t2-t1)/60 ## 2.55 minutes res.mat <- matrix(res.mat, nrow=length(tmpdim[[1]]),ncol=length(tmpdim[[2]]),dimnames=tmpdim,byrow=T) rm(tmpdim) ##save(res.mat,file = "fit.residuals.RData", compres=T) (cl <- factor(substring(colnames(res.mat),1,1))) design <- model.matrix(~0+cl);colnames(design) <- sub('cl','',colnames(design));contrast.matrix <- makeContrasts(R-B,levels=design) fit <- do_limma(res.mat,design,contrast.matrix) ##save(fit,file="fit_lm.RData") summary(fit.data <- decideTests(fit,p=0.001)) str(my.pqvalue <- p.adjust(fit$p.value[,1],'fdr')) (thresh <-quantile(my.pqvalue, fixnum/length(my.pqvalue))) fit.data <- matrix(rep(0,length(my.pqvalue))) fit.data <- new("TestResults", array(0, dim(fit$t))) dimnames(fit.data) <- dimnames(fit$t) fit.data[my.pqvalue0] <- 1 fit.data[my.pqvalue0] nosfp.probes <- yeast.snp$Probename[yeast.snp$SFP<=0] str(tmp <- union(sfp.probes,nosfp.probes)) str(tmp <- tmp[!tmp%in%intersect(sfp.probes,nosfp.probes)]) table(snp.geno <- sfp.check <- fit.data[rownames(fit.data)%in%tmp,]) snp.geno[snp.geno!=0] <- 0 snp.geno[names(snp.geno)%in%sfp.probes] <- -1 print(y <- table(snp.geno,sfp.check)) print(summary(y)) ###residuals of median polish: RM > BY were ignored cat("Sensitivity", sum(y[1,1])/sum(y[1,]), "\n" ) cat("False Negative Rate", 1-sum(y[1,1])/sum(y[1,]), "\n" ) cat("False Positive Rate", sum(y[2,1])/sum(y[2,]), "\n") cat("False Sequence Polymorphism Rate", sum(y[2,1])/sum(y[,1]), "\n") ###residuals of median polish: RM > BY as FALSE discovery cat("Sensitivity", sum(y[1,-2])/sum(y[1,]), "\n" ) cat("False Negative Rate", 1-sum(y[1,-2])/sum(y[1,]), "\n" ) cat("False Positive Rate", sum(y[2,-2])/sum(y[2,]), "\n") cat("False Sequence Polymorphism Rate", sum(y[2,-2])/sum(y[,c(1,3)]), "\n") ################################################################ load("fit.data.RData") load("pm.medpolishres.RData") ########The distribution of average absolute differences of residuals between two yeast strains BY and RM################## x11(width = 7, height = 6.5, pointsize = 12);par(mfcol=c(1,1)) (cl <- factor(substring(colnames(pm.medpolishres),1,1))) res.diff <- abs(rowMeans(pm.medpolishres[,cl=='R'])-rowMeans(pm.medpolishres[,cl=='B'])) tmpb <- seq(0,max(res.diff),length.out=100) ##hist(pmin(res.diff[fit.data==-1],8),tmpb,freq=F,xlim=c(0,8), ylim=c(0,0.7),main="",xlab=expression(group("|",italic(bar(E)[RM]) - italic(bar(E)[BY]) , "|")),col=8,border=0,ylab='Percent') ##hist(pmin(res.diff[fit.data==1],8),tmpb,freq=F,add=T,border="brown") myplot.histogram(hist(pmin(res.diff[fit.data==-1],8),breaks = tmpb,plot=F),xlim=c(0,7),ylim=c(0,5),percent=T,main="",xlab=expression(group("|",italic(bar(E)[RM]) - italic(bar(E)[BY]) , "|")),col=8,border=0)##,ylab='Percent') myplot.histogram(hist(pmin(res.diff[fit.data==1],8),tmpb,plot=F),percent=T,add=T,border="brown") myusr <- par("usr");mylegend(myusr[1]+(myusr[2]-myusr[1])*0.6,myusr[3]+(myusr[4]-myusr[3])*0.9,legend=c(expression(italic(bar(E)[RM]) > italic(bar(E)[BY])),expression(italic(bar(E)[RM]) < italic(bar(E)[BY]))),fill=c(0,8),box.border=c("brown",0)) ##legend("topright",legend=c(expression(italic(bar(E)[RM]) > italic(bar(E)[BY])),expression(italic(bar(E)[RM]) < italic(bar(E)[BY]))),fill=c(0,8),inset = .15) ##dev.print(pdf,file="images/Figure2The Distribution of Average Absolute Differences_abs(RM-BY).pdf") dev.off() ################################################################ ## yeast.flsnp <- read.csv("flanksnp.rm_080221.csv",sep="\t",as.is=T); ## mean(yeast.flsnp$SNP01_10);mean(yeast.flsnp$SNP11_20) ## save(yeast.flsnp,file="yeast.flsnp080221.RData",compress=T) load("yeast.flsnp080221.RData") load("fit.RData") table(is.na(sfporder <- match(yeast.flsnp$Probe,rownames(fit$p.value)))) str(mydata <- data.frame(Probename=yeast.flsnp$Probename,flSNP=yeast.flsnp$SNP01_10,intflSNP=yeast.flsnp$SNP11_20,p.value=fit$p.value[sfporder,1],pq.value=p.adjust(fit$p.value[,1],"fdr")[sfporder],t=fit$t[sfporder,1], stringsAsFactors = F)) ##str(mydata <- mydata[mydata$t>0,]) sum(mydata$t>0);table(tmp <- mydata$flSNP[mydata$t>0]);mean(tmp);table(tmp <- mydata$intflSNP[mydata$t>0]);mean(tmp) filter <- mydata$pq.val<0.05&mydata$t>0;table(tmp <- mydata$flSNP[filter]>0);mean(tmp);table(tmp <- mydata$intflSNP[filter]>0);mean(tmp) ## FALSE TRUE ## 745 74 ## [1] 0.09035409 ## FALSE TRUE ## 775 44 ## [1] 0.05372405 ##table(filter <- mydata$pq.val<0.05&mydata$t>0&(mydata$flSNP>0|mydata$intflSNP>0)); ##write.csv(cbind(mydata[filter,-(2:3)],yeast.flsnp[match(mydata$Probename[filter],yeast.flsnp$Probename),-1]),file="yeast_flSNP_probes.csv", row.names = FALSE) filter <- mydata$pq.val<0.001&mydata$t>0;table(tmp <- mydata$flSNP[filter]>0);mean(tmp);table(tmp <- mydata$intflSNP[filter]>0);mean(tmp) ## FALSE TRUE ## 73 13 ## [1] 0.1511628 ## FALSE TRUE ## 85 1 ## [1] 0.01162791 ##save(mydata,file="mydata080221.RData",compress=T) ##load("mydata080221.RData") #########Figure 3 The fraction distribution of non-polymorphic probes with flanking SNPs in different statistical thresholds.################## par(mfcol=c(1,2)) brs <- quantile(mydata$pq.value[mydata$t>0],seq(0.0,1,0.002)) ##brs <- seq(0.0,1,0.001) tmp <- cbind(brs,sapply(brs,function(x){mean(mydata$flSNP[mydata$pq.value0]>0,na.rm=T)})) ##tmp <- tmp[is.finite(log(tmp[,2])),] ulnum <- max(tmp[,2],na.rm=T)+0.1;llnum <- min(tmp[,2],na.rm=T) ylim <- c(llnum,ulnum)##c(0,0.4) plot(tmp,type='o',ylim=ylim,ylab="Fraction of SFPs with flanking SNPs",xlab="BH adjusted p-value",main="Flanking 10 bp",cex=0.5) ##title("Fraction of SFPs with SNPs in flanking 10 bp", cex.main= 0.75) tmp <- cbind(brs,sapply(brs,function(x){mean(mydata$intflSNP[mydata$pq.value0]>0,na.rm=T)})) plot(tmp,type='o',ylim=ylim,ylab="Fraction of SFPs with flanking SNPs",xlab="BH adjusted p-value",main="Flanking 11-20 bp",cex=0.5) ##title("Fraction of SFPs with SNPs in flanking 11-20 bp", cex.main= 0.75) ##lines(tmp,type='o',ylim=ylim,ylab="Fraction of SFPs with flanking SNPs",xlab="BH adjusted p-value",main="Fraction of SFPs with SNPs in flanking 10-20 bp",cex=0.5,col=8) ##dev.print(pdf,file="images/yeast_flSNP_por.pdf",width=8, height=6) ################################################## ## Fisher exact test and Chi test ################################################## load("fit.data.RData") x<-table(mydata$flSNP[mydata$Probename%in%rownames(fit.data)[fit.data>0]]>0);p <- table(mydata$flSNP);p <- p/sum(p) cbind(p,x,x/sum(x));(rchi <- chisq.test(x, p = p)) cat("Fold: ",(x[2]/sum(x))/p[2],"\n") x<-table(mydata$intflSNP[mydata$Probename%in%rownames(fit.data)[fit.data>0]]>0);p <- table(mydata$intflSNP);p <- p/sum(p) cbind(p,x,x/sum(x));(rchi <- chisq.test(x, p = p)) cat("Fold: ",(x[2]/sum(x))/p[2],"\n") thread <- 0.001;x.t <- cbind(table(mydata$flSNP[mydata$pq.val>=thread&mydata$t>0]>0),table(mydata$flSNP[mydata$pq.val0]>0));dimnames(x.t) <- list(c('NoSNP','SNP'),paste(c('>','<'),thread,sep=''));x.t fisher.test(x.t,alter='great') x.t <- cbind(table(mydata$intflSNP[mydata$pq.val>=thread&mydata$t>0]>0),table(mydata$intflSNP[mydata$pq.val0]>0));dimnames(x.t) <- list(c('NoSNP','SNP'),paste(c('>','<'),thread,sep=''));x.t fisher.test(x.t,alter='great') ################################################## ## Does the free energy influence probe-target hibridyzation? ################################################## ###Select sequence pairs with probe flanking SNPs ##Condition 1. SNPs in probe flanking 1-10 bases nrow(flank.snp.subset <- yeast.flsnp[yeast.flsnp$SNP01_10>0&yeast.flsnp$SNPpos!='0'&nchar(yeast.flsnp$seg)==151,]) ##Condition 2. Monomorphic in probe region nrow(flank.snp.subset <- flank.snp.subset[substr(flank.snp.subset$seg_seq,26,50)==substr(flank.snp.subset$seq01_10,11,35)&substr(flank.snp.subset$seg_seq,26,50)==substr(flank.snp.subset$seg_seq,102,126),]) ##Condition 3. Just SNP, IN/DEL may influence the caculation of the changes of free energy, for the length of sequence paire is not the same nrow(flank.snp.subset <- flank.snp.subset[-grep('[^ATGC@]',flank.snp.subset$seg),]) ##save(flank.snp.subset,file="flank.snp.subset.RData",compress=T) ########Mfold package######cRNA############## library(Biostrings) file.BY <- "Flanking_75bp_BY_cRNA.fas";file.RM <- "Flanking_75bp_RM_cRNA.fas" write.table(paste('>',flank.snp.subset$Probename,"\n",sapply(substr(flank.snp.subset$seg_seq,0,75),function(x){as.character(reverseComplement(DNAString(x)))}),sep=''),file=file.BY,row.name=F,col.name=F,sep="\t",quote=F);write.table(paste('>',flank.snp.subset$Probename,"\n",sapply(substr(flank.snp.subset$seg_seq,77,1000),function(x){as.character(reverseComplement(DNAString(x)))}),sep=''),file=file.RM,row.name=F,col.name=F,sep="\t",quote=F) system(paste("hybrid-ss-min -E -n RNA -t 45 -T 45 ",file.BY,'|wc -l',sep=""));system(paste("hybrid-ss-min -E -n RNA -t 45 -T 45 ",file.RM,'|wc -l',sep="")) dG.BY.cRNA <- read.table(paste(file.BY,".dG",sep=''),as.is=T,header=F)[,2];dG.RM.cRNA <- read.table(paste(file.RM,".dG",sep=''),as.is=T,header=F)[,2];ddG.cRNA <- dG.RM.cRNA-dG.BY.cRNA; table(ddG.cRNA[ddG.cRNA!=0]<0);mean(ddG.cRNA[ddG.cRNA!=0]<0) ########Construct data frame################## load("mydata080221.RData") table(is.na(sfporder <- match(flank.snp.subset$Probe,mydata$Probename))) free_energy <- cbind(mydata[sfporder,],BY.cRNA.dG=dG.BY.cRNA,RM.cRNA.dG=dG.RM.cRNA,ddG.cRNA=ddG.cRNA) free_energy <- na.omit(free_energy) ##save(free_energy,file="free_energy.RData",compress=T) load("free_energy.RData") table(tmp <- free_energy$ddG.cRNA!=0);mean(tmp) table(filter <- free_energy$pq.value<0.05&free_energy$t!=0&free_energy$ddG.cRNA!=0) table(free_energy$ddG.cRNA[filter]<0);mean(free_energy$ddG.cRNA[filter]<0) ###############Additional file 2: The distribution of Delta_G_(RM- BY) indicated the influence of minimum free energy of RNA towards the binding affinity############## x11(width = 7, height = 7, pointsize = 12) par(mfcol=c(2,1),mar=c(4.5,4,2,2),xpd=F,font.main=1,cex.main=1,font.lab=1,cex.lab=1) tmpbreaks <- hist(free_energy$ddG.cRNA,100,plot=F);myplot.histogram(tmpbreaks,xlab=expression(italic(paste(Delta,'G'))(kJ)),percent=T,col=8,border=0,ylim=c(0,3.5),main=expression(paste("The distribution of ",italic(paste(Delta,'G'[RM - BY],sep='')),sep='')));abline(v=c(1,-1)*quantile(abs(free_energy$ddG.cRNA),0.5),col=8,lty=2) myplot.histogram(hist(free_energy$ddG.cRNA[free_energy$t>0&free_energy$pq.value<0.5],tmpbreaks$breaks,plot=F),percent=T,add=T,border=2) myusr <- par("usr");legend(myusr[1]+(myusr[2]-myusr[1])*0.7,myusr[3]+(myusr[4]-myusr[3])*0.9,legend=c('All',expression(italic(bar(E)[RM]) > italic(bar(E)[BY]))),fill=c(8,2)) myusr <- par("usr");text(myusr[1]-(myusr[2]-myusr[1])*0.1,myusr[3]+(myusr[4]-myusr[3])*1.05, labels = '(a)',cex=1,xpd=T) tmpbreaks <- hist(free_energy$ddG.cRNA,100,plot=F);myplot.histogram(tmpbreaks,xlab=expression(italic(paste(Delta,'G'))(kJ)),percent=T,col=8,border=0,ylim=c(0,3.5),main='');abline(v=c(1,-1)*quantile(abs(free_energy$ddG.cRNA),0.5),col=8,lty=2) myplot.histogram(hist(free_energy$ddG.cRNA[free_energy$t<0&free_energy$pq.value<0.5],tmpbreaks$breaks,plot=F),percent=T,add=T,border=3) myusr <- par("usr");legend(myusr[1]+(myusr[2]-myusr[1])*0.7,myusr[3]+(myusr[4]-myusr[3])*0.9,legend=c('All',expression(italic(bar(E)[RM]) < italic(bar(E)[BY]))),fill=c(8,3)) myusr <- par("usr");text(myusr[1]-(myusr[2]-myusr[1])*0.1,myusr[3]+(myusr[4]-myusr[3])*1.05, labels = '(b)',cex=1,xpd=T) ##dev.print(pdf,file="images/SFigure2_dist_free_energy.pdf") dev.off() ##nrow(tmp.free_energy <- free_energy[free_energy$ddG.cRNA!=0,]) ##################Figure 5: The fraction distribution of the free energy changes in non-polymorphic probes with flanking SNPs under different statistical threholds############### x11(width = 7, height = 7, pointsize = 12) par(mfrow=c(1,1),xpd=F,font.main=1,cex.main=1,font.lab=1,cex.lab=1) #####Select threshold to filter not-changed or less-changed sequence pairs (fth <- quantile(abs(free_energy$ddG.cRNA),0.5))##sd(free_energy$ddG.cRNA) brs <- quantile(free_energy$pq.value,seq(0.02,1,0.005))##sort(c(10^-seq(10,8,-0.5),quantile(mydata$pq.value,seq(0,1,0.01),na.rm=T))) ##seq(0,1,0.02)brs <- seq(0,1,0.001) tmp <- cbind(brs,sapply(brs,function(x){mean(free_energy$ddG.cRNA[free_energy$pq.val0&abs(free_energy$ddG.cRNA)>fth]>0,na.rm=T)})) ulnum <- max(tmp[,2],na.rm=T)+0.1;llnum <- min(tmp[,2],na.rm=T) plot(tmp,type='o',ylim=c(0.2,0.8),ylab=expression(paste("Fraction of sequence pairs with ",italic(paste(Delta,'G'[RM - BY],sep=''))>0,sep="")),xlab="BH adjusted p-value",main="",cex=0.5,col="brown")##Sequence pairs of which FLSNPs increased their minimum RNA free energy" tmp <- cbind(brs,sapply(brs,function(x){mean(free_energy$ddG.cRNA[free_energy$pq.valfth]>0,na.rm=T)})) tmp <- tmp[is.finite(log(tmp[,2])),] lines(tmp,type='o',col='black',cex=0.5) abline(h=0.5,col=8,lty=2) myusr <- par("usr");legend(myusr[1]+(myusr[2]-myusr[1])*0.6,myusr[3]+(myusr[4]-myusr[3])*0.9,legend=c(expression(italic(bar(E)[RM]) > italic(bar(E)[BY])),expression(italic(bar(E)[RM]) < italic(bar(E)[BY]))),fill=c('brown','black')) ##dev.print(pdf,file="images/Figure5FLSFPs_free_energy.pdf") dev.off() ################ ##Fisher exact test str(tmp.free_energy <- free_energy[abs(free_energy$ddG.cRNA)>fth,]) thread <- 0.05 ##########Group: RM > BY filter.t <- tmp.free_energy$t>0 table(filter.energy <- tmp.free_energy$ddG.cRNA[filter.t]>0)###free energy RM > BY table(filter.thread <- tmp.free_energy$pq.value[filter.t]','<'),thread,sep='');rownames(x.t) <- c('D<0','D>0');x.t fisher.test(x.t,alter='great') ## filter.thread ## filter.energy >0.05 <0.05 ## D<0 472 11 ## D>0 541 29 ## Fisher's Exact Test for Count Data ## data: x.t ## p-value = 0.01221 ## alternative hypothesis: true odds ratio is greater than 1 ## 95 percent confidence interval: ## 1.220885 Inf ## sample estimates: ## odds ratio ## 2.298326 ##########Group: RM < BY filter.t <- tmp.free_energy$t<0 table(filter.energy <- tmp.free_energy$ddG.cRNA[filter.t]>0)###free energy RM > BY table(filter.thread <- tmp.free_energy$pq.value[filter.t]','<'),thread,sep='');rownames(x.t) <- c('D<0','D>0');x.t fisher.test(x.t,alter='less') ## filter.thread ## filter.energy >0.05 <0.05 ## D<0 351 5 ## D>0 308 2 ## Fisher's Exact Test for Count Data ## data: x.t ## p-value = 0.2859 ## alternative hypothesis: true odds ratio is less than 1 ## 95 percent confidence interval: ## 0.000000 2.219191 ## sample estimates: ## odds ratio ## 0.4563597 ###Chi test filter.t <- tmp.free_energy$t>0;p <- table(tmp.free_energy$ddG.cRNA[filter.t]>0);p <- p/sum(p);x<-table(tmp.free_energy$ddG.cRNA[tmp.free_energy$pq.value0);cbind(p,x,x/sum(x)) (rchi <- chisq.test(x, p = p)) ##X-squared = 5.4358, df = 1, p-value = 0.01973 filter.t <- tmp.free_energy$t<0;p <- table(tmp.free_energy$ddG.cRNA[filter.t]>0);p <- p/sum(p);x<-table(tmp.free_energy$ddG.cRNA[tmp.free_energy$pq.value0);cbind(p,x,x/sum(x)) (rchi <- chisq.test(x, p = p)) ##X-squared = 0.909, df = 1, p-value = 0.3404 ################################################## ## nucleotide composition ################################################## load("mydata080221.RData") load("flank.snp.subset.RData") str(tmp <- flank.snp.subset$Probename[flank.snp.subset$basepaire!='0']) ##Condition: probes with unique SNP in flanking 1-10 bases region str(tmp <- intersect(flank.snp.subset$Probename[-grep('\\|[0-9\\-]',flank.snp.subset$SNPpos)],tmp)) str(flSNP.basepaire <- data.frame(mydata[match(tmp,mydata$Probename),],flank.snp.subset[match(tmp,flank.snp.subset$Probename),c('basepaire','rmbase')])) str(flSNP.basepaire <- na.omit(flSNP.basepaire)) ##(tmp <- table(flSNP.basepaire$base)) ##a <- table(flSNP.basepaire$base[flSNP.basepaire$p.val<0.01&flSNP.basepaire$t>0]);a <- a[match(names(tmp),names(a))];names(a) <- names(tmp);a*100/tmp;a <- table(flSNP.basepaire$base[flSNP.basepaire$p.val<0.01&flSNP.basepaire$t<0]);a <- a[match(names(tmp),names(a))];names(a) <- names(tmp);a*100/tmp #############Figure 4: The fraction distribution of nucleotide content of SNPs flanking non-polymorphic probes under different statisitcal thresholds.############## x11(width = 7, height = 5.8, pointsize = 12) par(mfrow=c(2,2),xpd=F,mar=c(4.1,4.1,2.1,2.1),font.main=1,cex.main=1) brs <- quantile(flSNP.basepaire$pq.value,seq(0,1,0.01))##sort(c(10^-seq(10,8,-0.5),quantile(mydata$pq.value,seq(0,1,0.01),na.rm=T))) nuc.f <- function(ppair='',tlab='',ylab="Fraction",xlab="BH adjusted p-value",main=ppair){ tmp <- cbind(brs,sapply(brs,function(x){mean(flSNP.basepaire$rmbase[flSNP.basepaire$pq.val0]==ppair,na.rm=T)})) ulnum <- max(tmp[,2],na.rm=T)+0.1;llnum <- min(tmp[,2],na.rm=T) plot(tmp,type='o',ylim=c(0.0,0.5),ylab=ylab,xlab=xlab,main=main,cex=0.5,col="brown") tmp <- cbind(brs,sapply(brs,function(x){mean(flSNP.basepaire$rmbase[flSNP.basepaire$pq.val italic(bar(E)[BY])),expression(italic(bar(E)[RM]) < italic(bar(E)[BY]))),fill=c(1,"brown")) dev.print(pdf,file="images/Figure4FLSFPs_base_dist.pdf")##,width = 7, height = 5.8) dev.off() ################ ##Fisher exact test thread <- 0.05;x.t <- cbind(table(flSNP.basepaire$rmbase[flSNP.basepaire$pq.val>=thread&flSNP.basepaire$t!=0]),table(flSNP.basepaire$rmbase[flSNP.basepaire$pq.val','<'),thread,sep='');x.t ## >0.05 <0.05 ## A 791 29 ## C 804 26 ## G 784 20 ## T 821 21 filter.t <- flSNP.basepaire$t<0;p <- table(flSNP.basepaire$rmbase[filter.t]);p <- p/sum(p);x<-sapply(names(p),function(y){sum(flSNP.basepaire$rmbase[flSNP.basepaire$pq.val0;p <- table(flSNP.basepaire$rmbase[filter.t]);p <- p/sum(p);x<-sapply(names(p),function(y){sum(flSNP.basepaire$rmbase[flSNP.basepaire$pq.val0] <- 1 fit.data[fit.pqvalue