################################################## ## Author: Weibo Xie ## Date: Tue Aug 7 21:50:31 2007 ## ## ## Notes: Part of codes modified from Barley SFP Discovery Experiments by Nils Rostoks et al ## ################################################## setwd("/usr/local/data/supplements/sfp/data/rice") source("../SFP_function.R") library(affy) filenames <- sort(list.celfiles("CEL/",pattern='[MZ][345]',full.names=TRUE)) ################################################## ## How many probe sequences stored in Rice_probe_fasta file? ################################################## ## sed -n '/>/p' /usr/local/data/affy_data/affy_seq/Rice_probe_fasta|wc -l ## 631066 ################################################## ## To locate the probes to pseudo-chromosome ################################################## ## nohup blat /usr/local/data/ftp/pub/bio-data/rice/annotation_dbs/pseudomolecules/version_5.0/all.con /usr/local/data/affy_data/affy_seq/Rice_probe_fasta -t=dna -q=dna -minIdentity=100 -minMatch=1 stepSize=5 ~/data/blast_results/Affy_rice_probes_vs_TIGR_V5.blatresults ################################################## ## To make pm probes index ################################################## tmp <- ReadAffy(filenames=filenames[1]) pmIndex <- pmindex(tmp) str(probenames <- rep(names(pmIndex), unlist(lapply(pmIndex, length)))) pmIndex <- unlist(pmIndex) str(pmIndex2xy <- indices2xy(pmIndex,abatch=tmp)) head(pmIndex <- data.frame(probename=paste(probenames,pmIndex2xy[,1],pmIndex2xy[,2],sep='--'),probeset=as.factor(probenames),pmIndex2xy,pmIndex)) save(pmIndex,file="pmIndex.RData",compress=T) ################################################## ## To get expressed probesets ################################################## load("all.mas5call.RData") load("pmIndex.RData") str(probeset11 <- grep("^Os", names(which(table(pmIndex$probeset)==11)),value=T)) tmp <- sapply(tapply(1:ncol(all.mas5call), substr(colnames(all.mas5call),1,2),c),function(x){ rowSums(all.mas5call[,x]=='P') }) library(limma) str(fit.data <- tmp[,1:3]>3&tmp[,4:6]>3) str(fit.data <- fit.data[rownames(fit.data)%in%probeset11,]) colnames(fit.data) <- c('SBP','PSP','PCF')##paste('Stage',3:5) vennDiagram(fit.data,include="both",main='Expressed Gene')##'Changed Gene')##up-regulated, down-regulated dev.print(pdf,file='images/Expressed_Gene_venn.pdf',width=12, height=12) str(EXPprobesets <- grep('^O',rownames(fit.data)[rowSums(fit.data)>0],value=T)) save(EXPprobesets,file='EXPprobesets.RData',compress=T) ################################################## ## To do quality control ################################################## all.rma <- exprs(justRMA(filenames=filenames)) colnames(all.rma) <- sub('.CEL(.gz)?','',colnames(all.rma)) tmp<-all.rma[,grep('[MZ][345]',colnames(all.rma))] dd <- as.dist((1 - cor(tmp))/2);plot(hclust(dd),xlab="Chips") dev.print(pdf,file="images/hclust_by_cor_rma.pdf") save(all.rma,file="all.rma.RData",compress=T) rm(tmp,all.rma) ################################################## ## To get raw intensities ################################################## library(affy) all.pm.raw <- read.probematrix(filenames=filenames,which="pm")$pm load("pmIndex.RData") colnames(all.pm.raw) <- sub(".*/(.*?).CEL.gz","\\1",colnames(all.pm.raw)) rownames(all.pm.raw) <- pmIndex$probename save(all.pm.raw,file="all.pm.raw.RData",compress=T) dd <- as.dist((1 - cor(all.pm.raw))/2);plot(hclust(dd),main="Raw PM Intensity",xlab="Chips") dev.print(pdf,file="images/hclust_by_cor_raw_pm_int.pdf") ################################################## ## To do background correction ################################################## for(i in 1:ncol(all.pm.raw)) all.pm.raw[,i] <- bg.adjust(all.pm.raw[,i]) all.pm.bgcr <- all.pm.raw save(all.pm.bgcr,file="all.pm.bgcr.RData",compress=T) rm(all.pm.raw) gc() dd <- as.dist((1 - cor(all.pm.bgcr))/2);plot(hclust(dd),main="Bgcr PM Intensity",xlab="Chips") dev.print(pdf,file="images/hclust_by_cor_bgcrrma_pm_int.pdf") ################################################## ## To do quantile normalization ################################################## ##load("all.pm.bgcr.RData") all.pm <- normalize.quantiles(all.pm.bgcr,copy=F) dimnames(all.pm) <- dimnames(all.pm.bgcr) save(all.pm,file="all.pm.RData",compress=T) rm(all.pm.bgcr) dd <- as.dist((1 - cor(all.pm))/2);plot(hclust(dd),main="Normalized PM Intensity",xlab="Chips") dev.print(pdf,file="images/hclust_by_cor_normrma_pm_int.pdf") ################################################## ## To make check data from OryzaSNP psuedo sequences ################################################## ## http://www.oryzasnp.org/ str(nosfp.probes <- grep('^O',scan(pipe("awk '{print $1}' Probes_MH63_vs_ZS97_PM_OryzaSNP.xls"),''),value=T)) load("uniprobes.RData") str(nosfp.probes <- intersect(nosfp.probes,uniprobes)) save(nosfp.probes,file="nosfp.probes.RData",compress=T) str(flnosfp.probes <- grep('^O',scan(pipe("awk '{print $1}' Probes_Flanking_20bp_Sequences_MH63_vs_ZS97_PM_OryzaSNP.xls"),''),value=T)) str(flnosfp.probes <- flnosfp.probes[flnosfp.probes%in%intersect(nosfp.probes,uniprobes)]) save(flnosfp.probes,file="flnosfp.probes.RData",compress=T) ################################################## ## To make check data from OryzaSNP SNP data ################################################## ## http://www.oryzasnp.org/ str(oryzaSNP.raw <- read.table("Probes_MH63_vs_ZS97_SNP_table_OryzaSNP.xls",as.is=T,header=T)) save(oryzaSNP.raw,file="oryzaSNP.raw.RData",compress=T) str(sfp.probes.mh <- oryzaSNP.raw$Probe_name[oryzaSNP.raw$Pos_forward>0&oryzaSNP.raw$Pos_forward<26&oryzaSNP.raw$Zh==oryzaSNP.raw$Nip&oryzaSNP.raw$Nip!='N'&oryzaSNP.raw$MH!=oryzaSNP.raw$Zh]) str(sfp.probes.zs <- oryzaSNP.raw$Probe_name[oryzaSNP.raw$Pos_forward>0&oryzaSNP.raw$Pos_forward<26&oryzaSNP.raw$MH==oryzaSNP.raw$Nip&oryzaSNP.raw$Nip!='N'&oryzaSNP.raw$MH!=oryzaSNP.raw$Zh]) save(sfp.probes.mh,sfp.probes.zs,file="sfp.probes.RData",coompress=T)