需要把 IUPAC 字符串形式的 motif 转换成 position count matrix(PCM,位置计数矩阵)。
#' Convert an IUPAC motif string into a position count matrix
#'
#' Every character of the motif is expanded into one matrix column holding the
#' weight of each base (rows A, C, G, T). Unambiguous codes put 100 on a single
#' base, two-way codes put 50 on each of two bases, three-way codes put 33 on
#' each of three bases (so those columns sum to 99, not 100), and `N` puts 25
#' on every base. `U` is expanded exactly like `T`.
#'
#' @param iupac A single string made of the IUPAC nucleotide codes
#'   `ACGTURYMKWSBDHVN`. Matching is case-insensitive.
#' @return A numeric matrix with 4 rows named `A`, `C`, `G`, `T` and one
#'   unnamed column per character of `iupac`. An empty string yields a
#'   4 x 0 matrix.
#' @examples
#' iupac2pcm("WWWTATTATTTW")
iupac2pcm <- function(iupac) {
  codes <- list(
    A = c(100, 0, 0, 0),
    C = c(0, 100, 0, 0),
    G = c(0, 0, 100, 0),
    T = c(0, 0, 0, 100),
    U = c(0, 0, 0, 100),
    R = c(50, 0, 50, 0),
    Y = c(0, 50, 0, 50),
    M = c(50, 50, 0, 0),
    K = c(0, 0, 50, 50),
    W = c(50, 0, 0, 50),
    S = c(0, 50, 50, 0),
    B = c(0, 33, 33, 33),
    D = c(33, 0, 33, 33),
    H = c(33, 33, 0, 33),
    V = c(33, 33, 33, 0),
    N = c(25, 25, 25, 25)
  )

  iupac <- toupper(iupac)
  # `if ()` needs a scalar condition: reject vectors, NULL and NA up front
  # instead of failing later with an unrelated error.
  if (length(iupac) != 1L || is.na(iupac)) {
    stop("please check the inputs: `iupac` must be a single non-missing string")
  }
  if (grepl("[^ACGTURYMKWSBDHVN]", iupac)) stop("please check the inputs")

  # Guard the empty motif explicitly so it maps to zero columns.
  symbols <- if (nzchar(iupac)) strsplit(iupac, "")[[1]] else character(0)

  # One column per IUPAC code; selecting columns by symbol replaces the
  # position-by-position loop and is safe for zero symbols.
  lookup <- do.call(cbind, codes)
  rownames(lookup) <- c("A", "C", "G", "T")
  re <- lookup[, symbols, drop = FALSE]
  colnames(re) <- NULL
  re
}
# Example: expand the 12-character motif "WWWTATTATTTW".
iupac2pcm("WWWTATTATTTW")
# Printed result (one row per base, one column per motif position):
##   [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## A   50   50   50    0  100    0    0  100    0     0     0    50
## C    0    0    0    0    0    0    0    0    0     0     0     0
## G    0    0    0    0    0    0    0    0    0     0     0     0
## T   50   50   50  100    0  100  100    0  100   100   100    50