Converting IUPAC strings to a PCM (position count matrix)

需要把IUPAC字符串形式的motif转换成position count matrix

#' Convert an IUPAC nucleotide string into a position count matrix.
#'
#' Every character of the input is looked up in a fixed table of counts over
#' the four bases A, C, G and T (U is given the same counts as T) and becomes
#' one column of the result.
#'
#' @param iupac A single string of IUPAC nucleotide codes. Matching is
#'   case-insensitive because the input is upper-cased first.
#' @return A numeric matrix with 4 rows named "A", "C", "G", "T" and one
#'   column per input character. An empty string yields a 4 x 0 matrix.
#' @examples
#' iupac2pcm("WWWTATTATTTW")
iupac2pcm <- function(iupac) {
    bases <- c("A", "C", "G", "T")
    # Each vector holds the counts in the order A, C, G, T.
    # Two-base codes split 50/50; the three-base codes (B, D, H, V) use 33
    # per base, so those columns sum to 99 rather than 100.
    IUPAC <- list(A = c(100, 0, 0, 0),
                  C = c(0, 100, 0, 0),
                  G = c(0, 0, 100, 0),
                  T = c(0, 0, 0, 100),
                  U = c(0, 0, 0, 100),
                  R = c(50, 0, 50, 0),
                  Y = c(0, 50, 0, 50),
                  M = c(50, 50, 0, 0),
                  K = c(0, 0, 50, 50),
                  W = c(50, 0, 0, 50),
                  S = c(0, 50, 50, 0),
                  B = c(0, 33, 33, 33),
                  D = c(33, 0, 33, 33),
                  H = c(33, 33, 0, 33),
                  V = c(33, 33, 33, 0),
                  N = c(25, 25, 25, 25))

    iupac <- toupper(iupac)
    # `if` needs a scalar condition, so reject vectors, NULL and NA up front
    # instead of failing later with an unrelated error.
    if (length(iupac) != 1L || is.na(iupac)) {
        stop("please check the inputs: 'iupac' must be a single string",
             call. = FALSE)
    }
    if (grepl("[^ACGTURYMKWSBDHVN]", iupac)) {
        stop("please check the inputs", call. = FALSE)
    }

    codes <- unlist(strsplit(iupac, ""))
    re <- matrix(0, nrow = length(bases), ncol = length(codes),
                 dimnames = list(bases, NULL))
    # seq_along() gives an empty loop for "", where 1:length() would yield
    # c(1, 0) and index a column that does not exist.
    for (i in seq_along(codes)) {
        re[, i] <- IUPAC[[codes[i]]]
    }
    re
}

# Example: convert a 12-character motif; the matrix printed by the call is
# reproduced in the `##` lines below (rows are A, C, G, T; one column per
# input character).
iupac2pcm("WWWTATTATTTW")
##   [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## A   50   50   50    0  100    0    0  100    0     0     0    50
## C    0    0    0    0    0    0    0    0    0     0     0     0
## G    0    0    0    0    0    0    0    0    0     0     0     0
## T   50   50   50  100    0  100  100    0  100   100   100    50

Leave a Reply

  

  

  

%d 博主赞过: