## by row

medianCenter

# 分类：程序员

# 关于GenomicRanges::GRanges的几点

# tophat library type 应该怎么设置

当我们拿到一个测序结果，实验人员也搞不清楚他的结果的library type怎么运行tophat呢？

依照手册，你可以先访问实验人员，他使用的是什么样的测序手段，是用的illumina还是SOLiD。如果是SOLiD，多半是second strand. 如果是illumina，可能还是分辨一下，主要是分辨unstraned还是firststrand.

这可能需要分三步走：

1 r[……]

# 如何使boxplot的宽度窄一些

# ChIP-Seq 数据分析

# markdownR plugin for wordpress

# MacOS compile R error: dyld: Symbol not found: _libiconv_open

First, compile libiconv from source and install it in an alternate location. I installed it on /usr/local (be sure to leave system’s libiconv alone).

go to https://www.gnu.org/software/libiconv/#do[……]

# 在rmd文件中自动改变图片的长度或者宽度

最近不太会起标题了，总是起的很长。

问题：当我们在写Rmd文件时，有时候会在一个loop循环里，或者各种apply程序中绘图，但是要求图片的高度和宽度自适应，也就是说依照数据的大小来由程序自动改变图片的长度或者宽度。扩展起来，就是改变任何一个chunk option选项.

我参考了几个网上的贴子，终于试出了自己的答案，下面是贴子的链接：

knitr: Changing chunk o[……]

# 如何使用R将DAVID功能分析中的function annotation cluster结果2D图合并显示

标题太长了，我实在是无奈：）

首先是数据生成，先去DAVID网站Start Analysis 下点选Demolist1.

然后点击function annotation cluster，跳出下图的窗口。

点击右侧的Download File按钮。

或者您可以直接从本站DAVID.output。

而后打开R

305d428ca4c51c10e9956121624f98[……]

# 使用Biostrings的PDict搜索一系列小片段

今天遇到一个小问题，在给定的一些序列中，搜索CRISPR Toolbox中GeCKO library中已有的guideRNA。这正好符合PDict的要求，PDict要求所有的片段长度必须一致，而guide sequence的长度正好都是20。

代码如下：

> library(Biostrings) > seq <- dir("tobeSearched", pattern=".seq$") > seq [1] "1-F_A01_015_2015-03-06.seq" "1-R_C02_012_2015-03-06.seq" "10-F_B02_014_2015-03-06.seq" [4] "10-R_D03_025_2015-03-06.seq" "2-F_B01_013_2015-03-06.seq" "2-R_D02_010_2015-03-06.seq" [7] "3-F_C01_011_2015-03-06.seq" "3-R_E02_008_2015-03-06.seq" "4-F_D01_009_2015-03-06.seq" [10] "4-R_F02_006_2015-03-06.seq" "5-F_E01_007_2015-03-06.seq" "5-R_G02_004_2015-03-06.seq" [13] "6-F_F01_005_2015-03-06.seq" "6-R_H02_002_2015-03-06.seq" "7-F_G01_003_2015-03-06.seq" [16] "7-R_A03_031_2015-03-06.seq" "8-F_H01_001_2015-03-06.seq" "8-R_B03_029_2015-03-06.seq" [19] "9-F_A02_016_2015-03-06.seq" "9-R_C03_027_2015-03-06.seq" > sequences <- sapply(seq, function(.ele){ readLines(file.path("tobeSearched", .ele))}) > sequences[1:2] $`1-F_A01_015_2015-03-06.seq` [1] "NNNNNNNNNNNNNNNNNNNNNNNNTNNNNNNNGNNNNNNNNNNNNNNNNNNNNNNNAATATCCCCCNGACGGNNNGNGNN" [2] "NNNNNNNNNNNNTTCAGGCTGNNNNNNNCACNNNNNNCGAGNCATCANNNNNAGGGNGNGNTTGNNNCTCANNNNNNCAG" [3] "GTNCTGNNNGNNGCGTTNAGTGNCATCNCCGGTNGGNNNNNNNNNGGNNAAACCNCNCGNANNCCCNNCNNGAAGCNNNN" [4] "ACNTTNNTTNNNCCCNANNNGCCCNAAATGTTTNANCAANANNTNNNNNNNNNNNGNNNNCTTNTNNTNNNNNNNNNNNN" [5] "NNNNNNGNNNNNCGGNNGGNACNAANCNNCCNNNNNNNNANCANANGCNGGGGNNAAATTTNNCCCCNNCANNNNNNNNA" [6] "ANGGGNNCCCCNGNAANNTTTTTNNNAANNNCTTTCCCCCCCCNNAGGCCTTTCCCCCNNNAANNCCNCCCCCNNNNNNN" [7] "NNNNNGGNAAAAAAAAAAAAAAANNNNNNNAAAAANNNNGNNNNNNNNNNNAAAAAGNATNNNTNNTGNNNNNNNNNNTG" [8] "NNNNNNNNNANNNNAANNNNNNNNNTTNNGAANNANNGNNNGNNNNNNNNGTNNTANNNNNGNGNNTATTNNGNTTANNG" [9] "GNNNNGNGNNTNNNNNNNNGNGNNNNNNNNCNGTNNNTNNNNNNNTNNNNNNNNNNGNNGNNGTNGNNGTNNNNNNNGNN" [10] "NNGGNNNNNNGNNGGGNGNNNGGNNNNNNNNNGNCNNNTNNNNNNGNGTNGNNGNGNNNGNNNNNTANTNTNTNNTAANA" [11] "ANGNGNNNNGTGNTNTTTNNNNNNNTNNNNNNNNNNNNNNNNNNNNNNGNNNTTNNNNNNNNNNNNNNNNNN" $`1-R_C02_012_2015-03-06.seq` [1] "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNCCGGCNNAAGNCATTATGCTCGGNNAAAGNACTTCCTGGCGCGTGNTTAC" [2] "CAGGGCGCACCGATTGCCATCACCGTTGAAGGGGCTAACATTCTGACCCGCNGCATGATGNTCTTCGGACAAGGNGCGAT" [3] "TCGTTGCCATCCGTACGTGCTGGANGAGANGGAAGCGGCGAANAANAATGANGTCAACGCGTTCGATAAACTGTTGTTCA" [4] "AACATATCGGTCACGNCGGTAGCAACAAAGNTCGCAGCTTCTGGCTGGNCNNGANGCGCGGNTTAACCAGCAGCACGNCA" [5] "ACCGGCGATGCCACTAAACGCTACTATCAGCACCTGANCNGCNGAGCGCCNNCCNCGCNNTGNTNNTGANGNNTCGNNNN" [6] "NGNNNNGGNGNNNNNNAANNNCNNNNNNNNNNGNNNNNTTNTTTNNNNNNANNCNNCCCNCCNNNNNNNCNGNNNNNNNN" [7] "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGNNNNNNN" > sequences <- lapply(sequences, paste, collapse="") > sequences <- unlist(sequences) > sequences[1] 1-F_A01_015_2015-03-06.seq "NNNNNNNNNNNNNNNNNNNNNNNNTNNNNNNNGNNNNNNNNNNNNNNNNNNNNNNNAATATCCCCCNGACGGNNNGNGNNNNNNNNNNNNNNTTCAGGCTGNNNNNNNCACNNNNNNCGAGNCATCANNNNNAGGGNGNGNTTGNNNCTCANNNNNNCAGGTNCTGNNNGNNGCGTTNAGTGNCATCNCCGGTNGGNNNNNNNNNGGNNAAACCNCNCGNANNCCCNNCNNGAAGCNNNNACNTTNNTTNNNCCCNANNNGCCCNAAATGTTTNANCAANANNTNNNNNNNNNNNGNNNNCTTNTNNTNNNNNNNNNNNNNNNNNNGNNNNNCGGNNGGNACNAANCNNCCNNNNNNNNANCANANGCNGGGGNNAAATTTNNCCCCNNCANNNNNNNNAANGGGNNCCCCNGNAANNTTTTTNNNAANNNCTTTCCCCCCCCNNAGGCCTTTCCCCCNNNAANNCCNCCCCCNNNNNNNNNNNNGGNAAAAAAAAAAAAAAANNNNNNNAAAAANNNNGNNNNNNNNNNNAAAAAGNATNNNTNNTGNNNNNNNNNNTGNNNNNNNNNANNNNAANNNNNNNNNTTNNGAANNANNGNNNGNNNNNNNNGTNNTANNNNNGNGNNTATTNNGNTTANNGGNNNNGNGNNTNNNNNNNNGNGNNNNNNNNCNGTNNNTNNNNNNNTNNNNNNNNNNGNNGNNGTNGNNGTNNNNNNNGNNNNGGNNNNNNGNNGGGNGNNNGGNNNNNNNNNGNCNNNTNNNNNNGNGTNGNNGNGNNNGNNNNNTANTNTNTNNTAANAANGNGNNNNGTGNTNTTTNNNNNNNTNNNNNNNNNNNNNNNNNNNNNNGNNNTTNNNNNNNNNNNNNNNNNN" > sequences <- DNAStringSet(sequences) > head(sequences) A DNAStringSet instance of length 6 width seq names [1] 872 NNNNNNNNNNNNNNNNNNNNNNNNTNNNNNNNG...NNNNNNNNGNNNTTNNNNNNNNNNNNNNNNNN 1-F_A01_015_2015-... [2] 528 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNCCG...NNNNNNNNNNNNNNNNNNNNNNNNGNNNNNNN 1-R_C02_012_2015-... [3] 543 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNG...NNNNNNNNNNNNTNNNNNNNNNNNNNNNNNNN 10-F_B02_014_2015... [4] 525 GNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNC...NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 10-R_D03_025_2015... [5] 697 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN...NNNGNNNNNNNNNCTTTTTTTTTTNNNNNNAN 2-F_B01_013_2015-... [6] 527 NNNNNNNNNNNNNNNNNNNNNNNNGNNNNNNNC...NNNNNNNNNNNNNNNNNNNNNNNNNNAANNNN 2-R_D02_010_2015-... > library1 <- read.csv("mouse_geckov2_library_b_1.10000000053.csv", stringsAsFactors=F) > library2 <- read.csv("mouse_geckov2_library_a_2.10000000053.csv", stringsAsFactors=F) > head(library1) gene_id UID seq 1 0610007P14Rik MGLibB_00001 TGTCTAAGGTTTCTCAATCC 2 0610007P14Rik MGLibB_00002 CAGTGAATGGCCTCCAAGCC 3 0610007P14Rik MGLibB_00003 GGTGCTTACTTGCTACCATC 4 0610009B22Rik MGLibB_00004 ACCTCGTCGACGAAAACATG 5 0610009B22Rik MGLibB_00005 GTTCATAGCTCATGCTGCTC 6 0610009B22Rik MGLibB_00006 GAGCAGCATGAGCTATGAAC > head(library2) gene_id UID seq 1 0610007P14Rik MGLibA_00001 TCCTGAATGTGTTACGAAGC 2 0610007P14Rik MGLibA_00002 GGTCGGGCTCCGGTACCTAG 3 0610007P14Rik MGLibA_00003 GCCAGCTTCGTAACACATTC 4 0610009B22Rik MGLibA_00004 TCATCATGCTGCATGACGTG 5 0610009B22Rik MGLibA_00005 ATTCAATGAGTGGTTCGTCT 6 0610009B22Rik MGLibA_00006 TCGTCACGGCTGGGCACATG > dim(library1) [1] 62804 3 > dim(library2) [1] 67405 3 > library1.DNA <- library1$seq > library2.DNA <- library2$seq > names(library1.DNA) <- library1$UID > names(library2.DNA) <- library2$UID > library1.DNA <- DNAStringSet(library1.DNA) > library2.DNA <- DNAStringSet(library2.DNA) > head(library1.DNA) A DNAStringSet instance of length 6 width seq names [1] 20 TGTCTAAGGTTTCTCAATCC MGLibB_00001 [2] 20 CAGTGAATGGCCTCCAAGCC MGLibB_00002 [3] 20 GGTGCTTACTTGCTACCATC MGLibB_00003 [4] 20 ACCTCGTCGACGAAAACATG MGLibB_00004 [5] 20 GTTCATAGCTCATGCTGCTC MGLibB_00005 [6] 20 GAGCAGCATGAGCTATGAAC MGLibB_00006 > head(library2.DNA) A DNAStringSet instance of length 6 width seq names [1] 20 TCCTGAATGTGTTACGAAGC MGLibA_00001 [2] 20 GGTCGGGCTCCGGTACCTAG MGLibA_00002 [3] 20 GCCAGCTTCGTAACACATTC MGLibA_00003 [4] 20 TCATCATGCTGCATGACGTG MGLibA_00004 [5] 20 ATTCAATGAGTGGTTCGTCT MGLibA_00005 [6] 20 TCGTCACGGCTGGGCACATG MGLibA_00006 > library1.PDict <- PDict(library1.DNA) > library2.PDict <- PDict(library2.DNA) > mT.1 <- lapply(sequences, function(.ele){ whichPDict(library1.PDict, .ele)}) > mT.2 <- lapply(sequences, function(.ele){ whichPDict(library2.PDict, .ele)}) > sapply(mT.1, length) 1-F_A01_015_2015-03-06.seq 1-R_C02_012_2015-03-06.seq 10-F_B02_014_2015-03-06.seq 0 0 0 10-R_D03_025_2015-03-06.seq 2-F_B01_013_2015-03-06.seq 2-R_D02_010_2015-03-06.seq 0 0 0 3-F_C01_011_2015-03-06.seq 3-R_E02_008_2015-03-06.seq 4-F_D01_009_2015-03-06.seq 0 0 0 4-R_F02_006_2015-03-06.seq 5-F_E01_007_2015-03-06.seq 5-R_G02_004_2015-03-06.seq 0 0 0 6-F_F01_005_2015-03-06.seq 6-R_H02_002_2015-03-06.seq 7-F_G01_003_2015-03-06.seq 0 0 0 7-R_A03_031_2015-03-06.seq 8-F_H01_001_2015-03-06.seq 8-R_B03_029_2015-03-06.seq 1 0 0 9-F_A02_016_2015-03-06.seq 9-R_C03_027_2015-03-06.seq 0 0 > sapply(mT.2, length) 1-F_A01_015_2015-03-06.seq 1-R_C02_012_2015-03-06.seq 10-F_B02_014_2015-03-06.seq 0 0 0 10-R_D03_025_2015-03-06.seq 2-F_B01_013_2015-03-06.seq 2-R_D02_010_2015-03-06.seq 0 0 0 3-F_C01_011_2015-03-06.seq 3-R_E02_008_2015-03-06.seq 4-F_D01_009_2015-03-06.seq 0 0 0 4-R_F02_006_2015-03-06.seq 5-F_E01_007_2015-03-06.seq 5-R_G02_004_2015-03-06.seq 0 0 0 6-F_F01_005_2015-03-06.seq 6-R_H02_002_2015-03-06.seq 7-F_G01_003_2015-03-06.seq 0 1 0 7-R_A03_031_2015-03-06.seq 8-F_H01_001_2015-03-06.seq 8-R_B03_029_2015-03-06.seq 0 0 0 9-F_A02_016_2015-03-06.seq 9-R_C03_027_2015-03-06.seq 0 0 |