1.安装 htslib-1.20
https://www.htslib.org/doc/tabix.html
@J3$ cd ~/Downloads/
$ wget https://github.com/samtools/htslib/releases/download/1.20/htslib-1.20.tar.bz2
$ tar jxvf htslib-1.20.tar.bz2
编译安装:
$ cd htslib-1.20/
$ ./configure --prefix=/home/wangjl/soft/htslib-1.20/
$ make -j16
$ make install
配置路径:
$ vim ~/.bashrc
export PATH=$PATH:/home/wangjl/soft/htslib-1.20/bin
测试:
$ source ~/.bashrc
$ which bgzip
~/soft/htslib-1.20/bin/bgzip
$ which tabix
~/soft/htslib-1.20/bin/tabix
2. 生成fa和gtf的索引文件fa.fai, gtf.gz.tbi
$ cd /data/wangjl/scPolyA-seq2/ref/hg38/gencode/
$ (grep ^"#" GRCh38.p13.gtf; grep -v ^"#" GRCh38.p13.gtf | sort -t $'\t' -k1,1V -k4,4n -k5,5n) | bgzip > GRCh38.p13.sorted.gtf.gz
$ tabix -p gff GRCh38.p13.sorted.gtf.gz
$ samtools faidx input_ref.fa #生成 input_ref.fa.fai
$ ls -lth
-rw-r--r--. 1 wangjl jinlab 370K Jul 24 20:03 GRCh38.p13.sorted.gtf.gz.tbi
-rw-r--r--. 1 wangjl jinlab 36M Jul 24 20:00 GRCh38.p13.sorted.gtf.gz
-rw-r--r--. 1 wangjl jinlab 905M May 1 2023 GRCh38.p13.gtf
-rw-r--r--. 1 wangjl jinlab 22K May 8 2023 GRCh38.p13.genome.fa.fai
-rw-r--r--. 1 wangjl jinlab 3.1G May 1 2023 GRCh38.p13.genome.fa
3.在IGV.js中引用资源
其他资源使用默认的,我已下载到服务器本地。
// refer in local: gencode
// Reference-genome definition that points igv.js at locally hosted GENCODE
// files (FASTA + .fai index, bgzip-compressed GTF + .tbi index) instead of the
// default hosted hg38 resources. All URLs are relative paths.
// `var` is kept on purpose so both objects remain globals reachable from
// other scripts on the page.
var hg38_local_gencode = {
    id: "hg38",
    name: "Human (GRCh38/hg38)",
    fastaURL: "ref/hg38/gencode/GRCh38.p13.genome.fa", //?someRandomSeed=0
    indexURL: "ref/hg38/gencode/GRCh38.p13.genome.fa.fai",
    cytobandURL: "ref/hg38/cytoBandIdeo.txt.gz",
    aliasURL: "ref/hg38/hg38_alias.tab",
    tracks: [
        {
            name: "GRCh38",
            format: "gtf",
            type: "annotation",
            id: "hg38_genes",
            url: "ref/hg38/gencode/GRCh38.p13.sorted.gtf.gz",
            indexURL: "ref/hg38/gencode/GRCh38.p13.sorted.gtf.gz.tbi",
            visibilityWindow: -1,
            // Alternative setting (visible window size of the track):
            // visibilityWindow: 10000000,
            supportsWholeGenome: false,
            removable: false,
            order: 1000000,
            height: 250, // height of ref track
            infoURL: "https://www.ncbi.nlm.nih.gov/gene/?term=$$" // links to ncbi gene
        }
    ],
    chromosomeOrder: "chr1, chr2, chr3, chr4, chr5, chr6, chr7, chr8, chr9, chr10, chr11, chr12, chr13, chr14, chr15, chr16, chr17, chr18, chr19, chr20, chr21, chr22, chrX, chrY"
};

// Browser options: use the local reference object above rather than the
// built-in genome id, and open on an initial locus with no extra tracks.
var options = {
    // genome: "hg38",
    // init screen
    locus: "chr21:45,512,381-45,521,866",
    reference: hg38_local_gencode,
    tracks: []
};
测试结果
- 上图:IGV默认的基因组注释文件
ref/hg38/ncbiRefSeq.txt.gz
- 下图:我们导入的基因组注释文件
ref/hg38/gencode/GRCh38.p13.sorted.gtf.gz
两者在同一个地方还是有一些差异的:下图甚至多出一个转录本。
不过,不能评价哪个注释更可靠。毕竟都是知名学术机构发布的。
建议在一个项目中,使用最新的基因组注释文件,且
- 固定使用同一个机构、同一个版本号的gtf文件,
- 保证gtf和fa配对。