GitHub - ctSkennerton/minced: Mining CRISPRs in Environmental Datasets
安装
# Fetch the minced source code from GitHub and enter the checkout.
git clone http://github.com/ctSkennerton/minced
cd minced
# Build the tool (the usage commands below call a `minced` executable inside this directory).
make
使用
# Decompress the files in the current directory; -k keeps the compressed originals.
gunzip -k *
# Concatenate every .fa file into one combined FASTA.
cat *.fa > all_MAG_contig.fasta
# Run minced on the combined FASTA. The two extra arguments name its output
# files (presumably the text report and the GFF file -- confirm with the minced README).
/home/zhongpei/hard_disk_sda2/zhongpei/Software/minced/minced all_MAG_contig.fasta all_MAG_contig.crisprs all_MAG_contig.gff
# Post-process with the Python script listed below: it reads the GFF and the
# combined FASTA and writes the CRISPR regions to a new FASTA file.
/home/zhongpei/hard_disk_sda2/zhongpei/Software/my_script/minCED_handel.py --gff3 all_MAG_contig.gff --input_fa all_MAG_contig.fasta --output_fa all_MAG_contig_CRISPR.fasta
# Optional cleanup of the intermediate files (left commented out).
#rm all_MAG_contig.fasta
#rm all_MAG_contig.gff
#rm all_MAG_contig.crisprs
# Remove the .fa files again (the compressed originals were kept by -k).
# NOTE: this deletes every .fa file in the directory, not only those gunzip created.
rm *.fa
我写了一个 Python 脚本(即上面用到的 minCED_handel.py)来处理 minced 的结果:根据 GFF 文件中的坐标,从 contig 序列中提取每个 CRISPR 区域并输出为 FASTA:
#! /usr/bin/env python
#########################################################
# take minCED CRISPR result
# written by PeiZhong in IFR of CAAS
"""Extract the regions listed in a minCED GFF3 file from a contig FASTA.

For every feature line of the GFF3 file, the corresponding subsequence of
the named contig is written to the output FASTA under the record id
``<contig id>+<feature ID>``.
"""
import argparse
import os

from Bio import SeqIO
from Bio.SeqRecord import SeqRecord


def _feature_id(attributes):
    """Return the feature identifier from a GFF3 attribute column.

    The value of the ``ID`` key is preferred. If no ``ID`` key exists, the
    value of the first ``key=value`` pair is used instead.

    Raises:
        ValueError: if the column contains no ``key=value`` pair at all.
    """
    first_value = None
    for attribute in attributes.split(";"):
        key, sep, value = attribute.partition("=")
        if not sep:
            continue
        if key.strip() == "ID":
            return value
        if first_value is None:
            first_value = value
    if first_value is None:
        raise ValueError(f"no key=value attribute found in {attributes!r}")
    return first_value


def _parse_feature(line):
    """Parse one GFF3 line into ``(seq_id, start, end, feature_id)``.

    ``start``/``end`` are converted from the 1-based inclusive GFF3
    coordinates to a 0-based half-open Python slice. Returns ``None`` for
    blank lines and for comment/directive lines starting with ``#``.

    Raises:
        ValueError: if the line has fewer than nine tab-separated columns,
            non-integer coordinates, or an unusable attribute column.
    """
    line = line.rstrip("\r\n")
    if not line.strip() or line.startswith("#"):
        return None
    fields = line.split("\t")
    if len(fields) < 9:
        raise ValueError(
            f"expected 9 tab-separated columns, got {len(fields)}: {line!r}"
        )
    seq_id = fields[0]
    start = int(fields[3]) - 1
    end = int(fields[4])
    return seq_id, start, end, _feature_id(fields[8])


def extract_crisprs(gff3, input_fa, output_fa):
    """Write the subsequence of every GFF3 feature to ``output_fa``.

    Args:
        gff3: path of the minCED GFF3 output.
        input_fa: path of the FASTA file holding the contigs named in the GFF3.
        output_fa: path of the FASTA file to create.

    Returns:
        The number of records written.

    Raises:
        KeyError: if a GFF3 line names a contig absent from ``input_fa``.
        ValueError: if a GFF3 line is malformed.
    """
    with open(input_fa, "r") as ifa:
        sequences = SeqIO.to_dict(SeqIO.parse(ifa, "fasta"))

    # Keyed by record name, so a repeated (contig, ID) pair keeps the last one.
    result_db = {}
    with open(gff3, "r") as gff:
        for line_no, line in enumerate(gff, 1):
            try:
                feature = _parse_feature(line)
            except ValueError as err:
                raise ValueError(f"{gff3}:{line_no}: {err}") from err
            if feature is None:
                continue
            seq_id, start, end, feature_id = feature
            try:
                contig = sequences[seq_id]
            except KeyError:
                raise KeyError(
                    f"{gff3}:{line_no}: sequence {seq_id!r} not found in {input_fa}"
                ) from None
            name = f"{seq_id}+{feature_id}"
            result_db[name] = SeqRecord(
                contig.seq[start:end], id=name, description=""
            )

    # Open the output only after all input parsed cleanly, so a failure above
    # does not leave behind a truncated/empty result file.
    with open(output_fa, "w") as ofa:
        SeqIO.write(result_db.values(), ofa, 'fasta')
    return len(result_db)


def main():
    """Parse the command line and run the extraction."""
    parser = argparse.ArgumentParser(description='take minCED CRISPR result')
    parser.add_argument('--gff3', required=True, help='< minCED gff3 output >')
    parser.add_argument('--input_fa', required=True, help='< your all MAGs contig >')
    parser.add_argument('--output_fa', required=True, help='< output CRISPR fasta >')
    args = parser.parse_args()
    extract_crisprs(args.gff3, args.input_fa, args.output_fa)


if __name__ == "__main__":
    main()