Source code for VAPr.annovar_running

import logging
import os
import shlex
import subprocess
from collections import OrderedDict


[docs]class AnnovarWrapper(object): """ Wrapper around ANNOVAR download and annotation functions """ # TODO: someday: Refactor to dict, so there is guaranteed to be a list of dbs for each supported build version? hg_19_databases = OrderedDict({'knownGene': 'g', # 'tfbsConsSites': 'r', # 'cytoBand': 'r', # 'targetScanS': 'r', # 'genomicSuperDups': 'r', # 'esp6500siv2_all': 'f', '1000g2015aug': 'f'}) hg_38_databases = OrderedDict({'knownGene': 'g', # 'cytoBand': 'r', # 'genomicSuperDups': 'r', # 'esp6500siv2_all': 'f', '1000g2015aug': 'f'}) @classmethod def _get_annovar_dbs_to_use_for_build_version(cls, genome_build_version): # TODO: someday: refactor these strings--they should be the symbolic constants curr defined in VaprAnnotator if genome_build_version == 'hg19': databases = cls.hg_19_databases elif genome_build_version == 'hg38': databases = cls.hg_38_databases else: raise ValueError("Genome build version '{0}' not supported.".format(genome_build_version)) return databases def __init__(self, annovar_install_path, genome_build_version, custom_annovar_dbs_to_use=None): self._HUMANDB_FOLDER_NAME = "/humandb/" self._DOWN_DD = '-webfrom annovar' # TODO: someday: pull out string keys into symbolic constants self._ANNOVAR_DB_IS_HOSTED_BY_ANNOVAR = OrderedDict({'knownGene': True, # 'tfbsConsSites': False, # 'cytoBand': False, # 'targetScanS': False, # 'genomicSuperDups': False, # 'esp6500siv2_all': True, '1000g2015aug': True # 'popfreq_all_20150413': True, # 'clinvar_20161128': True, # 'cosmic70': True, # 'nci60': True, # 'avdblist': True }) self._annovar_install_path = annovar_install_path self._genome_build_version = genome_build_version self._annovar_dbs_to_use = self._get_annovar_dbs_to_use(custom_annovar_dbs_to_use)
[docs] def download_databases(self): list_commands = self._build_annovar_database_download_command_str() for command in list_commands: args = shlex.split(command) subprocess.call(args) logging.info('Finished downloading databases to {0}'.format( os.path.join(self._annovar_install_path, self._HUMANDB_FOLDER_NAME)))
[docs] def run_annotation(self, single_vcf_path, output_basename, output_dir): annovar_output_basename = output_basename + '_annotated' annovar_output_base = os.path.join(output_dir, annovar_output_basename) annovar_txt_output_fp = annovar_output_base + '.' + self._genome_build_version + '_multianno.txt' cmd_string = self._build_table_annovar_command_str(single_vcf_path, annovar_output_base) args = shlex.split(cmd_string) logging.info('Running Annovar') subprocess.call(args) logging.info('Finished running Annovar') return annovar_txt_output_fp
def _get_annovar_dbs_to_use(self, custom_annovar_dbs_to_use=None): annovar_dbs_for_build_version_dict = self._get_annovar_dbs_to_use_for_build_version( self._genome_build_version) annovar_dbs_to_get_dict = annovar_dbs_for_build_version_dict # by default, assume use all for build version if custom_annovar_dbs_to_use is not None: for annovar_db_name in custom_annovar_dbs_to_use: if annovar_db_name not in annovar_dbs_for_build_version_dict: raise ValueError('Database %s not supported for build version %s' % (annovar_db_name, self._genome_build_version)) annovar_dbs_to_get_dict = {db: annovar_dbs_for_build_version_dict[db] for db in custom_annovar_dbs_to_use} return annovar_dbs_to_get_dict def _build_table_annovar_command_str(self, vcf_path, csv_path): """Generate command string to run table_annovar.pl, which annotates a VCF file.""" dbs = ",".join(list(self._annovar_dbs_to_use.keys())) dbs_args = ",".join(list(self._annovar_dbs_to_use.values())) if '1000g2015aug' in dbs: dbs = dbs.replace('1000g2015aug', '1000g2015aug_all') command = " ".join([ 'perl', os.path.join(self._annovar_install_path, 'table_annovar.pl'), vcf_path, "".join([self._annovar_install_path, self._HUMANDB_FOLDER_NAME]), '--buildver', self._genome_build_version, '-out', csv_path, '-remove -protocol', dbs, '-operation', dbs_args, '-nastring .', '-otherinfo -vcfinput' ]) return command def _build_annovar_database_download_command_str(self): """Concatenate command string arguments for Annovar download database jobs""" command_list = [] # TODO: someday: refactor to remove duplicated command components for annovar_db_name in self._annovar_dbs_to_use: if self._ANNOVAR_DB_IS_HOSTED_BY_ANNOVAR[annovar_db_name]: command_list.append(" ".join( ['perl', self._annovar_install_path + '/annotate_variation.pl', '-build', self._genome_build_version, '-downdb', self._DOWN_DD, annovar_db_name, self._annovar_install_path + self._HUMANDB_FOLDER_NAME])) else: command_list.append(" ".join( ['perl', self._annovar_install_path + '/annotate_variation.pl', '-build', self._genome_build_version, '-downdb', annovar_db_name, self._annovar_install_path + self._HUMANDB_FOLDER_NAME])) return command_list