Source code for VAPr.filtering

# TODO: someday: I think these should probably be refactored to be in an external file; yaml, maybe?

SAMPLE_ID_SELECTOR = 'samples.sample_id'


[docs]def get_sample_id_filter(sample_name): return {SAMPLE_ID_SELECTOR: sample_name}
[docs]def get_any_of_sample_ids_filter(sample_names_list): return {SAMPLE_ID_SELECTOR: {'$in': sample_names_list}}
[docs]def make_de_novo_variants_filter(proband, ancestor1, ancestor2): """ Function for de novo variant analysis. Can be performed on multisample files or or on data coming from a collection of files. In the former case, every sample contains the same variants, although they have differences in their allele frequency and read values. A de novo variant is defined as a variant that occurs only in the specified sample (sample1) and not on the other two (sample2, sample3). Occurrence is defined as having allele frequencies greater than [0, 0] ([REF, ALT]). """ return { "$and": [ get_sample_id_filter(proband), { "$and": [ {SAMPLE_ID_SELECTOR: {"$ne": ancestor1}}, {SAMPLE_ID_SELECTOR: {"$ne": ancestor2}} ] } ] }
[docs]def make_deleterious_compound_heterozygous_variants_filter(sample_ids_list=None): and_list = [ {"genotype_subclass_by_class.heterozygous": "compound"}, {"cadd.phred": {"$gte": 10}} ] result = _append_sample_id_constraint_if_needed(and_list, sample_ids_list) return result
[docs]def make_known_disease_variants_filter(sample_ids_list=None): """ Function for retrieving known disease variants by presence in Clinvar and Cosmic.""" result = { "$or": [ { "$and": [ {"clinvar.rcv.accession": {"$exists": True}}, {"clinvar.rcv.clinical_significance": {"$nin": ["Benign", "Likely benign"]}} ] }, {"cosmic.cosmic_id": {"$exists": True}} ] } if sample_ids_list is not None: result = _append_sample_id_constraint_if_needed([result], sample_ids_list) return result
[docs]def make_rare_deleterious_variants_filter(sample_ids_list=None): """ Function for retrieving rare, deleterious variants """ and_list = [ { "$or": [ {"cadd.esp.af": {"$lt": 0.051}}, {"cadd.esp.af": {"$exists": False}} ] }, { "$or": [ {"func_knowngene": "exonic"}, {"func_knowngene": "splicing"} ] }, {"cadd.phred": {"$gte": 10}}, {"exonicfunc_knowngene": {"$ne": "synonymous SNV"}}, {"1000g2015aug_all": {"$lt": 0.051}} ] result = _append_sample_id_constraint_if_needed(and_list, sample_ids_list) return result
def _append_sample_id_constraint_if_needed(and_list, sample_ids_list): if sample_ids_list is not None: and_list.append(get_any_of_sample_ids_filter(sample_ids_list)) return {"$and": and_list}