# Source code for VAPr.tests.test_vcf_genotype_fields_parsing

# standard libraries
import unittest
import warnings

# project-specific libraries
import VAPr.vcf_genotype_fields_parsing as ns_test

__author__ = 'Birmingham'

# Cause all warnings to always be triggered.
warnings.simplefilter("always")


def _help_get_warn_msg(warn_obj):
    return str(warn_obj[-1].message)


[docs]class TestFunctions(unittest.TestCase): # region _fill_genotype tests
[docs] def test_fill_genotype(self): genotype_to_fill = ns_test.VCFGenotypeInfo('') ns_test._fill_genotype('0/2', genotype_to_fill) self.assertEqual('0/2', genotype_to_fill.genotype)
[docs] def test_fill_genotype_warn(self): genotype_to_fill = ns_test.VCFGenotypeInfo('') with warnings.catch_warnings(record=True) as w: genotype_to_fill = ns_test._fill_genotype('1/0/2', genotype_to_fill) self.assertEqual("The GT tag value 1/0/2 does not split into exactly two values so genotype information " "could not be captured for the current variant.", _help_get_warn_msg(w)) self.assertIsNone(genotype_to_fill.genotype) # no genotype created if GT tag splits wrong
# endregion # region _fill_unfiltered_reads_counts tests
[docs] def test_fill_unfiltered_reads_counts(self): genotype_to_fill = ns_test.VCFGenotypeInfo('') ns_test._fill_unfiltered_reads_counts('0,64,12', genotype_to_fill) self.assertEqual(3, len(genotype_to_fill.alleles)) self.assertEqual(0, genotype_to_fill.alleles[0].unfiltered_read_counts) self.assertEqual(64, genotype_to_fill.alleles[1].unfiltered_read_counts) self.assertEqual(12, genotype_to_fill.alleles[2].unfiltered_read_counts)
[docs] def test_fill_unfiltered_reads_counts_warn(self): genotype_to_fill = ns_test.VCFGenotypeInfo('') with warnings.catch_warnings(record=True) as w: genotype_to_fill = ns_test._fill_unfiltered_reads_counts('0;64', genotype_to_fill) self.assertEqual("The AD tag value 0;64 does not split into at least two values so unfiltered allele depth" " information could not be captured for the current variant.", _help_get_warn_msg(w)) self.assertEqual(0, len(genotype_to_fill.alleles)) # no alleles created if AD tag splits wrong
# endregion # region fill_filtered_reads tests
[docs] def test_fill_filtered_reads_count(self): genotype_to_fill = ns_test.VCFGenotypeInfo('') ns_test._fill_filtered_reads_count('44', genotype_to_fill) self.assertEqual(44, genotype_to_fill.filter_passing_reads_count)
# endregion # region _fill_genotype_confidence tests
[docs] def test_fill_genotype_confidence(self): genotype_to_fill = ns_test.VCFGenotypeInfo('') ns_test._fill_genotype_confidence('44.1', genotype_to_fill) self.assertEqual(44.1, genotype_to_fill.genotype_confidence)
# endregion # region _fill_genotype_likelihoods tests
[docs] def test_fill_genotype_likelihoods_three_alleles(self): expected_values = [(0, 0, 495), (0, 1, 162), (1, 1, 123), (0, 2, 213), (1, 2, 129), (2, 2, 175), (0, 3, 67), (1, 3, 0), (2, 3, 46), (3, 3, 28.1)] genotype_to_fill = ns_test.VCFGenotypeInfo('') genotype_to_fill.alleles = [ns_test.Allele(10), ns_test.Allele(11), ns_test.Allele(12), ns_test.Allele(13)] ns_test._fill_genotype_likelihoods('495,162,123,213,129,175,67,0,46,28.1', genotype_to_fill) self.assertEqual(len(expected_values), len(genotype_to_fill.genotype_likelihoods)) for index in range(0, len(expected_values)): curr_expected_values = expected_values[index] real_values = genotype_to_fill.genotype_likelihoods[index] self.assertEqual(curr_expected_values[0], real_values.allele1_number) self.assertEqual(curr_expected_values[1], real_values.allele2_number) self.assertEqual(curr_expected_values[2], real_values.likelihood_neg_exponent)
[docs] def test_fill_genotype_likelihoods_no_alleles(self): expected_values = [(0, 0, 495), (0, 1, 162), (1, 1, 123)] genotype_to_fill = ns_test.VCFGenotypeInfo('') self.assertEqual(0, len(genotype_to_fill.alleles)) ns_test._fill_genotype_likelihoods('495,162,123', genotype_to_fill) self.assertEqual(2, len(genotype_to_fill.alleles)) # should have added two self.assertEqual(len(expected_values), len(genotype_to_fill.genotype_likelihoods)) for index in range(0, len(expected_values)): curr_expected_values = expected_values[index] real_values = genotype_to_fill.genotype_likelihoods[index] self.assertEqual(curr_expected_values[0], real_values.allele1_number) self.assertEqual(curr_expected_values[1], real_values.allele2_number) self.assertEqual(curr_expected_values[2], real_values.likelihood_neg_exponent)
[docs] def test_fill_genotype_likelihoods_warn_too_many_alleles_implied(self): genotype_to_fill = ns_test.VCFGenotypeInfo('') genotype_to_fill.alleles = [ns_test.Allele(10), ns_test.Allele(11), ns_test.Allele(12)] with warnings.catch_warnings(record=True) as w: genotype_to_fill = ns_test._fill_genotype_likelihoods('495,162,123,213,129,175,67,0,46,28.1', genotype_to_fill) self.assertEqual("The PL tag value 495,162,123,213,129,175,67,0,46,28.1 appears to contain information for " "more alleles than expected so 'normalized' Phred-scaled likelihoods of possible genotypes" " information could not be captured for the current variant.", _help_get_warn_msg(w)) self.assertEqual(0, len(genotype_to_fill.genotype_likelihoods)) # no likelihoods made if PL splits wrong
[docs] def test_fill_genotype_likelihoods_warn_too_few_likelihoods_1(self): genotype_to_fill = ns_test.VCFGenotypeInfo('') genotype_to_fill.alleles = [ns_test.Allele(10), ns_test.Allele(11), ns_test.Allele(12), ns_test.Allele(13)] with warnings.catch_warnings(record=True) as w: genotype_to_fill = ns_test._fill_genotype_likelihoods('495,162,123,213,129,175', genotype_to_fill) self.assertEqual("The PL tag value 495,162,123,213,129,175 appears to contain information for fewer alleles" " than expected so 'normalized' Phred-scaled likelihoods of possible genotypes information" " could not be captured for the current variant.", _help_get_warn_msg(w)) self.assertEqual(0, len(genotype_to_fill.genotype_likelihoods)) # no likelihoods made if PL splits wrong
[docs] def test_fill_genotype_likelihoods_warn_too_few_likelihoods_2(self): genotype_to_fill = ns_test.VCFGenotypeInfo('') genotype_to_fill.alleles = [ns_test.Allele(10), ns_test.Allele(11), ns_test.Allele(12), ns_test.Allele(13)] with warnings.catch_warnings(record=True) as w: genotype_to_fill = ns_test._fill_genotype_likelihoods('495,162,123,213,129,175,67,0,46', genotype_to_fill) self.assertEqual("The PL tag value 495,162,123,213,129,175,67,0,46 appears to contain information for fewer" " alleles than expected so 'normalized' Phred-scaled likelihoods of possible genotypes " "information could not be captured for the current variant.", _help_get_warn_msg(w)) self.assertEqual(0, len(genotype_to_fill.genotype_likelihoods)) # no likelihoods made if PL splits wrong
# endregion
class TestVCFGenotypeInfo(unittest.TestCase):
    """Tests of the validating property setters on ``VCFGenotypeInfo``."""
    # No tests of __init__ as it is just setting empty values
    # No explicit tests of getter properties as they're just returning an internal variable

    # region genotype_confidence setter tests
    def test_genotype_confidence_setter(self):
        dummy_vcfgenotypeinfo = ns_test.VCFGenotypeInfo('')

        dummy_vcfgenotypeinfo.genotype_confidence = "89"
        self.assertEqual(89.0, dummy_vcfgenotypeinfo.genotype_confidence)

        dummy_vcfgenotypeinfo.genotype_confidence = 89
        self.assertEqual(89.0, dummy_vcfgenotypeinfo.genotype_confidence)

        dummy_vcfgenotypeinfo.genotype_confidence = "-89.10"
        self.assertEqual(-89.1, dummy_vcfgenotypeinfo.genotype_confidence)

    def test_genotype_confidence_setter_error(self):
        dummy_vcfgenotypeinfo = ns_test.VCFGenotypeInfo('')
        with self.assertRaises(ValueError):
            dummy_vcfgenotypeinfo.genotype_confidence = "blue"
    # endregion

    # region filter_passing_reads_count setter tests
    def test_filter_passing_reads_count_setter(self):
        dummy_vcfgenotypeinfo = ns_test.VCFGenotypeInfo('')

        dummy_vcfgenotypeinfo.filter_passing_reads_count = "42"
        self.assertEqual(42, dummy_vcfgenotypeinfo.filter_passing_reads_count)

        dummy_vcfgenotypeinfo.filter_passing_reads_count = 0
        self.assertEqual(0, dummy_vcfgenotypeinfo.filter_passing_reads_count)

        # The "." placeholder is expected to be stored as None; assertIsNone checks
        # identity, matching how the other tests in this file check for None.
        dummy_vcfgenotypeinfo.filter_passing_reads_count = "."
        self.assertIsNone(dummy_vcfgenotypeinfo.filter_passing_reads_count)

    def test_filter_passing_reads_count_setter_error(self):
        dummy_vcfgenotypeinfo = ns_test.VCFGenotypeInfo('')
        with self.assertRaises(ValueError):
            dummy_vcfgenotypeinfo.filter_passing_reads_count = -1
        with self.assertRaises(ValueError):
            dummy_vcfgenotypeinfo.filter_passing_reads_count = 48.5
    # endregion
class TestAllele(unittest.TestCase):
    """Tests of the ``unfiltered_read_counts`` setter on ``Allele``."""
    # No tests of __init__ as it is just setting values
    # No explicit tests of getter properties as they're just returning an internal variable

    def test_read_counts_setter(self):
        allele_under_test = ns_test.Allele(0)
        self.assertEqual(0, allele_under_test.unfiltered_read_counts)

        allele_under_test.unfiltered_read_counts = 94
        self.assertEqual(94, allele_under_test.unfiltered_read_counts)

    def test_read_counts_setter_error(self):
        allele_under_test = ns_test.Allele(0)
        # A negative count and a non-integral count must each be rejected.
        for invalid_count in (-94, 48.5):
            with self.assertRaises(ValueError):
                allele_under_test.unfiltered_read_counts = invalid_count
[docs]class TestGenotypeLikelihood(unittest.TestCase): # No tests of init as just calls tested setters # No explicit tests of getter properties as they're just returning an internal variable # region _validate_allele_relationship tests
[docs] def test__validate_allele_relationship_pass(self): ns_test.GenotypeLikelihood._validate_allele_relationship(0, 2)
# if we got this far, the test passed
[docs] def test__validate_allele_relationship_fail(self): with self.assertRaises(ValueError): ns_test.GenotypeLikelihood._validate_allele_relationship(2, 0)
# endregion # region allele1_number setter tests
[docs] def test_allele1_number_setter(self): temp_likelihood = ns_test.GenotypeLikelihood(0, 5, 0) temp_likelihood.allele1_number = 4 self.assertEqual(4, temp_likelihood.allele1_number)
[docs] def test_allele1_number_setter_error(self): with self.assertRaises(ValueError): ns_test.GenotypeLikelihood(0, 0, 0).allele1_number = -1 with self.assertRaises(ValueError): ns_test.GenotypeLikelihood(0, 5, 0).allele1_number = 4.5 with self.assertRaises(ValueError): ns_test.GenotypeLikelihood(0, 0, 0).allele1_number = "blue" temp_likelihood = ns_test.GenotypeLikelihood(0, 0, 0) temp_likelihood.allele2_number = 1 with self.assertRaises(ValueError): temp_likelihood.allele1_number = 4 # can't be greater than allele 2
# endregion # region allele2_number setter tests
[docs] def test_allele2_number_setter(self): temp_likelihood = ns_test.GenotypeLikelihood(0, 0, 0) temp_likelihood.allele2_number = 4 self.assertEqual(4, temp_likelihood.allele2_number)
[docs] def test_allele2_number_setter_error(self): with self.assertRaises(ValueError): ns_test.GenotypeLikelihood(0, 0, 0).allele2_number = -1 with self.assertRaises(ValueError): ns_test.GenotypeLikelihood(0, 0, 0).allele2_number = 4.5 with self.assertRaises(ValueError): ns_test.GenotypeLikelihood(0, 0, 0).allele2_number = "blue" temp_likelihood = ns_test.GenotypeLikelihood(4, 4, 0) with self.assertRaises(ValueError): temp_likelihood.allele2_number = 2 # can't be smaller than allele 1
# endregion # region likelihood_neg_exponent setter tests
[docs] def test_likelihood_neg_exponent_setter(self): temp_likelihood = ns_test.GenotypeLikelihood(0, 0, 0) temp_likelihood.likelihood_neg_exponent = "89" self.assertEqual(89.0, temp_likelihood.likelihood_neg_exponent) temp_likelihood.likelihood_neg_exponent = 89 self.assertEqual(89.0, temp_likelihood.likelihood_neg_exponent) temp_likelihood.likelihood_neg_exponent = "-89.10" self.assertEqual(-89.1, temp_likelihood.likelihood_neg_exponent)
[docs] def test_genotype_confidence_setter_error(self): temp_likelihood = ns_test.GenotypeLikelihood(0, 0, 0) with self.assertRaises(ValueError): temp_likelihood.likelihood_neg_exponent = "blue"
# endregion
class TestVCFGenotypeString(unittest.TestCase):
    """Tests of ``VCFGenotypeParser``: genotype fields string validation and parsing."""

    def test_is_valid_genotype_fields_string_true(self):
        self.assertTrue(ns_test.VCFGenotypeParser.is_valid_genotype_fields_string("1/1:0,2:2:6:89,6,0"))
        self.assertTrue(ns_test.VCFGenotypeParser.is_valid_genotype_fields_string("./.:0,2:.:.:."))
        self.assertTrue(ns_test.VCFGenotypeParser.is_valid_genotype_fields_string("0/0"))

    def test_is_valid_genotype_fields_string_false_period(self):
        self.assertFalse(ns_test.VCFGenotypeParser.is_valid_genotype_fields_string("."))

    def test_is_valid_genotype_fields_string_false_delimited(self):
        self.assertFalse(ns_test.VCFGenotypeParser.is_valid_genotype_fields_string("./.:.:.:.:."))
        self.assertFalse(ns_test.VCFGenotypeParser.is_valid_genotype_fields_string("./.:0,0"))

    def test_parse_GT_GQ_PL(self):
        format_string = 'GT:GQ:PL'
        info_string = '1/1:99:1187.2,101,0'
        parser = ns_test.VCFGenotypeParser()

        genotype_to_fill = parser.parse(format_string, info_string)

        self.assertEqual('1/1', genotype_to_fill.genotype)
        self.assertIsNone(genotype_to_fill.filter_passing_reads_count)
        self.assertEqual(99, genotype_to_fill.genotype_confidence)
        self.assertEqual(2, len(genotype_to_fill.alleles))
        self.assertEqual(3, len(genotype_to_fill.genotype_likelihoods))
        self.assertEqual(1187.2, genotype_to_fill.genotype_likelihoods[0].likelihood_neg_exponent)
        self.assertEqual(101, genotype_to_fill.genotype_likelihoods[1].likelihood_neg_exponent)
        self.assertEqual(0, genotype_to_fill.genotype_likelihoods[2].likelihood_neg_exponent)

    def test_parse_GT_AD_DP_GQ_PL(self):
        format_string = 'GT:AD:DP:GQ:PL'
        info_string = '1/1:0,34:34:99:1187.2,101,0'
        parser = ns_test.VCFGenotypeParser()

        genotype_to_fill = parser.parse(format_string, info_string)

        self.assertEqual('1/1', genotype_to_fill.genotype)
        self.assertEqual(34, genotype_to_fill.filter_passing_reads_count)
        self.assertEqual(99, genotype_to_fill.genotype_confidence)
        self.assertEqual(2, len(genotype_to_fill.alleles))
        self.assertEqual(3, len(genotype_to_fill.genotype_likelihoods))
        self.assertEqual(0, genotype_to_fill.alleles[0].unfiltered_read_counts)
        self.assertEqual(1187.2, genotype_to_fill.genotype_likelihoods[0].likelihood_neg_exponent)
        self.assertEqual(34, genotype_to_fill.alleles[1].unfiltered_read_counts)
        self.assertEqual(101, genotype_to_fill.genotype_likelihoods[1].likelihood_neg_exponent)
        self.assertEqual(0, genotype_to_fill.genotype_likelihoods[2].likelihood_neg_exponent)

    def test_parse_error_reported_as_warn(self):
        format_string = 'GT:AD:GQ:PL'
        info_string = 'blue'
        parser = ns_test.VCFGenotypeParser()
        with warnings.catch_warnings(record=True) as w:
            # Set the filter inside the context so recording does not depend on
            # whatever filters the active test runner has installed.
            warnings.simplefilter("always")
            genotype_to_fill = parser.parse(format_string, info_string)
            self.assertEqual("Encountered error 'list index out of range' so genotype fields information could not be "
                             "captured for the current variant.", _help_get_warn_msg(w))

        self.assertIsNone(genotype_to_fill)  # warn and return None but don't error out if any one parse fails