# Source code for VAPr.tests.test_vcf_genotype_fields_parsing

# standard libraries
import unittest
import warnings

# project-specific libraries
import VAPr.vcf_genotype_fields_parsing as ns_test

__author__ = 'Birmingham'

# Cause all warnings to always be triggered, so that a warning which has already been issued once
# is issued again and can be captured by the warnings.catch_warnings(record=True) blocks used in
# the tests in this module.
warnings.simplefilter("always")


def _help_get_warn_msg(warn_obj):
    """Return the message text of the most recently recorded warning.

    :param warn_obj: sequence of recorded warnings, such as the list yielded by
        ``warnings.catch_warnings(record=True)``.
    :return: ``str()`` of the ``message`` attribute of the last entry in ``warn_obj``.
    :raises AssertionError: if ``warn_obj`` is empty, i.e. no warning was recorded.
    """
    # Fail with an explicit message instead of an opaque "list index out of range" when the code
    # under test did not warn at all.
    if not warn_obj:
        raise AssertionError("No warnings were recorded, so there is no warning message to check.")
    return str(warn_obj[-1].message)


class TestFunctions(unittest.TestCase):
    """Tests for the module-level ``_fill_*`` functions of ``vcf_genotype_fields_parsing``."""

    def _assert_likelihoods_match(self, expected_values, genotype_info):
        """Compare ``genotype_info.genotype_likelihoods`` against the expected tuples.

        :param expected_values: list of ``(allele1_number, allele2_number, likelihood_neg_exponent)``
            tuples, in the order the likelihoods are expected to appear.
        :param genotype_info: object whose ``genotype_likelihoods`` entries are checked.
        """
        real_likelihoods = genotype_info.genotype_likelihoods
        self.assertEqual(len(expected_values), len(real_likelihoods))
        for index, curr_expected_values in enumerate(expected_values):
            real_values = real_likelihoods[index]
            self.assertEqual(curr_expected_values[0], real_values.allele1_number)
            self.assertEqual(curr_expected_values[1], real_values.allele2_number)
            self.assertEqual(curr_expected_values[2], real_values.likelihood_neg_exponent)

    # region _fill_genotype tests
    def test_fill_genotype(self):
        """A two-part GT value is stored unchanged as the genotype."""
        genotype_to_fill = ns_test.VCFGenotypeInfo('')
        ns_test._fill_genotype('0/2', genotype_to_fill)
        self.assertEqual('0/2', genotype_to_fill.genotype)

    def test_fill_genotype_warn(self):
        """A GT value with three parts produces a warning and leaves the genotype unset."""
        genotype_to_fill = ns_test.VCFGenotypeInfo('')
        with warnings.catch_warnings(record=True) as w:
            # Make sure the warning is recorded regardless of the global filter state.
            warnings.simplefilter("always")
            genotype_to_fill = ns_test._fill_genotype('1/0/2', genotype_to_fill)
            self.assertEqual("The GT tag value 1/0/2 does not split into exactly two values so genotype information "
                             "could not be captured for the current variant.", _help_get_warn_msg(w))
            self.assertIsNone(genotype_to_fill.genotype)  # no genotype created if GT tag splits wrong

    # endregion

    # region _fill_unfiltered_reads_counts tests
    def test_fill_unfiltered_reads_counts(self):
        """Each comma-separated AD count becomes one allele with that unfiltered read count."""
        genotype_to_fill = ns_test.VCFGenotypeInfo('')
        ns_test._fill_unfiltered_reads_counts('0,64,12', genotype_to_fill)
        self.assertEqual(3, len(genotype_to_fill.alleles))
        self.assertEqual(0, genotype_to_fill.alleles[0].unfiltered_read_counts)
        self.assertEqual(64, genotype_to_fill.alleles[1].unfiltered_read_counts)
        self.assertEqual(12, genotype_to_fill.alleles[2].unfiltered_read_counts)

    def test_fill_unfiltered_reads_counts_warn(self):
        """An AD value that is not comma-delimited produces a warning and creates no alleles."""
        genotype_to_fill = ns_test.VCFGenotypeInfo('')
        with warnings.catch_warnings(record=True) as w:
            # Make sure the warning is recorded regardless of the global filter state.
            warnings.simplefilter("always")
            genotype_to_fill = ns_test._fill_unfiltered_reads_counts('0;64', genotype_to_fill)
            self.assertEqual("The AD tag value 0;64 does not split into at least two values so unfiltered allele depth"
                             " information could not be captured for the current variant.", _help_get_warn_msg(w))
            self.assertEqual(0, len(genotype_to_fill.alleles))  # no alleles created if AD tag splits wrong

    # endregion

    # region fill_filtered_reads tests
    def test_fill_filtered_reads_count(self):
        """A numeric DP string is stored as an integer filter-passing reads count."""
        genotype_to_fill = ns_test.VCFGenotypeInfo('')
        ns_test._fill_filtered_reads_count('44', genotype_to_fill)
        self.assertEqual(44, genotype_to_fill.filter_passing_reads_count)

    # endregion

    # region _fill_genotype_confidence tests
    def test_fill_genotype_confidence(self):
        """A numeric GQ string is stored as a float genotype confidence."""
        genotype_to_fill = ns_test.VCFGenotypeInfo('')
        ns_test._fill_genotype_confidence('44.1', genotype_to_fill)
        self.assertEqual(44.1, genotype_to_fill.genotype_confidence)

    # endregion

    # region _fill_genotype_likelihoods tests
    def test_fill_genotype_likelihoods_three_alleles(self):
        """Ten PL values with four known alleles yield ten ordered allele-pair likelihoods."""
        expected_values = [(0, 0, 495),
                           (0, 1, 162),
                           (1, 1, 123),
                           (0, 2, 213),
                           (1, 2, 129),
                           (2, 2, 175),
                           (0, 3, 67),
                           (1, 3, 0),
                           (2, 3, 46),
                           (3, 3, 28.1)]
        genotype_to_fill = ns_test.VCFGenotypeInfo('')
        genotype_to_fill.alleles = [ns_test.Allele(10), ns_test.Allele(11), ns_test.Allele(12),
                                    ns_test.Allele(13)]
        ns_test._fill_genotype_likelihoods('495,162,123,213,129,175,67,0,46,28.1', genotype_to_fill)
        self._assert_likelihoods_match(expected_values, genotype_to_fill)

    def test_fill_genotype_likelihoods_no_alleles(self):
        """With no alleles present beforehand, three PL values add two alleles and three likelihoods."""
        expected_values = [(0, 0, 495),
                           (0, 1, 162),
                           (1, 1, 123)]
        genotype_to_fill = ns_test.VCFGenotypeInfo('')
        self.assertEqual(0, len(genotype_to_fill.alleles))
        ns_test._fill_genotype_likelihoods('495,162,123', genotype_to_fill)
        self.assertEqual(2, len(genotype_to_fill.alleles))  # should have added two
        self._assert_likelihoods_match(expected_values, genotype_to_fill)

    def test_fill_genotype_likelihoods_warn_too_many_alleles_implied(self):
        """Ten PL values with only three known alleles produce a warning and no likelihoods."""
        genotype_to_fill = ns_test.VCFGenotypeInfo('')
        genotype_to_fill.alleles = [ns_test.Allele(10), ns_test.Allele(11), ns_test.Allele(12)]
        with warnings.catch_warnings(record=True) as w:
            # Make sure the warning is recorded regardless of the global filter state.
            warnings.simplefilter("always")
            genotype_to_fill = ns_test._fill_genotype_likelihoods('495,162,123,213,129,175,67,0,46,28.1',
                                                                  genotype_to_fill)
            self.assertEqual("The PL tag value 495,162,123,213,129,175,67,0,46,28.1 appears to contain information for "
                             "more alleles than expected so 'normalized' Phred-scaled likelihoods of possible genotypes"
                             " information could not be captured for the current variant.", _help_get_warn_msg(w))
            self.assertEqual(0, len(genotype_to_fill.genotype_likelihoods))  # no likelihoods made if PL splits wrong

    def test_fill_genotype_likelihoods_warn_too_few_likelihoods_1(self):
        """Six PL values with four known alleles produce a warning and no likelihoods."""
        genotype_to_fill = ns_test.VCFGenotypeInfo('')
        genotype_to_fill.alleles = [ns_test.Allele(10), ns_test.Allele(11), ns_test.Allele(12),
                                    ns_test.Allele(13)]
        with warnings.catch_warnings(record=True) as w:
            # Make sure the warning is recorded regardless of the global filter state.
            warnings.simplefilter("always")
            genotype_to_fill = ns_test._fill_genotype_likelihoods('495,162,123,213,129,175', genotype_to_fill)
            self.assertEqual("The PL tag value 495,162,123,213,129,175 appears to contain information for fewer alleles"
                             " than expected so 'normalized' Phred-scaled likelihoods of possible genotypes information"
                             " could not be captured for the current variant.", _help_get_warn_msg(w))
            self.assertEqual(0, len(genotype_to_fill.genotype_likelihoods))  # no likelihoods made if PL splits wrong

    def test_fill_genotype_likelihoods_warn_too_few_likelihoods_2(self):
        """Nine PL values with four known alleles produce a warning and no likelihoods."""
        genotype_to_fill = ns_test.VCFGenotypeInfo('')
        genotype_to_fill.alleles = [ns_test.Allele(10), ns_test.Allele(11), ns_test.Allele(12),
                                    ns_test.Allele(13)]
        with warnings.catch_warnings(record=True) as w:
            # Make sure the warning is recorded regardless of the global filter state.
            warnings.simplefilter("always")
            genotype_to_fill = ns_test._fill_genotype_likelihoods('495,162,123,213,129,175,67,0,46', genotype_to_fill)
            self.assertEqual("The PL tag value 495,162,123,213,129,175,67,0,46 appears to contain information for fewer"
                             " alleles than expected so 'normalized' Phred-scaled likelihoods of possible genotypes "
                             "information could not be captured for the current variant.", _help_get_warn_msg(w))
            self.assertEqual(0, len(genotype_to_fill.genotype_likelihoods))  # no likelihoods made if PL splits wrong

    # endregion


class TestVCFGenotypeInfo(unittest.TestCase):
    """Tests for the validating property setters of ``VCFGenotypeInfo``."""
    # No tests of __init__ as it is just setting empty values
    # No explicit tests of getter properties as they're just returning an internal variable

    # region genotype_confidence setter tests
    def test_genotype_confidence_setter(self):
        """Numeric strings and ints are accepted and read back as floats."""
        dummy_vcfgenotypeinfo = ns_test.VCFGenotypeInfo('')
        dummy_vcfgenotypeinfo.genotype_confidence = "89"
        self.assertEqual(89.0, dummy_vcfgenotypeinfo.genotype_confidence)
        dummy_vcfgenotypeinfo.genotype_confidence = 89
        self.assertEqual(89.0, dummy_vcfgenotypeinfo.genotype_confidence)
        dummy_vcfgenotypeinfo.genotype_confidence = "-89.10"
        self.assertEqual(-89.1, dummy_vcfgenotypeinfo.genotype_confidence)

    def test_genotype_confidence_setter_error(self):
        """A non-numeric string is rejected with ValueError."""
        dummy_vcfgenotypeinfo = ns_test.VCFGenotypeInfo('')
        with self.assertRaises(ValueError):
            dummy_vcfgenotypeinfo.genotype_confidence = "blue"

    # endregion

    # region filter_passing_reads_count setter tests
    def test_filter_passing_reads_count_setter(self):
        """Integer-like values are stored as ints and the '.' placeholder is stored as None."""
        dummy_vcfgenotypeinfo = ns_test.VCFGenotypeInfo('')
        dummy_vcfgenotypeinfo.filter_passing_reads_count = "42"
        self.assertEqual(42, dummy_vcfgenotypeinfo.filter_passing_reads_count)
        dummy_vcfgenotypeinfo.filter_passing_reads_count = 0
        self.assertEqual(0, dummy_vcfgenotypeinfo.filter_passing_reads_count)
        dummy_vcfgenotypeinfo.filter_passing_reads_count = "."
        # assertIsNone checks identity, so a falsy-but-not-None value cannot slip through.
        self.assertIsNone(dummy_vcfgenotypeinfo.filter_passing_reads_count)

    def test_filter_passing_reads_count_setter_error(self):
        """Negative and fractional counts are rejected with ValueError."""
        dummy_vcfgenotypeinfo = ns_test.VCFGenotypeInfo('')
        with self.assertRaises(ValueError):
            dummy_vcfgenotypeinfo.filter_passing_reads_count = -1
        with self.assertRaises(ValueError):
            dummy_vcfgenotypeinfo.filter_passing_reads_count = 48.5

    # endregion


class TestAllele(unittest.TestCase):
    """Tests for the ``unfiltered_read_counts`` setter of ``Allele``."""
    # No tests of __init__ as it is just setting values
    # No explicit tests of getter properties as they're just returning an internal variable

    def test_read_counts_setter(self):
        """The constructor value is readable and can be replaced through the setter."""
        dummy_allele = ns_test.Allele(0)
        self.assertEqual(0, dummy_allele.unfiltered_read_counts)
        dummy_allele.unfiltered_read_counts = 94
        self.assertEqual(94, dummy_allele.unfiltered_read_counts)

    def test_read_counts_setter_error(self):
        """Negative and fractional read counts are rejected with ValueError."""
        dummy_allele = ns_test.Allele(0)
        with self.assertRaises(ValueError):
            dummy_allele.unfiltered_read_counts = -94
        with self.assertRaises(ValueError):
            dummy_allele.unfiltered_read_counts = 48.5


class TestGenotypeLikelihood(unittest.TestCase):
    """Tests for the allele-number and likelihood validation of ``GenotypeLikelihood``."""
    # No tests of init as just calls tested setters
    # No explicit tests of getter properties as they're just returning an internal variable

    # region _validate_allele_relationship tests
    def test__validate_allele_relationship_pass(self):
        """An allele 1 number smaller than the allele 2 number is accepted."""
        ns_test.GenotypeLikelihood._validate_allele_relationship(0, 2)
        # if we got this far, the test passed

    def test__validate_allele_relationship_fail(self):
        """An allele 1 number larger than the allele 2 number raises ValueError."""
        with self.assertRaises(ValueError):
            ns_test.GenotypeLikelihood._validate_allele_relationship(2, 0)

    # endregion

    # region allele1_number setter tests
    def test_allele1_number_setter(self):
        """A valid allele 1 number below the allele 2 number is stored."""
        temp_likelihood = ns_test.GenotypeLikelihood(0, 5, 0)
        temp_likelihood.allele1_number = 4
        self.assertEqual(4, temp_likelihood.allele1_number)

    def test_allele1_number_setter_error(self):
        """Negative, fractional, non-numeric, and too-large allele 1 numbers raise ValueError."""
        with self.assertRaises(ValueError):
            ns_test.GenotypeLikelihood(0, 0, 0).allele1_number = -1
        with self.assertRaises(ValueError):
            ns_test.GenotypeLikelihood(0, 5, 0).allele1_number = 4.5
        with self.assertRaises(ValueError):
            ns_test.GenotypeLikelihood(0, 0, 0).allele1_number = "blue"

        temp_likelihood = ns_test.GenotypeLikelihood(0, 0, 0)
        temp_likelihood.allele2_number = 1
        with self.assertRaises(ValueError):
            temp_likelihood.allele1_number = 4  # can't be greater than allele 2

    # endregion

    # region allele2_number setter tests
    def test_allele2_number_setter(self):
        """A valid allele 2 number above the allele 1 number is stored."""
        temp_likelihood = ns_test.GenotypeLikelihood(0, 0, 0)
        temp_likelihood.allele2_number = 4
        self.assertEqual(4, temp_likelihood.allele2_number)

    def test_allele2_number_setter_error(self):
        """Negative, fractional, non-numeric, and too-small allele 2 numbers raise ValueError."""
        with self.assertRaises(ValueError):
            ns_test.GenotypeLikelihood(0, 0, 0).allele2_number = -1
        with self.assertRaises(ValueError):
            ns_test.GenotypeLikelihood(0, 0, 0).allele2_number = 4.5
        with self.assertRaises(ValueError):
            ns_test.GenotypeLikelihood(0, 0, 0).allele2_number = "blue"

        temp_likelihood = ns_test.GenotypeLikelihood(4, 4, 0)
        with self.assertRaises(ValueError):
            temp_likelihood.allele2_number = 2  # can't be smaller than allele 1

    # endregion

    # region likelihood_neg_exponent setter tests
    def test_likelihood_neg_exponent_setter(self):
        """Numeric strings and ints are accepted and read back as floats."""
        temp_likelihood = ns_test.GenotypeLikelihood(0, 0, 0)
        temp_likelihood.likelihood_neg_exponent = "89"
        self.assertEqual(89.0, temp_likelihood.likelihood_neg_exponent)
        temp_likelihood.likelihood_neg_exponent = 89
        self.assertEqual(89.0, temp_likelihood.likelihood_neg_exponent)
        temp_likelihood.likelihood_neg_exponent = "-89.10"
        self.assertEqual(-89.1, temp_likelihood.likelihood_neg_exponent)

    # Named after the setter it exercises; the previous name referred to genotype_confidence,
    # which this class does not test.
    def test_likelihood_neg_exponent_setter_error(self):
        """A non-numeric likelihood string is rejected with ValueError."""
        temp_likelihood = ns_test.GenotypeLikelihood(0, 0, 0)
        with self.assertRaises(ValueError):
            temp_likelihood.likelihood_neg_exponent = "blue"

    # endregion


class TestVCFGenotypeString(unittest.TestCase):
    """Tests for ``VCFGenotypeParser``: genotype-fields string validation and parsing."""

    def test_is_valid_genotype_fields_string_true(self):
        """Strings carrying at least some genotype data are reported as valid."""
        self.assertTrue(ns_test.VCFGenotypeParser.is_valid_genotype_fields_string("1/1:0,2:2:6:89,6,0"))
        self.assertTrue(ns_test.VCFGenotypeParser.is_valid_genotype_fields_string("./.:0,2:.:.:."))
        self.assertTrue(ns_test.VCFGenotypeParser.is_valid_genotype_fields_string("0/0"))

    def test_is_valid_genotype_fields_string_false_period(self):
        """A lone '.' is reported as invalid."""
        self.assertFalse(ns_test.VCFGenotypeParser.is_valid_genotype_fields_string("."))

    def test_is_valid_genotype_fields_string_false_delimited(self):
        """The delimited placeholder strings checked here are reported as invalid."""
        self.assertFalse(ns_test.VCFGenotypeParser.is_valid_genotype_fields_string("./.:.:.:.:."))
        self.assertFalse(ns_test.VCFGenotypeParser.is_valid_genotype_fields_string("./.:0,0"))

    def test_parse_GT_GQ_PL(self):
        """Parsing GT:GQ:PL fills genotype, confidence, and likelihoods; reads count stays None."""
        format_string = 'GT:GQ:PL'
        info_string = '1/1:99:1187.2,101,0'
        parser = ns_test.VCFGenotypeParser()
        genotype_to_fill = parser.parse(format_string, info_string)
        self.assertEqual('1/1', genotype_to_fill.genotype)
        self.assertIsNone(genotype_to_fill.filter_passing_reads_count)
        self.assertEqual(99, genotype_to_fill.genotype_confidence)
        self.assertEqual(2, len(genotype_to_fill.alleles))
        self.assertEqual(3, len(genotype_to_fill.genotype_likelihoods))
        self.assertEqual(1187.2, genotype_to_fill.genotype_likelihoods[0].likelihood_neg_exponent)
        self.assertEqual(101, genotype_to_fill.genotype_likelihoods[1].likelihood_neg_exponent)
        self.assertEqual(0, genotype_to_fill.genotype_likelihoods[2].likelihood_neg_exponent)

    def test_parse_GT_AD_DP_GQ_PL(self):
        """Parsing GT:AD:DP:GQ:PL fills genotype, read counts, confidence, and likelihoods."""
        format_string = 'GT:AD:DP:GQ:PL'
        info_string = '1/1:0,34:34:99:1187.2,101,0'
        parser = ns_test.VCFGenotypeParser()
        genotype_to_fill = parser.parse(format_string, info_string)

        self.assertEqual('1/1', genotype_to_fill.genotype)
        self.assertEqual(34, genotype_to_fill.filter_passing_reads_count)
        self.assertEqual(99, genotype_to_fill.genotype_confidence)
        self.assertEqual(2, len(genotype_to_fill.alleles))
        self.assertEqual(3, len(genotype_to_fill.genotype_likelihoods))
        self.assertEqual(0, genotype_to_fill.alleles[0].unfiltered_read_counts)
        self.assertEqual(1187.2, genotype_to_fill.genotype_likelihoods[0].likelihood_neg_exponent)
        self.assertEqual(34, genotype_to_fill.alleles[1].unfiltered_read_counts)
        self.assertEqual(101, genotype_to_fill.genotype_likelihoods[1].likelihood_neg_exponent)
        self.assertEqual(0, genotype_to_fill.genotype_likelihoods[2].likelihood_neg_exponent)

    def test_parse_error_reported_as_warn(self):
        """A parse failure is reported as a warning and ``parse`` returns None."""
        format_string = 'GT:AD:GQ:PL'
        info_string = 'blue'
        parser = ns_test.VCFGenotypeParser()
        with warnings.catch_warnings(record=True) as w:
            # Make sure the warning is recorded regardless of the global filter state.
            warnings.simplefilter("always")
            genotype_to_fill = parser.parse(format_string, info_string)
            self.assertEqual("Encountered error 'list index out of range' so genotype fields information could not be "
                             "captured for the current variant.", _help_get_warn_msg(w))
            self.assertIsNone(genotype_to_fill)  # warn and return None but don't error out if any one parse fails