Clover icon

Coverage Report

  1. Project Clover database Thu Aug 13 2020 12:04:21 BST
  2. Package jalview.io.vcf

File VCFLoaderTest.java

 

Code metrics

6
379
8
1
764
506
11
0.03
47.38
8
1.38

Classes

Class Line # Actions
VCFLoaderTest 33 379 11
1.0100%
 

Contributing tests

This file is covered by 4 tests. .

Source view

1    package jalview.io.vcf;
2   
3    import static jalview.io.gff.SequenceOntologyI.SEQUENCE_VARIANT;
4    import static org.testng.Assert.assertEquals;
5    import static org.testng.Assert.assertNull;
6    import static org.testng.Assert.assertTrue;
7   
8    import jalview.bin.Cache;
9    import jalview.datamodel.AlignmentI;
10    import jalview.datamodel.DBRefEntry;
11    import jalview.datamodel.Mapping;
12    import jalview.datamodel.Sequence;
13    import jalview.datamodel.SequenceFeature;
14    import jalview.datamodel.SequenceI;
15    import jalview.datamodel.features.FeatureAttributes;
16    import jalview.datamodel.features.SequenceFeatures;
17    import jalview.gui.AlignFrame;
18    import jalview.io.DataSourceType;
19    import jalview.io.FileLoader;
20    import jalview.io.gff.Gff3Helper;
21    import jalview.util.MapList;
22   
23    import java.io.File;
24    import java.io.IOException;
25    import java.io.PrintWriter;
26    import java.util.List;
27    import java.util.Map;
28   
29    import org.testng.annotations.BeforeClass;
30    import org.testng.annotations.BeforeTest;
31    import org.testng.annotations.Test;
32   
 
33    public class VCFLoaderTest
34    {
35    private static final float DELTA = 0.00001f;
36   
37    // columns 9717- of gene P30419 from Ensembl (much modified)
38    private static final String FASTA = ""
39    +
40    /*
41    * forward strand 'gene' and 'transcript' with two exons
42    */
43    ">gene1/1-25 chromosome:GRCh38:17:45051610:45051634:1\n"
44    + "CAAGCTGGCGGACGAGAGTGTGACA\n"
45    + ">transcript1/1-18\n--AGCTGGCG----AGAGTGTGAC-\n"
46   
47    /*
48    * reverse strand gene and transcript (reverse complement alleles!)
49    */
50    + ">gene2/1-25 chromosome:GRCh38:17:45051610:45051634:-1\n"
51    + "TGTCACACTCTCGTCCGCCAGCTTG\n"
52    + ">transcript2/1-18\n" + "-GTCACACTCT----CGCCAGCT--\n"
53   
54    /*
55    * 'gene' on chromosome 5 with two transcripts
56    */
57    + ">gene3/1-25 chromosome:GRCh38:5:45051610:45051634:1\n"
58    + "CAAGCTGGCGGACGAGAGTGTGACA\n"
59    + ">transcript3/1-18\n--AGCTGGCG----AGAGTGTGAC-\n"
60    + ">transcript4/1-18\n-----TGG-GGACGAGAGTGTGA-A\n";
61   
62    private static final String[] VCF = { "##fileformat=VCFv4.2",
63    // fields other than AF are ignored when parsing as they have no INFO definition
64    "##INFO=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency, for each ALT allele, in the same order as listed\">",
65    "##INFO=<ID=AC_Female,Number=A,Type=Integer,Description=\"Allele count in Female genotypes\"",
66    "##INFO=<ID=AF_AFR,Number=A,Type=Float,Description=\"Allele Frequency among African/African American genotypes\"",
67    "##reference=Homo_sapiens/GRCh38",
68    "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO",
69    // A/T,C variants in position 2 of gene sequence (precedes transcript)
70    // should create 2 variant features with respective AF values
71    // malformed values for AC_Female and AF_AFR should be ignored
72    "17\t45051611\trs384765\tA\tT,C\t1666.64\tRF;XYZ\tAC=15;AF=5.0e-03,4.0e-03;AC_Female=12,3d;AF_AFR=low,2.3e-4",
73    // SNP G/C in position 4 of gene sequence, position 2 of transcript
74    // insertion G/GA is transferred to nucleotide but not to peptide
75    "17\t45051613\t.\tG\tGA,C\t1666.65\t.\tAC=15;AF=3.0e-03,2.0e-03",
76    // '.' in INFO field should be ignored
77    "17\t45051615\t.\tG\tC\t1666.66\tRF\tAC=16;AF=." };
78   
 
79  1 toggle @BeforeClass(alwaysRun = true)
80    public void setUp()
81    {
82    /*
83    * configure to capture all available VCF and VEP (CSQ) fields
84    */
85  1 Cache.loadProperties("test/jalview/io/testProps.jvprops");
86  1 Cache.setProperty("VCF_FIELDS", ".*");
87  1 Cache.setProperty("VEP_FIELDS", ".*");
88  1 Cache.setProperty("VCF_ASSEMBLY", "GRCh38=GRCh38");
89  1 Cache.initLogger();
90    }
91   
 
92  1 toggle @BeforeTest(alwaysRun = true)
93    public void setUpBeforeTest()
94    {
95    /*
96    * clear down feature attributes metadata
97    */
98  1 FeatureAttributes.getInstance().clear();
99    }
100   
 
101  1 toggle @Test(groups = "Functional")
102    public void testDoLoad() throws IOException
103    {
104  1 AlignmentI al = buildAlignment();
105   
106  1 File f = makeVcfFile();
107  1 VCFLoader loader = new VCFLoader(f.getPath());
108   
109  1 loader.doLoad(al.getSequencesArray(), null);
110   
111    /*
112    * verify variant feature(s) added to gene
113    * NB alleles at a locus may not be processed, and features added,
114    * in the order in which they appear in the VCF record as method
115    * VariantContext.getAlternateAlleles() does not guarantee order
116    * - order of assertions here matches what we find (is not important)
117    */
118  1 List<SequenceFeature> geneFeatures = al.getSequenceAt(0)
119    .getSequenceFeatures();
120  1 SequenceFeatures.sortFeatures(geneFeatures, true);
121  1 assertEquals(geneFeatures.size(), 5);
122  1 SequenceFeature sf = geneFeatures.get(0);
123  1 assertEquals(sf.getFeatureGroup(), "VCF");
124  1 assertEquals(sf.getBegin(), 2);
125  1 assertEquals(sf.getEnd(), 2);
126  1 assertEquals(sf.getScore(), 0f);
127  1 assertEquals(sf.getValue("AF"), "4.0e-03");
128  1 assertEquals(sf.getValue("AF_AFR"), "2.3e-4");
129  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "A,C");
130  1 assertEquals(sf.getType(), SEQUENCE_VARIANT);
131  1 assertEquals(sf.getValue("POS"), "45051611");
132  1 assertEquals(sf.getValue("ID"), "rs384765");
133  1 assertEquals(sf.getValue("QUAL"), "1666.64");
134  1 assertEquals(sf.getValue("FILTER"), "RF;XYZ");
135    // malformed integer for AC_Female is ignored (JAL-3375)
136  1 assertNull(sf.getValue("AC_Female"));
137   
138  1 sf = geneFeatures.get(1);
139  1 assertEquals(sf.getFeatureGroup(), "VCF");
140  1 assertEquals(sf.getBegin(), 2);
141  1 assertEquals(sf.getEnd(), 2);
142  1 assertEquals(sf.getType(), SEQUENCE_VARIANT);
143  1 assertEquals(sf.getScore(), 0f);
144  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 5.0e-03,
145    DELTA);
146  1 assertEquals(sf.getValue("AC_Female"), "12");
147    // malformed float for AF_AFR is ignored (JAL-3375)
148  1 assertNull(sf.getValue("AC_AFR"));
149  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "A,T");
150   
151  1 sf = geneFeatures.get(2);
152  1 assertEquals(sf.getFeatureGroup(), "VCF");
153  1 assertEquals(sf.getBegin(), 4);
154  1 assertEquals(sf.getEnd(), 4);
155  1 assertEquals(sf.getType(), SEQUENCE_VARIANT);
156  1 assertEquals(sf.getScore(), 0f);
157  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 2.0e-03,
158    DELTA);
159  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,C");
160   
161  1 sf = geneFeatures.get(3);
162  1 assertEquals(sf.getFeatureGroup(), "VCF");
163  1 assertEquals(sf.getBegin(), 4);
164  1 assertEquals(sf.getEnd(), 4);
165  1 assertEquals(sf.getType(), SEQUENCE_VARIANT);
166  1 assertEquals(sf.getScore(), 0f);
167  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 3.0e-03,
168    DELTA);
169  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GA");
170  1 assertNull(sf.getValue("ID")); // '.' is ignored
171  1 assertNull(sf.getValue("FILTER")); // '.' is ignored
172   
173  1 sf = geneFeatures.get(4);
174  1 assertEquals(sf.getFeatureGroup(), "VCF");
175  1 assertEquals(sf.getBegin(), 6);
176  1 assertEquals(sf.getEnd(), 6);
177  1 assertEquals(sf.getType(), SEQUENCE_VARIANT);
178  1 assertEquals(sf.getScore(), 0f);
179    // AF=. should not have been captured
180  1 assertNull(sf.getValue("AF"));
181  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,C");
182   
183    /*
184    * verify variant feature(s) added to transcript
185    */
186  1 List<SequenceFeature> transcriptFeatures = al.getSequenceAt(1)
187    .getSequenceFeatures();
188  1 assertEquals(transcriptFeatures.size(), 3);
189  1 sf = transcriptFeatures.get(0);
190  1 assertEquals(sf.getFeatureGroup(), "VCF");
191  1 assertEquals(sf.getBegin(), 2);
192  1 assertEquals(sf.getEnd(), 2);
193  1 assertEquals(sf.getType(), SEQUENCE_VARIANT);
194  1 assertEquals(sf.getScore(), 0f);
195  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 2.0e-03,
196    DELTA);
197  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,C");
198  1 sf = transcriptFeatures.get(1);
199  1 assertEquals(sf.getFeatureGroup(), "VCF");
200  1 assertEquals(sf.getBegin(), 2);
201  1 assertEquals(sf.getEnd(), 2);
202  1 assertEquals(sf.getType(), SEQUENCE_VARIANT);
203  1 assertEquals(sf.getScore(), 0f);
204  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 3.0e-03,
205    DELTA);
206  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GA");
207   
208    /*
209    * verify SNP variant feature(s) computed and added to protein
210    * first codon AGC varies to ACC giving S/T
211    */
212  1 List<DBRefEntry> dbRefs = al.getSequenceAt(1).getDBRefs();
213  1 SequenceI peptide = null;
214  1 for (DBRefEntry dbref : dbRefs)
215    {
216  2 if (dbref.getMap().getMap().getFromRatio() == 3)
217    {
218  1 peptide = dbref.getMap().getTo();
219    }
220    }
221  1 List<SequenceFeature> proteinFeatures = peptide.getSequenceFeatures();
222   
223    /*
224    * JAL-3187 don't precompute protein features, do dynamically instead
225    */
226  1 assertTrue(proteinFeatures.isEmpty());
227    }
228   
 
229  2 toggle private File makeVcfFile() throws IOException
230    {
231  2 File f = File.createTempFile("Test", ".vcf");
232  2 f.deleteOnExit();
233  2 PrintWriter pw = new PrintWriter(f);
234  2 for (String vcfLine : VCF)
235    {
236  18 pw.println(vcfLine);
237    }
238  2 pw.close();
239  2 return f;
240    }
241   
242    /**
243    * Make a simple alignment with one 'gene' and one 'transcript'
244    *
245    * @return
246    */
 
247  3 toggle private AlignmentI buildAlignment()
248    {
249  3 AlignFrame af = new FileLoader().LoadFileWaitTillLoaded(FASTA,
250    DataSourceType.PASTE);
251   
252    /*
253    * map gene1 sequence to chromosome (normally done when the sequence is fetched
254    * from Ensembl and transcripts computed)
255    */
256  3 AlignmentI alignment = af.getViewport().getAlignment();
257  3 SequenceI gene1 = alignment.findName("gene1");
258  3 int[] to = new int[] { 45051610, 45051634 };
259  3 int[] from = new int[] { gene1.getStart(), gene1.getEnd() };
260  3 gene1.setGeneLoci("homo_sapiens", "GRCh38", "17", new MapList(from, to,
261    1, 1));
262   
263    /*
264    * map 'transcript1' to chromosome via 'gene1'
265    * transcript1/1-18 is gene1/3-10,15-24
266    * which is chromosome 45051612-45051619,45051624-45051633
267    */
268  3 to = new int[] { 45051612, 45051619, 45051624, 45051633 };
269  3 SequenceI transcript1 = alignment.findName("transcript1");
270  3 from = new int[] { transcript1.getStart(), transcript1.getEnd() };
271  3 transcript1.setGeneLoci("homo_sapiens", "GRCh38", "17", new MapList(
272    from, to,
273    1, 1));
274   
275    /*
276    * map gene2 to chromosome reverse strand
277    */
278  3 SequenceI gene2 = alignment.findName("gene2");
279  3 to = new int[] { 45051634, 45051610 };
280  3 from = new int[] { gene2.getStart(), gene2.getEnd() };
281  3 gene2.setGeneLoci("homo_sapiens", "GRCh38", "17", new MapList(from, to,
282    1, 1));
283   
284    /*
285    * map 'transcript2' to chromosome via 'gene2'
286    * transcript2/1-18 is gene2/2-11,16-23
287    * which is chromosome 45051633-45051624,45051619-45051612
288    */
289  3 to = new int[] { 45051633, 45051624, 45051619, 45051612 };
290  3 SequenceI transcript2 = alignment.findName("transcript2");
291  3 from = new int[] { transcript2.getStart(), transcript2.getEnd() };
292  3 transcript2.setGeneLoci("homo_sapiens", "GRCh38", "17", new MapList(
293    from, to,
294    1, 1));
295   
296    /*
297    * add a protein product as a DBRef on transcript1
298    */
299  3 SequenceI peptide1 = new Sequence("ENSP001", "SWRECD");
300  3 MapList mapList = new MapList(new int[] { 1, 18 }, new int[] { 1, 6 },
301    3, 1);
302  3 Mapping map = new Mapping(peptide1, mapList);
303  3 DBRefEntry product = new DBRefEntry("", "", "ENSP001", map);
304  3 transcript1.addDBRef(product);
305   
306    /*
307    * add a protein product as a DBRef on transcript2
308    */
309  3 SequenceI peptide2 = new Sequence("ENSP002", "VTLSPA");
310  3 mapList = new MapList(new int[] { 1, 18 }, new int[] { 1, 6 }, 3, 1);
311  3 map = new Mapping(peptide2, mapList);
312  3 product = new DBRefEntry("", "", "ENSP002", map);
313  3 transcript2.addDBRef(product);
314   
315    /*
316    * map gene3 to chromosome
317    */
318  3 SequenceI gene3 = alignment.findName("gene3");
319  3 to = new int[] { 45051610, 45051634 };
320  3 from = new int[] { gene3.getStart(), gene3.getEnd() };
321  3 gene3.setGeneLoci("homo_sapiens", "GRCh38", "5", new MapList(from, to,
322    1, 1));
323   
324    /*
325    * map 'transcript3' to chromosome
326    */
327  3 SequenceI transcript3 = alignment.findName("transcript3");
328  3 to = new int[] { 45051612, 45051619, 45051624, 45051633 };
329  3 from = new int[] { transcript3.getStart(), transcript3.getEnd() };
330  3 transcript3.setGeneLoci("homo_sapiens", "GRCh38", "5", new MapList(
331    from, to,
332    1, 1));
333   
334    /*
335    * map 'transcript4' to chromosome
336    */
337  3 SequenceI transcript4 = alignment.findName("transcript4");
338  3 to = new int[] { 45051615, 45051617, 45051619, 45051632, 45051634,
339    45051634 };
340  3 from = new int[] { transcript4.getStart(), transcript4.getEnd() };
341  3 transcript4.setGeneLoci("homo_sapiens", "GRCh38", "5", new MapList(
342    from, to,
343    1, 1));
344   
345    /*
346    * add a protein product as a DBRef on transcript3
347    */
348  3 SequenceI peptide3 = new Sequence("ENSP003", "SWRECD");
349  3 mapList = new MapList(new int[] { 1, 18 }, new int[] { 1, 6 }, 3, 1);
350  3 map = new Mapping(peptide3, mapList);
351  3 product = new DBRefEntry("", "", "ENSP003", map);
352  3 transcript3.addDBRef(product);
353   
354  3 return alignment;
355    }
356   
357    /**
358    * Test with 'gene' and 'transcript' mapped to the reverse strand of the
359    * chromosome. The VCF variant positions (in forward coordinates) should get
360    * correctly located on sequence positions.
361    *
362    * @throws IOException
363    */
 
364  1 toggle @Test(groups = "Functional")
365    public void testDoLoad_reverseStrand() throws IOException
366    {
367  1 AlignmentI al = buildAlignment();
368   
369  1 File f = makeVcfFile();
370   
371  1 VCFLoader loader = new VCFLoader(f.getPath());
372   
373  1 loader.doLoad(al.getSequencesArray(), null);
374   
375    /*
376    * verify variant feature(s) added to gene2
377    * gene2/1-25 maps to chromosome 45051634- reverse strand
378    */
379  1 List<SequenceFeature> geneFeatures = al.getSequenceAt(2)
380    .getSequenceFeatures();
381  1 SequenceFeatures.sortFeatures(geneFeatures, true);
382  1 assertEquals(geneFeatures.size(), 5);
383  1 SequenceFeature sf;
384   
385    /*
386    * insertion G/GA at 45051613 maps to an insertion at
387    * the preceding position (21) on reverse strand gene
388    * reference: CAAGC -> GCTTG/21-25
389    * genomic variant: CAAGAC (G/GA)
390    * gene variant: GTCTTG (G/GT at 21)
391    */
392  1 sf = geneFeatures.get(1);
393  1 assertEquals(sf.getFeatureGroup(), "VCF");
394  1 assertEquals(sf.getBegin(), 21);
395  1 assertEquals(sf.getEnd(), 21);
396  1 assertEquals(sf.getType(), SEQUENCE_VARIANT);
397  1 assertEquals(sf.getScore(), 0f);
398  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GT");
399  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 3.0e-03,
400    DELTA);
401   
402    /*
403    * variant G/C at 45051613 maps to C/G at gene position 22
404    */
405  1 sf = geneFeatures.get(2);
406  1 assertEquals(sf.getFeatureGroup(), "VCF");
407  1 assertEquals(sf.getBegin(), 22);
408  1 assertEquals(sf.getEnd(), 22);
409  1 assertEquals(sf.getType(), SEQUENCE_VARIANT);
410  1 assertEquals(sf.getScore(), 0f);
411  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "C,G");
412  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 2.0e-03,
413    DELTA);
414   
415    /*
416    * variant A/C at 45051611 maps to T/G at gene position 24
417    */
418  1 sf = geneFeatures.get(3);
419  1 assertEquals(sf.getFeatureGroup(), "VCF");
420  1 assertEquals(sf.getBegin(), 24);
421  1 assertEquals(sf.getEnd(), 24);
422  1 assertEquals(sf.getType(), SEQUENCE_VARIANT);
423  1 assertEquals(sf.getScore(), 0f);
424  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "T,G");
425  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 4.0e-03,
426    DELTA);
427   
428    /*
429    * variant A/T at 45051611 maps to T/A at gene position 24
430    */
431  1 sf = geneFeatures.get(4);
432  1 assertEquals(sf.getFeatureGroup(), "VCF");
433  1 assertEquals(sf.getBegin(), 24);
434  1 assertEquals(sf.getEnd(), 24);
435  1 assertEquals(sf.getType(), SEQUENCE_VARIANT);
436  1 assertEquals(sf.getScore(), 0f);
437  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "T,A");
438  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 5.0e-03,
439    DELTA);
440   
441    /*
442    * verify 3 variant features added to transcript2
443    */
444  1 List<SequenceFeature> transcriptFeatures = al.getSequenceAt(3)
445    .getSequenceFeatures();
446  1 assertEquals(transcriptFeatures.size(), 3);
447   
448    /*
449    * insertion G/GT at position 21 of gene maps to position 16 of transcript
450    */
451  1 sf = transcriptFeatures.get(1);
452  1 assertEquals(sf.getFeatureGroup(), "VCF");
453  1 assertEquals(sf.getBegin(), 16);
454  1 assertEquals(sf.getEnd(), 16);
455  1 assertEquals(sf.getType(), SEQUENCE_VARIANT);
456  1 assertEquals(sf.getScore(), 0f);
457  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GT");
458  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 3.0e-03,
459    DELTA);
460   
461    /*
462    * SNP C/G at position 22 of gene maps to position 17 of transcript
463    */
464  1 sf = transcriptFeatures.get(2);
465  1 assertEquals(sf.getFeatureGroup(), "VCF");
466  1 assertEquals(sf.getBegin(), 17);
467  1 assertEquals(sf.getEnd(), 17);
468  1 assertEquals(sf.getType(), SEQUENCE_VARIANT);
469  1 assertEquals(sf.getScore(), 0f);
470  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "C,G");
471  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 2.0e-03,
472    DELTA);
473   
474    /*
475    * verify variant feature(s) computed and added to protein
476    * last codon GCT varies to GGT giving A/G in the last peptide position
477    */
478  1 List<DBRefEntry> dbRefs = al.getSequenceAt(3).getDBRefs();
479  1 SequenceI peptide = null;
480  1 for (DBRefEntry dbref : dbRefs)
481    {
482  2 if (dbref.getMap().getMap().getFromRatio() == 3)
483    {
484  1 peptide = dbref.getMap().getTo();
485    }
486    }
487  1 List<SequenceFeature> proteinFeatures = peptide.getSequenceFeatures();
488   
489    /*
490    * JAL-3187 don't precompute protein features, do dynamically instead
491    */
492  1 assertTrue(proteinFeatures.isEmpty());
493    }
494   
495    /**
496    * Tests that if VEP consequence (CSQ) data is present in the VCF data, then
497    * it is added to the variant feature, but restricted where possible to the
498    * consequences for a specific transcript
499    *
500    * @throws IOException
501    */
 
502  1 toggle @Test(groups = "Functional")
503    public void testDoLoad_vepCsq() throws IOException
504    {
505  1 AlignmentI al = buildAlignment();
506   
507  1 VCFLoader loader = new VCFLoader("test/jalview/io/vcf/testVcf.vcf");
508   
509    /*
510    * VCF data file with variants at gene3 positions
511    * 1 C/A
512    * 5 C/T
513    * 9 CGT/C (deletion)
514    * 13 C/G, C/T
515    * 17 A/AC (insertion), A/G
516    */
517  1 loader.doLoad(al.getSequencesArray(), null);
518   
519    /*
520    * verify variant feature(s) added to gene3
521    */
522  1 List<SequenceFeature> geneFeatures = al.findName("gene3")
523    .getSequenceFeatures();
524  1 SequenceFeatures.sortFeatures(geneFeatures, true);
525  1 assertEquals(geneFeatures.size(), 7);
526  1 SequenceFeature sf = geneFeatures.get(0);
527  1 assertEquals(sf.getBegin(), 1);
528  1 assertEquals(sf.getEnd(), 1);
529  1 assertEquals(sf.getScore(), 0f);
530  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.1f, DELTA);
531  1 assertEquals(sf.getValue("alleles"), "C,A");
532    // gene features include Consequence for all transcripts
533  1 Map map = (Map) sf.getValue("CSQ");
534  1 assertEquals(map.size(), 9);
535  1 assertEquals(map.get("PolyPhen"), "Bad");
536   
537  1 sf = geneFeatures.get(1);
538  1 assertEquals(sf.getBegin(), 5);
539  1 assertEquals(sf.getEnd(), 5);
540  1 assertEquals(sf.getScore(), 0f);
541  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.2f, DELTA);
542  1 assertEquals(sf.getValue("alleles"), "C,T");
543  1 map = (Map) sf.getValue("CSQ");
544  1 assertEquals(map.size(), 9);
545  1 assertEquals(map.get("PolyPhen"), "Bad;;"); // %3B%3B decoded
546   
547  1 sf = geneFeatures.get(2);
548  1 assertEquals(sf.getBegin(), 9);
549  1 assertEquals(sf.getEnd(), 11); // deletion over 3 positions
550  1 assertEquals(sf.getScore(), 0f);
551  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.3f, DELTA);
552  1 assertEquals(sf.getValue("alleles"), "CGG,C");
553  1 map = (Map) sf.getValue("CSQ");
554  1 assertEquals(map.size(), 9);
555   
556  1 sf = geneFeatures.get(3);
557  1 assertEquals(sf.getBegin(), 13);
558  1 assertEquals(sf.getEnd(), 13);
559  1 assertEquals(sf.getScore(), 0f);
560  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.5f, DELTA);
561  1 assertEquals(sf.getValue("alleles"), "C,T");
562  1 map = (Map) sf.getValue("CSQ");
563  1 assertEquals(map.size(), 9);
564   
565  1 sf = geneFeatures.get(4);
566  1 assertEquals(sf.getBegin(), 13);
567  1 assertEquals(sf.getEnd(), 13);
568  1 assertEquals(sf.getScore(), 0f);
569  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.4f, DELTA);
570  1 assertEquals(sf.getValue("alleles"), "C,G");
571  1 map = (Map) sf.getValue("CSQ");
572  1 assertEquals(map.size(), 9);
573   
574  1 sf = geneFeatures.get(5);
575  1 assertEquals(sf.getBegin(), 17);
576  1 assertEquals(sf.getEnd(), 17);
577  1 assertEquals(sf.getScore(), 0f);
578  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.7f, DELTA);
579  1 assertEquals(sf.getValue("alleles"), "A,G");
580  1 map = (Map) sf.getValue("CSQ");
581  1 assertEquals(map.size(), 9);
582   
583  1 sf = geneFeatures.get(6);
584  1 assertEquals(sf.getBegin(), 17);
585  1 assertEquals(sf.getEnd(), 17); // insertion
586  1 assertEquals(sf.getScore(), 0f);
587  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.6f, DELTA);
588  1 assertEquals(sf.getValue("alleles"), "A,AC");
589  1 map = (Map) sf.getValue("CSQ");
590  1 assertEquals(map.size(), 9);
591   
592    /*
593    * verify variant feature(s) added to transcript3
594    * at columns 5 (1), 17 (2), positions 3, 11
595    * note the deletion at columns 9-11 is not transferred since col 11
596    * has no mapping to transcript 3
597    */
598  1 List<SequenceFeature> transcriptFeatures = al.findName("transcript3")
599    .getSequenceFeatures();
600  1 SequenceFeatures.sortFeatures(transcriptFeatures, true);
601  1 assertEquals(transcriptFeatures.size(), 3);
602  1 sf = transcriptFeatures.get(0);
603  1 assertEquals(sf.getBegin(), 3);
604  1 assertEquals(sf.getEnd(), 3);
605  1 assertEquals(sf.getScore(), 0f);
606  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.2f, DELTA);
607  1 assertEquals(sf.getValue("alleles"), "C,T");
608    // transcript features only have Consequence for that transcripts
609  1 map = (Map) sf.getValue("CSQ");
610  1 assertEquals(map.size(), 9);
611  1 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript3");
612   
613  1 sf = transcriptFeatures.get(1);
614  1 assertEquals(sf.getBegin(), 11);
615  1 assertEquals(sf.getEnd(), 11);
616  1 assertEquals(sf.getScore(), 0f);
617  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.7f, DELTA);
618  1 assertEquals(sf.getValue("alleles"), "A,G");
619  1 assertEquals(map.size(), 9);
620  1 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript3");
621   
622  1 sf = transcriptFeatures.get(2);
623  1 assertEquals(sf.getBegin(), 11);
624  1 assertEquals(sf.getEnd(), 11);
625  1 assertEquals(sf.getScore(), 0f);
626  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.6f, DELTA);
627  1 assertEquals(sf.getValue("alleles"), "A,AC");
628  1 assertEquals(map.size(), 9);
629  1 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript3");
630   
631    /*
632    * verify variants computed on protein product for transcript3
633    * peptide is SWRECD
634    * codon variants are AGC/AGT position 1 which is synonymous
635    * and GAG/GGG which is E/G in position 4
636    * the insertion variant is not transferred to the peptide
637    */
638  1 List<DBRefEntry> dbRefs = al.findName("transcript3").getDBRefs();
639  1 SequenceI peptide = null;
640  1 for (DBRefEntry dbref : dbRefs)
641    {
642  2 if (dbref.getMap().getMap().getFromRatio() == 3)
643    {
644  1 peptide = dbref.getMap().getTo();
645    }
646    }
647  1 List<SequenceFeature> proteinFeatures = peptide.getSequenceFeatures();
648    /*
649    * JAL-3187 don't precompute protein features, do dynamically instead
650    */
651  1 assertTrue(proteinFeatures.isEmpty());
652    // SequenceFeatures.sortFeatures(proteinFeatures, true);
653    // assertEquals(proteinFeatures.size(), 2);
654    // sf = proteinFeatures.get(0);
655    // assertEquals(sf.getFeatureGroup(), "VCF");
656    // assertEquals(sf.getBegin(), 1);
657    // assertEquals(sf.getEnd(), 1);
658    // assertEquals(sf.getType(), SequenceOntologyI.SYNONYMOUS_VARIANT);
659    // assertEquals(sf.getDescription(), "agC/agT");
660    // sf = proteinFeatures.get(1);
661    // assertEquals(sf.getFeatureGroup(), "VCF");
662    // assertEquals(sf.getBegin(), 4);
663    // assertEquals(sf.getEnd(), 4);
664    // assertEquals(sf.getType(), SequenceOntologyI.NONSYNONYMOUS_VARIANT);
665    // assertEquals(sf.getDescription(), "p.Glu4Gly");
666   
667    /*
668    * verify variant feature(s) added to transcript4
669    * at columns 13 (2) and 17 (2), positions 7 and 11
670    */
671  1 transcriptFeatures = al.findName("transcript4").getSequenceFeatures();
672  1 SequenceFeatures.sortFeatures(transcriptFeatures, true);
673  1 assertEquals(transcriptFeatures.size(), 4);
674  1 sf = transcriptFeatures.get(0);
675  1 assertEquals(sf.getBegin(), 7);
676  1 assertEquals(sf.getEnd(), 7);
677  1 assertEquals(sf.getScore(), 0f);
678  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.5f, DELTA);
679  1 assertEquals(sf.getValue("alleles"), "C,T");
680  1 assertEquals(map.size(), 9);
681  1 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript4");
682   
683  1 sf = transcriptFeatures.get(1);
684  1 assertEquals(sf.getBegin(), 7);
685  1 assertEquals(sf.getEnd(), 7);
686  1 assertEquals(sf.getScore(), 0f);
687  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.4f, DELTA);
688  1 assertEquals(sf.getValue("alleles"), "C,G");
689  1 assertEquals(map.size(), 9);
690  1 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript4");
691   
692  1 sf = transcriptFeatures.get(2);
693  1 assertEquals(sf.getBegin(), 11);
694  1 assertEquals(sf.getEnd(), 11);
695  1 assertEquals(sf.getScore(), 0f);
696  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.7f, DELTA);
697  1 assertEquals(sf.getValue("alleles"), "A,G");
698  1 assertEquals(map.size(), 9);
699  1 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript4");
700   
701  1 sf = transcriptFeatures.get(3);
702  1 assertEquals(sf.getBegin(), 11);
703  1 assertEquals(sf.getEnd(), 11);
704  1 assertEquals(sf.getScore(), 0f);
705  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.6f, DELTA);
706  1 assertEquals(sf.getValue("alleles"), "A,AC");
707  1 assertEquals(map.size(), 9);
708  1 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript4");
709    }
710   
711    /**
712    * A test that demonstrates loading a contig sequence from an indexed sequence
713    * database which is the reference for a VCF file
714    *
715    * @throws IOException
716    */
 
717  1 toggle @Test(groups = "Functional")
718    public void testLoadVCFContig() throws IOException
719    {
720  1 VCFLoader loader = new VCFLoader(
721    "test/jalview/io/vcf/testVcf2.vcf");
722   
723  1 SequenceI seq = loader.loadVCFContig("contig123");
724  1 assertEquals(seq.getLength(), 15);
725  1 assertEquals(seq.getSequenceAsString(), "AAAAACCCCCGGGGG");
726  1 List<SequenceFeature> features = seq.getSequenceFeatures();
727  1 SequenceFeatures.sortFeatures(features, true);
728  1 assertEquals(features.size(), 2);
729  1 SequenceFeature sf = features.get(0);
730  1 assertEquals(sf.getBegin(), 8);
731  1 assertEquals(sf.getEnd(), 8);
732  1 assertEquals(sf.getDescription(), "C,A");
733  1 sf = features.get(1);
734  1 assertEquals(sf.getBegin(), 12);
735  1 assertEquals(sf.getEnd(), 12);
736  1 assertEquals(sf.getDescription(), "G,T");
737   
738  1 seq = loader.loadVCFContig("contig789");
739  1 assertEquals(seq.getLength(), 25);
740  1 assertEquals(seq.getSequenceAsString(), "GGGGGTTTTTAAAAACCCCCGGGGG");
741  1 features = seq.getSequenceFeatures();
742  1 SequenceFeatures.sortFeatures(features, true);
743  1 assertEquals(features.size(), 2);
744  1 sf = features.get(0);
745  1 assertEquals(sf.getBegin(), 2);
746  1 assertEquals(sf.getEnd(), 2);
747  1 assertEquals(sf.getDescription(), "G,T");
748  1 sf = features.get(1);
749  1 assertEquals(sf.getBegin(), 21);
750  1 assertEquals(sf.getEnd(), 21);
751  1 assertEquals(sf.getDescription(), "G,A");
752   
753  1 seq = loader.loadVCFContig("contig456");
754  1 assertEquals(seq.getLength(), 20);
755  1 assertEquals(seq.getSequenceAsString(), "CCCCCGGGGGTTTTTAAAAA");
756  1 features = seq.getSequenceFeatures();
757  1 SequenceFeatures.sortFeatures(features, true);
758  1 assertEquals(features.size(), 1);
759  1 sf = features.get(0);
760  1 assertEquals(sf.getBegin(), 15);
761  1 assertEquals(sf.getEnd(), 15);
762  1 assertEquals(sf.getDescription(), "T,C");
763    }
764    }