Clover icon

jalviewX

  1. Project Clover database Wed Oct 31 2018 15:13:58 GMT
  2. Package jalview.io.vcf

File VCFLoaderTest.java

 

Code metrics

6
355
7
1
681
460
10
0.03
50.71
7
1.43

Classes

Class Line # Actions
VCFLoaderTest 29 355 10 5
0.9864130698.6%
 

Contributing tests

This file is covered by 4 tests. .

Source view

1    package jalview.io.vcf;
2   
3    import static org.testng.Assert.assertEquals;
4   
5    import jalview.bin.Cache;
6    import jalview.datamodel.AlignmentI;
7    import jalview.datamodel.DBRefEntry;
8    import jalview.datamodel.Mapping;
9    import jalview.datamodel.Sequence;
10    import jalview.datamodel.SequenceFeature;
11    import jalview.datamodel.SequenceI;
12    import jalview.datamodel.features.SequenceFeatures;
13    import jalview.gui.AlignFrame;
14    import jalview.io.DataSourceType;
15    import jalview.io.FileLoader;
16    import jalview.io.gff.Gff3Helper;
17    import jalview.io.gff.SequenceOntologyI;
18    import jalview.util.MapList;
19   
20    import java.io.File;
21    import java.io.IOException;
22    import java.io.PrintWriter;
23    import java.util.List;
24    import java.util.Map;
25   
26    import org.testng.annotations.BeforeClass;
27    import org.testng.annotations.Test;
28   
 
29    public class VCFLoaderTest
30    {
31    private static final float DELTA = 0.00001f;
32   
33    // columns 9717- of gene P30419 from Ensembl (much modified)
34    private static final String FASTA = ""
35    +
36    /*
37    * forward strand 'gene' and 'transcript' with two exons
38    */
39    ">gene1/1-25 chromosome:GRCh38:17:45051610:45051634:1\n"
40    + "CAAGCTGGCGGACGAGAGTGTGACA\n"
41    + ">transcript1/1-18\n--AGCTGGCG----AGAGTGTGAC-\n"
42   
43    /*
44    * reverse strand gene and transcript (reverse complement alleles!)
45    */
46    + ">gene2/1-25 chromosome:GRCh38:17:45051610:45051634:-1\n"
47    + "TGTCACACTCTCGTCCGCCAGCTTG\n"
48    + ">transcript2/1-18\n" + "-GTCACACTCT----CGCCAGCT--\n"
49   
50    /*
51    * 'gene' on chromosome 5 with two transcripts
52    */
53    + ">gene3/1-25 chromosome:GRCh38:5:45051610:45051634:1\n"
54    + "CAAGCTGGCGGACGAGAGTGTGACA\n"
55    + ">transcript3/1-18\n--AGCTGGCG----AGAGTGTGAC-\n"
56    + ">transcript4/1-18\n-----TGG-GGACGAGAGTGTGA-A\n";
57   
58    private static final String[] VCF = { "##fileformat=VCFv4.2",
59    "##INFO=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency, for each ALT allele, in the same order as listed\">",
60    "##reference=Homo_sapiens/GRCh38",
61    "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO",
62    // A/T,C variants in position 2 of gene sequence (precedes transcript)
63    // should create 2 variant features with respective scores
64    "17\t45051611\t.\tA\tT,C\t1666.64\tRF\tAC=15;AF=5.0e-03,4.0e-03",
65    // SNP G/C in position 4 of gene sequence, position 2 of transcript
66    // insertion G/GA is transferred to nucleotide but not to peptide
67    "17\t45051613\t.\tG\tGA,C\t1666.64\tRF\tAC=15;AF=3.0e-03,2.0e-03" };
68   
 
69  0 toggle @BeforeClass
70    public void setUp()
71    {
72    /*
73    * configure to capture all available VCF and VEP (CSQ) fields
74    */
75  0 Cache.loadProperties("test/jalview/io/testProps.jvprops");
76  0 Cache.setProperty("VCF_FIELDS", ".*");
77  0 Cache.setProperty("VEP_FIELDS", ".*");
78  0 Cache.initLogger();
79    }
80   
 
81  1 toggle @Test(groups = "Functional")
82    public void testDoLoad() throws IOException
83    {
84  1 AlignmentI al = buildAlignment();
85   
86  1 File f = makeVcf();
87  1 VCFLoader loader = new VCFLoader(f.getPath());
88   
89  1 loader.doLoad(al.getSequencesArray(), null);
90   
91    /*
92    * verify variant feature(s) added to gene
93    * NB alleles at a locus may not be processed, and features added,
94    * in the order in which they appear in the VCF record as method
95    * VariantContext.getAlternateAlleles() does not guarantee order
96    * - order of assertions here matches what we find (is not important)
97    */
98  1 List<SequenceFeature> geneFeatures = al.getSequenceAt(0)
99    .getSequenceFeatures();
100  1 SequenceFeatures.sortFeatures(geneFeatures, true);
101  1 assertEquals(geneFeatures.size(), 4);
102  1 SequenceFeature sf = geneFeatures.get(0);
103  1 assertEquals(sf.getFeatureGroup(), "VCF");
104  1 assertEquals(sf.getBegin(), 2);
105  1 assertEquals(sf.getEnd(), 2);
106  1 assertEquals(sf.getScore(), 4.0e-03, DELTA);
107  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "A,C");
108  1 assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
109  1 sf = geneFeatures.get(1);
110  1 assertEquals(sf.getFeatureGroup(), "VCF");
111  1 assertEquals(sf.getBegin(), 2);
112  1 assertEquals(sf.getEnd(), 2);
113  1 assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
114  1 assertEquals(sf.getScore(), 5.0e-03, DELTA);
115  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "A,T");
116   
117  1 sf = geneFeatures.get(2);
118  1 assertEquals(sf.getFeatureGroup(), "VCF");
119  1 assertEquals(sf.getBegin(), 4);
120  1 assertEquals(sf.getEnd(), 4);
121  1 assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
122  1 assertEquals(sf.getScore(), 2.0e-03, DELTA);
123  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,C");
124   
125  1 sf = geneFeatures.get(3);
126  1 assertEquals(sf.getFeatureGroup(), "VCF");
127  1 assertEquals(sf.getBegin(), 4);
128  1 assertEquals(sf.getEnd(), 4);
129  1 assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
130  1 assertEquals(sf.getScore(), 3.0e-03, DELTA);
131  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GA");
132   
133    /*
134    * verify variant feature(s) added to transcript
135    */
136  1 List<SequenceFeature> transcriptFeatures = al.getSequenceAt(1)
137    .getSequenceFeatures();
138  1 assertEquals(transcriptFeatures.size(), 2);
139  1 sf = transcriptFeatures.get(0);
140  1 assertEquals(sf.getFeatureGroup(), "VCF");
141  1 assertEquals(sf.getBegin(), 2);
142  1 assertEquals(sf.getEnd(), 2);
143  1 assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
144  1 assertEquals(sf.getScore(), 2.0e-03, DELTA);
145  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,C");
146  1 sf = transcriptFeatures.get(1);
147  1 assertEquals(sf.getFeatureGroup(), "VCF");
148  1 assertEquals(sf.getBegin(), 2);
149  1 assertEquals(sf.getEnd(), 2);
150  1 assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
151  1 assertEquals(sf.getScore(), 3.0e-03, DELTA);
152  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GA");
153   
154    /*
155    * verify SNP variant feature(s) computed and added to protein
156    * first codon AGC varies to ACC giving S/T
157    */
158  1 DBRefEntry[] dbRefs = al.getSequenceAt(1).getDBRefs();
159  1 SequenceI peptide = null;
160  1 for (DBRefEntry dbref : dbRefs)
161    {
162  2 if (dbref.getMap().getMap().getFromRatio() == 3)
163    {
164  1 peptide = dbref.getMap().getTo();
165    }
166    }
167  1 List<SequenceFeature> proteinFeatures = peptide.getSequenceFeatures();
168  1 assertEquals(proteinFeatures.size(), 1);
169  1 sf = proteinFeatures.get(0);
170  1 assertEquals(sf.getFeatureGroup(), "VCF");
171  1 assertEquals(sf.getBegin(), 1);
172  1 assertEquals(sf.getEnd(), 1);
173  1 assertEquals(sf.getType(), SequenceOntologyI.NONSYNONYMOUS_VARIANT);
174  1 assertEquals(sf.getDescription(), "p.Ser1Thr");
175    }
176   
 
177  2 toggle private File makeVcf() throws IOException
178    {
179  2 File f = File.createTempFile("Test", ".vcf");
180  2 f.deleteOnExit();
181  2 PrintWriter pw = new PrintWriter(f);
182  2 for (String vcfLine : VCF)
183    {
184  12 pw.println(vcfLine);
185    }
186  2 pw.close();
187  2 return f;
188    }
189   
190    /**
191    * Make a simple alignment with one 'gene' and one 'transcript'
192    *
193    * @return
194    */
 
195  3 toggle private AlignmentI buildAlignment()
196    {
197  3 AlignFrame af = new FileLoader().LoadFileWaitTillLoaded(FASTA,
198    DataSourceType.PASTE);
199   
200    /*
201    * map gene1 sequence to chromosome (normally done when the sequence is fetched
202    * from Ensembl and transcripts computed)
203    */
204  3 AlignmentI alignment = af.getViewport().getAlignment();
205  3 SequenceI gene1 = alignment.findName("gene1");
206  3 int[] to = new int[] { 45051610, 45051634 };
207  3 int[] from = new int[] { gene1.getStart(), gene1.getEnd() };
208  3 gene1.setGeneLoci("homo_sapiens", "GRCh38", "17", new MapList(from, to,
209    1, 1));
210   
211    /*
212    * map 'transcript1' to chromosome via 'gene1'
213    * transcript1/1-18 is gene1/3-10,15-24
214    * which is chromosome 45051612-45051619,45051624-45051633
215    */
216  3 to = new int[] { 45051612, 45051619, 45051624, 45051633 };
217  3 SequenceI transcript1 = alignment.findName("transcript1");
218  3 from = new int[] { transcript1.getStart(), transcript1.getEnd() };
219  3 transcript1.setGeneLoci("homo_sapiens", "GRCh38", "17", new MapList(
220    from, to,
221    1, 1));
222   
223    /*
224    * map gene2 to chromosome reverse strand
225    */
226  3 SequenceI gene2 = alignment.findName("gene2");
227  3 to = new int[] { 45051634, 45051610 };
228  3 from = new int[] { gene2.getStart(), gene2.getEnd() };
229  3 gene2.setGeneLoci("homo_sapiens", "GRCh38", "17", new MapList(from, to,
230    1, 1));
231   
232    /*
233    * map 'transcript2' to chromosome via 'gene2'
234    * transcript2/1-18 is gene2/2-11,16-23
235    * which is chromosome 45051633-45051624,45051619-45051612
236    */
237  3 to = new int[] { 45051633, 45051624, 45051619, 45051612 };
238  3 SequenceI transcript2 = alignment.findName("transcript2");
239  3 from = new int[] { transcript2.getStart(), transcript2.getEnd() };
240  3 transcript2.setGeneLoci("homo_sapiens", "GRCh38", "17", new MapList(
241    from, to,
242    1, 1));
243   
244    /*
245    * add a protein product as a DBRef on transcript1
246    */
247  3 SequenceI peptide1 = new Sequence("ENSP001", "SWRECD");
248  3 MapList mapList = new MapList(new int[] { 1, 18 }, new int[] { 1, 6 },
249    3, 1);
250  3 Mapping map = new Mapping(peptide1, mapList);
251  3 DBRefEntry product = new DBRefEntry("", "", "ENSP001", map);
252  3 transcript1.addDBRef(product);
253   
254    /*
255    * add a protein product as a DBRef on transcript2
256    */
257  3 SequenceI peptide2 = new Sequence("ENSP002", "VTLSPA");
258  3 mapList = new MapList(new int[] { 1, 18 }, new int[] { 1, 6 }, 3, 1);
259  3 map = new Mapping(peptide2, mapList);
260  3 product = new DBRefEntry("", "", "ENSP002", map);
261  3 transcript2.addDBRef(product);
262   
263    /*
264    * map gene3 to chromosome
265    */
266  3 SequenceI gene3 = alignment.findName("gene3");
267  3 to = new int[] { 45051610, 45051634 };
268  3 from = new int[] { gene3.getStart(), gene3.getEnd() };
269  3 gene3.setGeneLoci("homo_sapiens", "GRCh38", "5", new MapList(from, to,
270    1, 1));
271   
272    /*
273    * map 'transcript3' to chromosome
274    */
275  3 SequenceI transcript3 = alignment.findName("transcript3");
276  3 to = new int[] { 45051612, 45051619, 45051624, 45051633 };
277  3 from = new int[] { transcript3.getStart(), transcript3.getEnd() };
278  3 transcript3.setGeneLoci("homo_sapiens", "GRCh38", "5", new MapList(
279    from, to,
280    1, 1));
281   
282    /*
283    * map 'transcript4' to chromosome
284    */
285  3 SequenceI transcript4 = alignment.findName("transcript4");
286  3 to = new int[] { 45051615, 45051617, 45051619, 45051632, 45051634,
287    45051634 };
288  3 from = new int[] { transcript4.getStart(), transcript4.getEnd() };
289  3 transcript4.setGeneLoci("homo_sapiens", "GRCh38", "5", new MapList(
290    from, to,
291    1, 1));
292   
293    /*
294    * add a protein product as a DBRef on transcript3
295    */
296  3 SequenceI peptide3 = new Sequence("ENSP003", "SWRECD");
297  3 mapList = new MapList(new int[] { 1, 18 }, new int[] { 1, 6 }, 3, 1);
298  3 map = new Mapping(peptide3, mapList);
299  3 product = new DBRefEntry("", "", "ENSP003", map);
300  3 transcript3.addDBRef(product);
301   
302  3 return alignment;
303    }
304   
305    /**
306    * Test with 'gene' and 'transcript' mapped to the reverse strand of the
307    * chromosome. The VCF variant positions (in forward coordinates) should get
308    * correctly located on sequence positions.
309    *
310    * @throws IOException
311    */
 
312  1 toggle @Test(groups = "Functional")
313    public void testDoLoad_reverseStrand() throws IOException
314    {
315  1 AlignmentI al = buildAlignment();
316   
317  1 File f = makeVcf();
318   
319  1 VCFLoader loader = new VCFLoader(f.getPath());
320   
321  1 loader.doLoad(al.getSequencesArray(), null);
322   
323    /*
324    * verify variant feature(s) added to gene2
325    * gene2/1-25 maps to chromosome 45051634- reverse strand
326    */
327  1 List<SequenceFeature> geneFeatures = al.getSequenceAt(2)
328    .getSequenceFeatures();
329  1 SequenceFeatures.sortFeatures(geneFeatures, true);
330  1 assertEquals(geneFeatures.size(), 4);
331   
332    /*
333    * variant A/T at 45051611 maps to T/A at gene position 24
334    */
335  1 SequenceFeature sf = geneFeatures.get(3);
336  1 assertEquals(sf.getFeatureGroup(), "VCF");
337  1 assertEquals(sf.getBegin(), 24);
338  1 assertEquals(sf.getEnd(), 24);
339  1 assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
340  1 assertEquals(sf.getScore(), 5.0e-03, DELTA);
341  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "T,A");
342   
343    /*
344    * variant A/C at 45051611 maps to T/G at gene position 24
345    */
346  1 sf = geneFeatures.get(2);
347  1 assertEquals(sf.getFeatureGroup(), "VCF");
348  1 assertEquals(sf.getBegin(), 24);
349  1 assertEquals(sf.getEnd(), 24);
350  1 assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
351  1 assertEquals(sf.getScore(), 4.0e-03, DELTA);
352  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "T,G");
353   
354    /*
355    * variant G/C at 45051613 maps to C/G at gene position 22
356    */
357  1 sf = geneFeatures.get(1);
358  1 assertEquals(sf.getFeatureGroup(), "VCF");
359  1 assertEquals(sf.getBegin(), 22);
360  1 assertEquals(sf.getEnd(), 22);
361  1 assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
362  1 assertEquals(sf.getScore(), 2.0e-03, DELTA);
363  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "C,G");
364   
365    /*
366    * insertion G/GA at 45051613 maps to an insertion at
367    * the preceding position (21) on reverse strand gene
368    * reference: CAAGC -> GCTTG/21-25
369    * genomic variant: CAAGAC (G/GA)
370    * gene variant: GTCTTG (G/GT at 21)
371    */
372  1 sf = geneFeatures.get(0);
373  1 assertEquals(sf.getFeatureGroup(), "VCF");
374  1 assertEquals(sf.getBegin(), 21);
375  1 assertEquals(sf.getEnd(), 21);
376  1 assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
377  1 assertEquals(sf.getScore(), 3.0e-03, DELTA);
378  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GT");
379   
380    /*
381    * verify 2 variant features added to transcript2
382    */
383  1 List<SequenceFeature> transcriptFeatures = al.getSequenceAt(3)
384    .getSequenceFeatures();
385  1 assertEquals(transcriptFeatures.size(), 2);
386   
387    /*
388    * insertion G/GT at position 21 of gene maps to position 16 of transcript
389    */
390  1 sf = transcriptFeatures.get(0);
391  1 assertEquals(sf.getFeatureGroup(), "VCF");
392  1 assertEquals(sf.getBegin(), 16);
393  1 assertEquals(sf.getEnd(), 16);
394  1 assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
395  1 assertEquals(sf.getScore(), 3.0e-03, DELTA);
396  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GT");
397   
398    /*
399    * SNP C/G at position 22 of gene maps to position 17 of transcript
400    */
401  1 sf = transcriptFeatures.get(1);
402  1 assertEquals(sf.getFeatureGroup(), "VCF");
403  1 assertEquals(sf.getBegin(), 17);
404  1 assertEquals(sf.getEnd(), 17);
405  1 assertEquals(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT);
406  1 assertEquals(sf.getScore(), 2.0e-03, DELTA);
407  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "C,G");
408   
409    /*
410    * verify variant feature(s) computed and added to protein
411    * last codon GCT varies to GGT giving A/G in the last peptide position
412    */
413  1 DBRefEntry[] dbRefs = al.getSequenceAt(3).getDBRefs();
414  1 SequenceI peptide = null;
415  1 for (DBRefEntry dbref : dbRefs)
416    {
417  2 if (dbref.getMap().getMap().getFromRatio() == 3)
418    {
419  1 peptide = dbref.getMap().getTo();
420    }
421    }
422  1 List<SequenceFeature> proteinFeatures = peptide.getSequenceFeatures();
423  1 assertEquals(proteinFeatures.size(), 1);
424  1 sf = proteinFeatures.get(0);
425  1 assertEquals(sf.getFeatureGroup(), "VCF");
426  1 assertEquals(sf.getBegin(), 6);
427  1 assertEquals(sf.getEnd(), 6);
428  1 assertEquals(sf.getType(), SequenceOntologyI.NONSYNONYMOUS_VARIANT);
429  1 assertEquals(sf.getDescription(), "p.Ala6Gly");
430    }
431   
432    /**
433    * Tests that if VEP consequence (CSQ) data is present in the VCF data, then
434    * it is added to the variant feature, but restricted where possible to the
435    * consequences for a specific transcript
436    *
437    * @throws IOException
438    */
 
439  1 toggle @Test(groups = "Functional")
440    public void testDoLoad_vepCsq() throws IOException
441    {
442  1 AlignmentI al = buildAlignment();
443   
444  1 VCFLoader loader = new VCFLoader("test/jalview/io/vcf/testVcf.vcf");
445   
446    /*
447    * VCF data file with variants at gene3 positions
448    * 1 C/A
449    * 5 C/T
450    * 9 CGT/C (deletion)
451    * 13 C/G, C/T
452    * 17 A/AC (insertion), A/G
453    */
454  1 loader.doLoad(al.getSequencesArray(), null);
455   
456    /*
457    * verify variant feature(s) added to gene3
458    */
459  1 List<SequenceFeature> geneFeatures = al.findName("gene3")
460    .getSequenceFeatures();
461  1 SequenceFeatures.sortFeatures(geneFeatures, true);
462  1 assertEquals(geneFeatures.size(), 7);
463  1 SequenceFeature sf = geneFeatures.get(0);
464  1 assertEquals(sf.getBegin(), 1);
465  1 assertEquals(sf.getEnd(), 1);
466  1 assertEquals(sf.getScore(), 0.1f, DELTA);
467  1 assertEquals(sf.getValue("alleles"), "C,A");
468    // gene features include Consequence for all transcripts
469  1 Map map = (Map) sf.getValue("CSQ");
470  1 assertEquals(map.size(), 9);
471   
472  1 sf = geneFeatures.get(1);
473  1 assertEquals(sf.getBegin(), 5);
474  1 assertEquals(sf.getEnd(), 5);
475  1 assertEquals(sf.getScore(), 0.2f, DELTA);
476  1 assertEquals(sf.getValue("alleles"), "C,T");
477  1 map = (Map) sf.getValue("CSQ");
478  1 assertEquals(map.size(), 9);
479   
480  1 sf = geneFeatures.get(2);
481  1 assertEquals(sf.getBegin(), 9);
482  1 assertEquals(sf.getEnd(), 11); // deletion over 3 positions
483  1 assertEquals(sf.getScore(), 0.3f, DELTA);
484  1 assertEquals(sf.getValue("alleles"), "CGG,C");
485  1 map = (Map) sf.getValue("CSQ");
486  1 assertEquals(map.size(), 9);
487   
488  1 sf = geneFeatures.get(3);
489  1 assertEquals(sf.getBegin(), 13);
490  1 assertEquals(sf.getEnd(), 13);
491  1 assertEquals(sf.getScore(), 0.5f, DELTA);
492  1 assertEquals(sf.getValue("alleles"), "C,T");
493  1 map = (Map) sf.getValue("CSQ");
494  1 assertEquals(map.size(), 9);
495   
496  1 sf = geneFeatures.get(4);
497  1 assertEquals(sf.getBegin(), 13);
498  1 assertEquals(sf.getEnd(), 13);
499  1 assertEquals(sf.getScore(), 0.4f, DELTA);
500  1 assertEquals(sf.getValue("alleles"), "C,G");
501  1 map = (Map) sf.getValue("CSQ");
502  1 assertEquals(map.size(), 9);
503   
504  1 sf = geneFeatures.get(5);
505  1 assertEquals(sf.getBegin(), 17);
506  1 assertEquals(sf.getEnd(), 17);
507  1 assertEquals(sf.getScore(), 0.7f, DELTA);
508  1 assertEquals(sf.getValue("alleles"), "A,G");
509  1 map = (Map) sf.getValue("CSQ");
510  1 assertEquals(map.size(), 9);
511   
512  1 sf = geneFeatures.get(6);
513  1 assertEquals(sf.getBegin(), 17);
514  1 assertEquals(sf.getEnd(), 17); // insertion
515  1 assertEquals(sf.getScore(), 0.6f, DELTA);
516  1 assertEquals(sf.getValue("alleles"), "A,AC");
517  1 map = (Map) sf.getValue("CSQ");
518  1 assertEquals(map.size(), 9);
519   
520    /*
521    * verify variant feature(s) added to transcript3
522    * at columns 5 (1), 17 (2), positions 3, 11
523    * note the deletion at columns 9-11 is not transferred since col 11
524    * has no mapping to transcript 3
525    */
526  1 List<SequenceFeature> transcriptFeatures = al.findName("transcript3")
527    .getSequenceFeatures();
528  1 SequenceFeatures.sortFeatures(transcriptFeatures, true);
529  1 assertEquals(transcriptFeatures.size(), 3);
530  1 sf = transcriptFeatures.get(0);
531  1 assertEquals(sf.getBegin(), 3);
532  1 assertEquals(sf.getEnd(), 3);
533  1 assertEquals(sf.getScore(), 0.2f, DELTA);
534  1 assertEquals(sf.getValue("alleles"), "C,T");
535    // transcript features only have Consequence for that transcripts
536  1 map = (Map) sf.getValue("CSQ");
537  1 assertEquals(map.size(), 9);
538  1 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript3");
539   
540  1 sf = transcriptFeatures.get(1);
541  1 assertEquals(sf.getBegin(), 11);
542  1 assertEquals(sf.getEnd(), 11);
543  1 assertEquals(sf.getScore(), 0.7f, DELTA);
544  1 assertEquals(sf.getValue("alleles"), "A,G");
545  1 assertEquals(map.size(), 9);
546  1 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript3");
547   
548  1 sf = transcriptFeatures.get(2);
549  1 assertEquals(sf.getBegin(), 11);
550  1 assertEquals(sf.getEnd(), 11);
551  1 assertEquals(sf.getScore(), 0.6f, DELTA);
552  1 assertEquals(sf.getValue("alleles"), "A,AC");
553  1 assertEquals(map.size(), 9);
554  1 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript3");
555   
556    /*
557    * verify variants computed on protein product for transcript3
558    * peptide is SWRECD
559    * codon variants are AGC/AGT position 1 which is synonymous
560    * and GAG/GGG which is E/G in position 4
561    * the insertion variant is not transferred to the peptide
562    */
563  1 DBRefEntry[] dbRefs = al.findName("transcript3").getDBRefs();
564  1 SequenceI peptide = null;
565  1 for (DBRefEntry dbref : dbRefs)
566    {
567  2 if (dbref.getMap().getMap().getFromRatio() == 3)
568    {
569  1 peptide = dbref.getMap().getTo();
570    }
571    }
572  1 List<SequenceFeature> proteinFeatures = peptide.getSequenceFeatures();
573  1 SequenceFeatures.sortFeatures(proteinFeatures, true);
574  1 assertEquals(proteinFeatures.size(), 2);
575  1 sf = proteinFeatures.get(0);
576  1 assertEquals(sf.getFeatureGroup(), "VCF");
577  1 assertEquals(sf.getBegin(), 1);
578  1 assertEquals(sf.getEnd(), 1);
579  1 assertEquals(sf.getType(), SequenceOntologyI.SYNONYMOUS_VARIANT);
580  1 assertEquals(sf.getDescription(), "agC/agT");
581  1 sf = proteinFeatures.get(1);
582  1 assertEquals(sf.getFeatureGroup(), "VCF");
583  1 assertEquals(sf.getBegin(), 4);
584  1 assertEquals(sf.getEnd(), 4);
585  1 assertEquals(sf.getType(), SequenceOntologyI.NONSYNONYMOUS_VARIANT);
586  1 assertEquals(sf.getDescription(), "p.Glu4Gly");
587   
588    /*
589    * verify variant feature(s) added to transcript4
590    * at columns 13 (2) and 17 (2), positions 7 and 11
591    */
592  1 transcriptFeatures = al.findName("transcript4").getSequenceFeatures();
593  1 SequenceFeatures.sortFeatures(transcriptFeatures, true);
594  1 assertEquals(transcriptFeatures.size(), 4);
595  1 sf = transcriptFeatures.get(0);
596  1 assertEquals(sf.getBegin(), 7);
597  1 assertEquals(sf.getEnd(), 7);
598  1 assertEquals(sf.getScore(), 0.5f, DELTA);
599  1 assertEquals(sf.getValue("alleles"), "C,T");
600  1 assertEquals(map.size(), 9);
601  1 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript4");
602   
603  1 sf = transcriptFeatures.get(1);
604  1 assertEquals(sf.getBegin(), 7);
605  1 assertEquals(sf.getEnd(), 7);
606  1 assertEquals(sf.getScore(), 0.4f, DELTA);
607  1 assertEquals(sf.getValue("alleles"), "C,G");
608  1 assertEquals(map.size(), 9);
609  1 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript4");
610   
611  1 sf = transcriptFeatures.get(2);
612  1 assertEquals(sf.getBegin(), 11);
613  1 assertEquals(sf.getEnd(), 11);
614  1 assertEquals(sf.getScore(), 0.7f, DELTA);
615  1 assertEquals(sf.getValue("alleles"), "A,G");
616  1 assertEquals(map.size(), 9);
617  1 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript4");
618   
619  1 sf = transcriptFeatures.get(3);
620  1 assertEquals(sf.getBegin(), 11);
621  1 assertEquals(sf.getEnd(), 11);
622  1 assertEquals(sf.getScore(), 0.6f, DELTA);
623  1 assertEquals(sf.getValue("alleles"), "A,AC");
624  1 assertEquals(map.size(), 9);
625  1 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript4");
626    }
627   
628    /**
629    * A test that demonstrates loading a contig sequence from an indexed sequence
630    * database which is the reference for a VCF file
631    *
632    * @throws IOException
633    */
 
634  1 toggle @Test(groups = "Functional")
635    public void testLoadVCFContig() throws IOException
636    {
637  1 VCFLoader loader = new VCFLoader(
638    "test/jalview/io/vcf/testVcf2.vcf");
639   
640  1 SequenceI seq = loader.loadVCFContig("contig123");
641  1 assertEquals(seq.getLength(), 15);
642  1 assertEquals(seq.getSequenceAsString(), "AAAAACCCCCGGGGG");
643  1 List<SequenceFeature> features = seq.getSequenceFeatures();
644  1 SequenceFeatures.sortFeatures(features, true);
645  1 assertEquals(features.size(), 2);
646  1 SequenceFeature sf = features.get(0);
647  1 assertEquals(sf.getBegin(), 8);
648  1 assertEquals(sf.getEnd(), 8);
649  1 assertEquals(sf.getDescription(), "C,A");
650  1 sf = features.get(1);
651  1 assertEquals(sf.getBegin(), 12);
652  1 assertEquals(sf.getEnd(), 12);
653  1 assertEquals(sf.getDescription(), "G,T");
654   
655  1 seq = loader.loadVCFContig("contig789");
656  1 assertEquals(seq.getLength(), 25);
657  1 assertEquals(seq.getSequenceAsString(), "GGGGGTTTTTAAAAACCCCCGGGGG");
658  1 features = seq.getSequenceFeatures();
659  1 SequenceFeatures.sortFeatures(features, true);
660  1 assertEquals(features.size(), 2);
661  1 sf = features.get(0);
662  1 assertEquals(sf.getBegin(), 2);
663  1 assertEquals(sf.getEnd(), 2);
664  1 assertEquals(sf.getDescription(), "G,T");
665  1 sf = features.get(1);
666  1 assertEquals(sf.getBegin(), 21);
667  1 assertEquals(sf.getEnd(), 21);
668  1 assertEquals(sf.getDescription(), "G,A");
669   
670  1 seq = loader.loadVCFContig("contig456");
671  1 assertEquals(seq.getLength(), 20);
672  1 assertEquals(seq.getSequenceAsString(), "CCCCCGGGGGTTTTTAAAAA");
673  1 features = seq.getSequenceFeatures();
674  1 SequenceFeatures.sortFeatures(features, true);
675  1 assertEquals(features.size(), 1);
676  1 sf = features.get(0);
677  1 assertEquals(sf.getBegin(), 15);
678  1 assertEquals(sf.getEnd(), 15);
679  1 assertEquals(sf.getDescription(), "T,C");
680    }
681    }