Clover icon

Coverage Report

  1. Project Clover database Thu Nov 7 2024 17:01:39 GMT
  2. Package jalview.io.vcf

File VCFLoaderTest.java

 

Code metrics

6
379
8
1
780
501
11
0.03
47.38
8
1.38

Classes

Class Line # Actions
VCFLoaderTest 54 379 11
0.00%
 

Contributing tests

No tests hitting this source file were found.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.io.vcf;
22   
23    import static jalview.io.gff.SequenceOntologyI.SEQUENCE_VARIANT;
24    import static org.testng.Assert.assertEquals;
25    import static org.testng.Assert.assertNull;
26    import static org.testng.Assert.assertTrue;
27   
28    import jalview.bin.Cache;
29    import jalview.bin.Console;
30    import jalview.datamodel.AlignmentI;
31    import jalview.datamodel.DBRefEntry;
32    import jalview.datamodel.Mapping;
33    import jalview.datamodel.Sequence;
34    import jalview.datamodel.SequenceFeature;
35    import jalview.datamodel.SequenceI;
36    import jalview.datamodel.features.FeatureAttributes;
37    import jalview.datamodel.features.SequenceFeatures;
38    import jalview.gui.AlignFrame;
39    import jalview.io.DataSourceType;
40    import jalview.io.FileLoader;
41    import jalview.io.gff.Gff3Helper;
42    import jalview.util.MapList;
43   
44    import java.io.File;
45    import java.io.IOException;
46    import java.io.PrintWriter;
47    import java.util.List;
48    import java.util.Map;
49   
50    import org.testng.annotations.BeforeClass;
51    import org.testng.annotations.BeforeTest;
52    import org.testng.annotations.Test;
53   
 
54    public class VCFLoaderTest
55    {
56    private static final float DELTA = 0.00001f;
57   
58    // columns 9717- of gene P30419 from Ensembl (much modified)
59    private static final String FASTA = "" +
60    /*
61    * forward strand 'gene' and 'transcript' with two exons
62    */
63    ">gene1/1-25 chromosome:GRCh38:17:45051610:45051634:1\n"
64    + "CAAGCTGGCGGACGAGAGTGTGACA\n"
65    + ">transcript1/1-18\n--AGCTGGCG----AGAGTGTGAC-\n"
66   
67    /*
68    * reverse strand gene and transcript (reverse complement alleles!)
69    */
70    + ">gene2/1-25 chromosome:GRCh38:17:45051610:45051634:-1\n"
71    + "TGTCACACTCTCGTCCGCCAGCTTG\n" + ">transcript2/1-18\n"
72    + "-GTCACACTCT----CGCCAGCT--\n"
73   
74    /*
75    * 'gene' on chromosome 5 with two transcripts
76    */
77    + ">gene3/1-25 chromosome:GRCh38:5:45051610:45051634:1\n"
78    + "CAAGCTGGCGGACGAGAGTGTGACA\n"
79    + ">transcript3/1-18\n--AGCTGGCG----AGAGTGTGAC-\n"
80    + ">transcript4/1-18\n-----TGG-GGACGAGAGTGTGA-A\n";
81   
82    private static final String[] VCF = { "##fileformat=VCFv4.2",
83    // fields other than AF are ignored when parsing as they have no INFO
84    // definition
85    "##INFO=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency, for each ALT allele, in the same order as listed\">",
86    "##INFO=<ID=AC_Female,Number=A,Type=Integer,Description=\"Allele count in Female genotypes\"",
87    "##INFO=<ID=AF_AFR,Number=A,Type=Float,Description=\"Allele Frequency among African/African American genotypes\"",
88    "##reference=Homo_sapiens/GRCh38",
89    "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO",
90    // A/T,C variants in position 2 of gene sequence (precedes transcript)
91    // should create 2 variant features with respective AF values
92    // malformed values for AC_Female and AF_AFR should be ignored
93    "17\t45051611\trs384765\tA\tT,C\t1666.64\tRF;XYZ\tAC=15;AF=5.0e-03,4.0e-03;AC_Female=12,3d;AF_AFR=low,2.3e-4",
94    // SNP G/C in position 4 of gene sequence, position 2 of transcript
95    // insertion G/GA is transferred to nucleotide but not to peptide
96    "17\t45051613\t.\tG\tGA,C\t1666.65\t.\tAC=15;AF=3.0e-03,2.0e-03",
97    // '.' in INFO field should be ignored
98    "17\t45051615\t.\tG\tC\t1666.66\tRF\tAC=16;AF=." };
99   
 
100  0 toggle @BeforeClass(alwaysRun = true)
101    public void setUp()
102    {
103    /*
104    * configure to capture all available VCF and VEP (CSQ) fields
105    */
106  0 Cache.loadProperties("test/jalview/io/testProps.jvprops");
107  0 Cache.setProperty("VCF_FIELDS", ".*");
108  0 Cache.setProperty("VEP_FIELDS", ".*");
109  0 Cache.setProperty("VCF_ASSEMBLY", "GRCh38=GRCh38");
110  0 Console.initLogger();
111    }
112   
 
113  0 toggle @BeforeTest(alwaysRun = true)
114    public void setUpBeforeTest()
115    {
116    /*
117    * clear down feature attributes metadata
118    */
119  0 FeatureAttributes.getInstance().clear();
120    }
121   
 
122  0 toggle @Test(groups = "Functional")
123    public void testDoLoad() throws IOException
124    {
125  0 AlignmentI al = buildAlignment();
126   
127  0 File f = makeVcfFile();
128  0 VCFLoader loader = new VCFLoader(f.getPath());
129   
130  0 loader.doLoad(al.getSequencesArray(), null);
131   
132    /*
133    * verify variant feature(s) added to gene
134    * NB alleles at a locus may not be processed, and features added,
135    * in the order in which they appear in the VCF record as method
136    * VariantContext.getAlternateAlleles() does not guarantee order
137    * - order of assertions here matches what we find (is not important)
138    */
139  0 List<SequenceFeature> geneFeatures = al.getSequenceAt(0)
140    .getSequenceFeatures();
141  0 SequenceFeatures.sortFeatures(geneFeatures, true);
142  0 assertEquals(geneFeatures.size(), 5);
143  0 SequenceFeature sf = geneFeatures.get(0);
144  0 assertEquals(sf.getFeatureGroup(), "VCF");
145  0 assertEquals(sf.getBegin(), 2);
146  0 assertEquals(sf.getEnd(), 2);
147  0 assertEquals(sf.getScore(), 0f);
148  0 assertEquals(sf.getValue("AF"), "4.0e-03");
149  0 assertEquals(sf.getValue("AF_AFR"), "2.3e-4");
150  0 assertEquals(sf.getValue(Gff3Helper.ALLELES), "A,C");
151  0 assertEquals(sf.getType(), SEQUENCE_VARIANT);
152  0 assertEquals(sf.getValue("POS"), "45051611");
153  0 assertEquals(sf.getValue("ID"), "rs384765");
154  0 assertEquals(sf.getValue("QUAL"), "1666.64");
155  0 assertEquals(sf.getValue("FILTER"), "RF;XYZ");
156    // malformed integer for AC_Female is ignored (JAL-3375)
157  0 assertNull(sf.getValue("AC_Female"));
158   
159  0 sf = geneFeatures.get(1);
160  0 assertEquals(sf.getFeatureGroup(), "VCF");
161  0 assertEquals(sf.getBegin(), 2);
162  0 assertEquals(sf.getEnd(), 2);
163  0 assertEquals(sf.getType(), SEQUENCE_VARIANT);
164  0 assertEquals(sf.getScore(), 0f);
165  0 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 5.0e-03,
166    DELTA);
167  0 assertEquals(sf.getValue("AC_Female"), "12");
168    // malformed float for AF_AFR is ignored (JAL-3375)
169  0 assertNull(sf.getValue("AC_AFR"));
170  0 assertEquals(sf.getValue(Gff3Helper.ALLELES), "A,T");
171   
172  0 sf = geneFeatures.get(2);
173  0 assertEquals(sf.getFeatureGroup(), "VCF");
174  0 assertEquals(sf.getBegin(), 4);
175  0 assertEquals(sf.getEnd(), 4);
176  0 assertEquals(sf.getType(), SEQUENCE_VARIANT);
177  0 assertEquals(sf.getScore(), 0f);
178  0 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 2.0e-03,
179    DELTA);
180  0 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,C");
181   
182  0 sf = geneFeatures.get(3);
183  0 assertEquals(sf.getFeatureGroup(), "VCF");
184  0 assertEquals(sf.getBegin(), 4);
185  0 assertEquals(sf.getEnd(), 4);
186  0 assertEquals(sf.getType(), SEQUENCE_VARIANT);
187  0 assertEquals(sf.getScore(), 0f);
188  0 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 3.0e-03,
189    DELTA);
190  0 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GA");
191  0 assertNull(sf.getValue("ID")); // '.' is ignored
192  0 assertNull(sf.getValue("FILTER")); // '.' is ignored
193   
194  0 sf = geneFeatures.get(4);
195  0 assertEquals(sf.getFeatureGroup(), "VCF");
196  0 assertEquals(sf.getBegin(), 6);
197  0 assertEquals(sf.getEnd(), 6);
198  0 assertEquals(sf.getType(), SEQUENCE_VARIANT);
199  0 assertEquals(sf.getScore(), 0f);
200    // AF=. should not have been captured
201  0 assertNull(sf.getValue("AF"));
202  0 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,C");
203   
204    /*
205    * verify variant feature(s) added to transcript
206    */
207  0 List<SequenceFeature> transcriptFeatures = al.getSequenceAt(1)
208    .getSequenceFeatures();
209  0 assertEquals(transcriptFeatures.size(), 3);
210  0 sf = transcriptFeatures.get(0);
211  0 assertEquals(sf.getFeatureGroup(), "VCF");
212  0 assertEquals(sf.getBegin(), 2);
213  0 assertEquals(sf.getEnd(), 2);
214  0 assertEquals(sf.getType(), SEQUENCE_VARIANT);
215  0 assertEquals(sf.getScore(), 0f);
216  0 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 2.0e-03,
217    DELTA);
218  0 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,C");
219  0 sf = transcriptFeatures.get(1);
220  0 assertEquals(sf.getFeatureGroup(), "VCF");
221  0 assertEquals(sf.getBegin(), 2);
222  0 assertEquals(sf.getEnd(), 2);
223  0 assertEquals(sf.getType(), SEQUENCE_VARIANT);
224  0 assertEquals(sf.getScore(), 0f);
225  0 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 3.0e-03,
226    DELTA);
227  0 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GA");
228   
229    /*
230    * verify SNP variant feature(s) computed and added to protein
231    * first codon AGC varies to ACC giving S/T
232    */
233  0 List<DBRefEntry> dbRefs = al.getSequenceAt(1).getDBRefs();
234  0 SequenceI peptide = null;
235  0 for (DBRefEntry dbref : dbRefs)
236    {
237  0 if (dbref.getMap().getMap().getFromRatio() == 3)
238    {
239  0 peptide = dbref.getMap().getTo();
240    }
241    }
242  0 List<SequenceFeature> proteinFeatures = peptide.getSequenceFeatures();
243   
244    /*
245    * JAL-3187 don't precompute protein features, do dynamically instead
246    */
247  0 assertTrue(proteinFeatures.isEmpty());
248    }
249   
 
250  0 toggle private File makeVcfFile() throws IOException
251    {
252  0 File f = File.createTempFile("Test", ".vcf");
253  0 f.deleteOnExit();
254  0 PrintWriter pw = new PrintWriter(f);
255  0 for (String vcfLine : VCF)
256    {
257  0 pw.println(vcfLine);
258    }
259  0 pw.close();
260  0 return f;
261    }
262   
263    /**
264    * Make a simple alignment with one 'gene' and one 'transcript'
265    *
266    * @return
267    */
 
268  0 toggle private AlignmentI buildAlignment()
269    {
270  0 AlignFrame af = new FileLoader().LoadFileWaitTillLoaded(FASTA,
271    DataSourceType.PASTE);
272   
273    /*
274    * map gene1 sequence to chromosome (normally done when the sequence is fetched
275    * from Ensembl and transcripts computed)
276    */
277  0 AlignmentI alignment = af.getViewport().getAlignment();
278  0 SequenceI gene1 = alignment.findName("gene1");
279  0 int[] to = new int[] { 45051610, 45051634 };
280  0 int[] from = new int[] { gene1.getStart(), gene1.getEnd() };
281  0 gene1.setGeneLoci("homo_sapiens", "GRCh38", "17",
282    new MapList(from, to, 1, 1));
283   
284    /*
285    * map 'transcript1' to chromosome via 'gene1'
286    * transcript1/1-18 is gene1/3-10,15-24
287    * which is chromosome 45051612-45051619,45051624-45051633
288    */
289  0 to = new int[] { 45051612, 45051619, 45051624, 45051633 };
290  0 SequenceI transcript1 = alignment.findName("transcript1");
291  0 from = new int[] { transcript1.getStart(), transcript1.getEnd() };
292  0 transcript1.setGeneLoci("homo_sapiens", "GRCh38", "17",
293    new MapList(from, to, 1, 1));
294   
295    /*
296    * map gene2 to chromosome reverse strand
297    */
298  0 SequenceI gene2 = alignment.findName("gene2");
299  0 to = new int[] { 45051634, 45051610 };
300  0 from = new int[] { gene2.getStart(), gene2.getEnd() };
301  0 gene2.setGeneLoci("homo_sapiens", "GRCh38", "17",
302    new MapList(from, to, 1, 1));
303   
304    /*
305    * map 'transcript2' to chromosome via 'gene2'
306    * transcript2/1-18 is gene2/2-11,16-23
307    * which is chromosome 45051633-45051624,45051619-45051612
308    */
309  0 to = new int[] { 45051633, 45051624, 45051619, 45051612 };
310  0 SequenceI transcript2 = alignment.findName("transcript2");
311  0 from = new int[] { transcript2.getStart(), transcript2.getEnd() };
312  0 transcript2.setGeneLoci("homo_sapiens", "GRCh38", "17",
313    new MapList(from, to, 1, 1));
314   
315    /*
316    * add a protein product as a DBRef on transcript1
317    */
318  0 SequenceI peptide1 = new Sequence("ENSP001", "SWRECD");
319  0 MapList mapList = new MapList(new int[] { 1, 18 }, new int[] { 1, 6 },
320    3, 1);
321  0 Mapping map = new Mapping(peptide1, mapList);
322  0 DBRefEntry product = new DBRefEntry("", "", "ENSP001", map);
323  0 transcript1.addDBRef(product);
324   
325    /*
326    * add a protein product as a DBRef on transcript2
327    */
328  0 SequenceI peptide2 = new Sequence("ENSP002", "VTLSPA");
329  0 mapList = new MapList(new int[] { 1, 18 }, new int[] { 1, 6 }, 3, 1);
330  0 map = new Mapping(peptide2, mapList);
331  0 product = new DBRefEntry("", "", "ENSP002", map);
332  0 transcript2.addDBRef(product);
333   
334    /*
335    * map gene3 to chromosome
336    */
337  0 SequenceI gene3 = alignment.findName("gene3");
338  0 to = new int[] { 45051610, 45051634 };
339  0 from = new int[] { gene3.getStart(), gene3.getEnd() };
340  0 gene3.setGeneLoci("homo_sapiens", "GRCh38", "5",
341    new MapList(from, to, 1, 1));
342   
343    /*
344    * map 'transcript3' to chromosome
345    */
346  0 SequenceI transcript3 = alignment.findName("transcript3");
347  0 to = new int[] { 45051612, 45051619, 45051624, 45051633 };
348  0 from = new int[] { transcript3.getStart(), transcript3.getEnd() };
349  0 transcript3.setGeneLoci("homo_sapiens", "GRCh38", "5",
350    new MapList(from, to, 1, 1));
351   
352    /*
353    * map 'transcript4' to chromosome
354    */
355  0 SequenceI transcript4 = alignment.findName("transcript4");
356  0 to = new int[] { 45051615, 45051617, 45051619, 45051632, 45051634,
357    45051634 };
358  0 from = new int[] { transcript4.getStart(), transcript4.getEnd() };
359  0 transcript4.setGeneLoci("homo_sapiens", "GRCh38", "5",
360    new MapList(from, to, 1, 1));
361   
362    /*
363    * add a protein product as a DBRef on transcript3
364    */
365  0 SequenceI peptide3 = new Sequence("ENSP003", "SWRECD");
366  0 mapList = new MapList(new int[] { 1, 18 }, new int[] { 1, 6 }, 3, 1);
367  0 map = new Mapping(peptide3, mapList);
368  0 product = new DBRefEntry("", "", "ENSP003", map);
369  0 transcript3.addDBRef(product);
370   
371  0 return alignment;
372    }
373   
374    /**
375    * Test with 'gene' and 'transcript' mapped to the reverse strand of the
376    * chromosome. The VCF variant positions (in forward coordinates) should get
377    * correctly located on sequence positions.
378    *
379    * @throws IOException
380    */
 
381  0 toggle @Test(groups = "Functional")
382    public void testDoLoad_reverseStrand() throws IOException
383    {
384  0 AlignmentI al = buildAlignment();
385   
386  0 File f = makeVcfFile();
387   
388  0 VCFLoader loader = new VCFLoader(f.getPath());
389   
390  0 loader.doLoad(al.getSequencesArray(), null);
391   
392    /*
393    * verify variant feature(s) added to gene2
394    * gene2/1-25 maps to chromosome 45051634- reverse strand
395    */
396  0 List<SequenceFeature> geneFeatures = al.getSequenceAt(2)
397    .getSequenceFeatures();
398  0 SequenceFeatures.sortFeatures(geneFeatures, true);
399  0 assertEquals(geneFeatures.size(), 5);
400  0 SequenceFeature sf;
401   
402    /*
403    * insertion G/GA at 45051613 maps to an insertion at
404    * the preceding position (21) on reverse strand gene
405    * reference: CAAGC -> GCTTG/21-25
406    * genomic variant: CAAGAC (G/GA)
407    * gene variant: GTCTTG (G/GT at 21)
408    */
409  0 sf = geneFeatures.get(1);
410  0 assertEquals(sf.getFeatureGroup(), "VCF");
411  0 assertEquals(sf.getBegin(), 21);
412  0 assertEquals(sf.getEnd(), 21);
413  0 assertEquals(sf.getType(), SEQUENCE_VARIANT);
414  0 assertEquals(sf.getScore(), 0f);
415  0 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GT");
416  0 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 3.0e-03,
417    DELTA);
418   
419    /*
420    * variant G/C at 45051613 maps to C/G at gene position 22
421    */
422  0 sf = geneFeatures.get(2);
423  0 assertEquals(sf.getFeatureGroup(), "VCF");
424  0 assertEquals(sf.getBegin(), 22);
425  0 assertEquals(sf.getEnd(), 22);
426  0 assertEquals(sf.getType(), SEQUENCE_VARIANT);
427  0 assertEquals(sf.getScore(), 0f);
428  0 assertEquals(sf.getValue(Gff3Helper.ALLELES), "C,G");
429  0 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 2.0e-03,
430    DELTA);
431   
432    /*
433    * variant A/C at 45051611 maps to T/G at gene position 24
434    */
435  0 sf = geneFeatures.get(3);
436  0 assertEquals(sf.getFeatureGroup(), "VCF");
437  0 assertEquals(sf.getBegin(), 24);
438  0 assertEquals(sf.getEnd(), 24);
439  0 assertEquals(sf.getType(), SEQUENCE_VARIANT);
440  0 assertEquals(sf.getScore(), 0f);
441  0 assertEquals(sf.getValue(Gff3Helper.ALLELES), "T,G");
442  0 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 4.0e-03,
443    DELTA);
444   
445    /*
446    * variant A/T at 45051611 maps to T/A at gene position 24
447    */
448  0 sf = geneFeatures.get(4);
449  0 assertEquals(sf.getFeatureGroup(), "VCF");
450  0 assertEquals(sf.getBegin(), 24);
451  0 assertEquals(sf.getEnd(), 24);
452  0 assertEquals(sf.getType(), SEQUENCE_VARIANT);
453  0 assertEquals(sf.getScore(), 0f);
454  0 assertEquals(sf.getValue(Gff3Helper.ALLELES), "T,A");
455  0 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 5.0e-03,
456    DELTA);
457   
458    /*
459    * verify 3 variant features added to transcript2
460    */
461  0 List<SequenceFeature> transcriptFeatures = al.getSequenceAt(3)
462    .getSequenceFeatures();
463  0 assertEquals(transcriptFeatures.size(), 3);
464   
465    /*
466    * insertion G/GT at position 21 of gene maps to position 16 of transcript
467    */
468  0 sf = transcriptFeatures.get(1);
469  0 assertEquals(sf.getFeatureGroup(), "VCF");
470  0 assertEquals(sf.getBegin(), 16);
471  0 assertEquals(sf.getEnd(), 16);
472  0 assertEquals(sf.getType(), SEQUENCE_VARIANT);
473  0 assertEquals(sf.getScore(), 0f);
474  0 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GT");
475  0 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 3.0e-03,
476    DELTA);
477   
478    /*
479    * SNP C/G at position 22 of gene maps to position 17 of transcript
480    */
481  0 sf = transcriptFeatures.get(2);
482  0 assertEquals(sf.getFeatureGroup(), "VCF");
483  0 assertEquals(sf.getBegin(), 17);
484  0 assertEquals(sf.getEnd(), 17);
485  0 assertEquals(sf.getType(), SEQUENCE_VARIANT);
486  0 assertEquals(sf.getScore(), 0f);
487  0 assertEquals(sf.getValue(Gff3Helper.ALLELES), "C,G");
488  0 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 2.0e-03,
489    DELTA);
490   
491    /*
492    * verify variant feature(s) computed and added to protein
493    * last codon GCT varies to GGT giving A/G in the last peptide position
494    */
495  0 List<DBRefEntry> dbRefs = al.getSequenceAt(3).getDBRefs();
496  0 SequenceI peptide = null;
497  0 for (DBRefEntry dbref : dbRefs)
498    {
499  0 if (dbref.getMap().getMap().getFromRatio() == 3)
500    {
501  0 peptide = dbref.getMap().getTo();
502    }
503    }
504  0 List<SequenceFeature> proteinFeatures = peptide.getSequenceFeatures();
505   
506    /*
507    * JAL-3187 don't precompute protein features, do dynamically instead
508    */
509  0 assertTrue(proteinFeatures.isEmpty());
510    }
511   
512    /**
513    * Tests that if VEP consequence (CSQ) data is present in the VCF data, then
514    * it is added to the variant feature, but restricted where possible to the
515    * consequences for a specific transcript
516    *
517    * @throws IOException
518    */
 
519  0 toggle @Test(groups = "Functional")
520    public void testDoLoad_vepCsq() throws IOException
521    {
522  0 AlignmentI al = buildAlignment();
523   
524  0 VCFLoader loader = new VCFLoader("test/jalview/io/vcf/testVcf.vcf");
525   
526    /*
527    * VCF data file with variants at gene3 positions
528    * 1 C/A
529    * 5 C/T
530    * 9 CGT/C (deletion)
531    * 13 C/G, C/T
532    * 17 A/AC (insertion), A/G
533    */
534  0 loader.doLoad(al.getSequencesArray(), null);
535   
536    /*
537    * verify variant feature(s) added to gene3
538    */
539  0 List<SequenceFeature> geneFeatures = al.findName("gene3")
540    .getSequenceFeatures();
541  0 SequenceFeatures.sortFeatures(geneFeatures, true);
542  0 assertEquals(geneFeatures.size(), 7);
543  0 SequenceFeature sf = geneFeatures.get(0);
544  0 assertEquals(sf.getBegin(), 1);
545  0 assertEquals(sf.getEnd(), 1);
546  0 assertEquals(sf.getScore(), 0f);
547  0 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.1f, DELTA);
548  0 assertEquals(sf.getValue("alleles"), "C,A");
549    // gene features include Consequence for all transcripts
550  0 Map map = (Map) sf.getValue("CSQ");
551  0 assertEquals(map.size(), 9);
552  0 assertEquals(map.get("PolyPhen"), "Bad");
553   
554  0 sf = geneFeatures.get(1);
555  0 assertEquals(sf.getBegin(), 5);
556  0 assertEquals(sf.getEnd(), 5);
557  0 assertEquals(sf.getScore(), 0f);
558  0 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.2f, DELTA);
559  0 assertEquals(sf.getValue("alleles"), "C,T");
560  0 map = (Map) sf.getValue("CSQ");
561  0 assertEquals(map.size(), 9);
562  0 assertEquals(map.get("PolyPhen"), "Bad;;"); // %3B%3B decoded
563   
564  0 sf = geneFeatures.get(2);
565  0 assertEquals(sf.getBegin(), 9);
566  0 assertEquals(sf.getEnd(), 11); // deletion over 3 positions
567  0 assertEquals(sf.getScore(), 0f);
568  0 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.3f, DELTA);
569  0 assertEquals(sf.getValue("alleles"), "CGG,C");
570  0 map = (Map) sf.getValue("CSQ");
571  0 assertEquals(map.size(), 9);
572   
573  0 sf = geneFeatures.get(3);
574  0 assertEquals(sf.getBegin(), 13);
575  0 assertEquals(sf.getEnd(), 13);
576  0 assertEquals(sf.getScore(), 0f);
577  0 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.5f, DELTA);
578  0 assertEquals(sf.getValue("alleles"), "C,T");
579  0 map = (Map) sf.getValue("CSQ");
580  0 assertEquals(map.size(), 9);
581   
582  0 sf = geneFeatures.get(4);
583  0 assertEquals(sf.getBegin(), 13);
584  0 assertEquals(sf.getEnd(), 13);
585  0 assertEquals(sf.getScore(), 0f);
586  0 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.4f, DELTA);
587  0 assertEquals(sf.getValue("alleles"), "C,G");
588  0 map = (Map) sf.getValue("CSQ");
589  0 assertEquals(map.size(), 9);
590   
591  0 sf = geneFeatures.get(5);
592  0 assertEquals(sf.getBegin(), 17);
593  0 assertEquals(sf.getEnd(), 17);
594  0 assertEquals(sf.getScore(), 0f);
595  0 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.7f, DELTA);
596  0 assertEquals(sf.getValue("alleles"), "A,G");
597  0 map = (Map) sf.getValue("CSQ");
598  0 assertEquals(map.size(), 9);
599   
600  0 sf = geneFeatures.get(6);
601  0 assertEquals(sf.getBegin(), 17);
602  0 assertEquals(sf.getEnd(), 17); // insertion
603  0 assertEquals(sf.getScore(), 0f);
604  0 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.6f, DELTA);
605  0 assertEquals(sf.getValue("alleles"), "A,AC");
606  0 map = (Map) sf.getValue("CSQ");
607  0 assertEquals(map.size(), 9);
608   
609    /*
610    * verify variant feature(s) added to transcript3
611    * at columns 5 (1), 17 (2), positions 3, 11
612    * note the deletion at columns 9-11 is not transferred since col 11
613    * has no mapping to transcript 3
614    */
615  0 List<SequenceFeature> transcriptFeatures = al.findName("transcript3")
616    .getSequenceFeatures();
617  0 SequenceFeatures.sortFeatures(transcriptFeatures, true);
618  0 assertEquals(transcriptFeatures.size(), 3);
619  0 sf = transcriptFeatures.get(0);
620  0 assertEquals(sf.getBegin(), 3);
621  0 assertEquals(sf.getEnd(), 3);
622  0 assertEquals(sf.getScore(), 0f);
623  0 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.2f, DELTA);
624  0 assertEquals(sf.getValue("alleles"), "C,T");
625    // transcript features only have Consequence for that transcripts
626  0 map = (Map) sf.getValue("CSQ");
627  0 assertEquals(map.size(), 9);
628  0 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript3");
629   
630  0 sf = transcriptFeatures.get(1);
631  0 assertEquals(sf.getBegin(), 11);
632  0 assertEquals(sf.getEnd(), 11);
633  0 assertEquals(sf.getScore(), 0f);
634  0 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.7f, DELTA);
635  0 assertEquals(sf.getValue("alleles"), "A,G");
636  0 assertEquals(map.size(), 9);
637  0 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript3");
638   
639  0 sf = transcriptFeatures.get(2);
640  0 assertEquals(sf.getBegin(), 11);
641  0 assertEquals(sf.getEnd(), 11);
642  0 assertEquals(sf.getScore(), 0f);
643  0 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.6f, DELTA);
644  0 assertEquals(sf.getValue("alleles"), "A,AC");
645  0 assertEquals(map.size(), 9);
646  0 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript3");
647   
648    /*
649    * verify variants computed on protein product for transcript3
650    * peptide is SWRECD
651    * codon variants are AGC/AGT position 1 which is synonymous
652    * and GAG/GGG which is E/G in position 4
653    * the insertion variant is not transferred to the peptide
654    */
655  0 List<DBRefEntry> dbRefs = al.findName("transcript3").getDBRefs();
656  0 SequenceI peptide = null;
657  0 for (DBRefEntry dbref : dbRefs)
658    {
659  0 if (dbref.getMap().getMap().getFromRatio() == 3)
660    {
661  0 peptide = dbref.getMap().getTo();
662    }
663    }
664  0 List<SequenceFeature> proteinFeatures = peptide.getSequenceFeatures();
665    /*
666    * JAL-3187 don't precompute protein features, do dynamically instead
667    */
668  0 assertTrue(proteinFeatures.isEmpty());
669    // SequenceFeatures.sortFeatures(proteinFeatures, true);
670    // assertEquals(proteinFeatures.size(), 2);
671    // sf = proteinFeatures.get(0);
672    // assertEquals(sf.getFeatureGroup(), "VCF");
673    // assertEquals(sf.getBegin(), 1);
674    // assertEquals(sf.getEnd(), 1);
675    // assertEquals(sf.getType(), SequenceOntologyI.SYNONYMOUS_VARIANT);
676    // assertEquals(sf.getDescription(), "agC/agT");
677    // sf = proteinFeatures.get(1);
678    // assertEquals(sf.getFeatureGroup(), "VCF");
679    // assertEquals(sf.getBegin(), 4);
680    // assertEquals(sf.getEnd(), 4);
681    // assertEquals(sf.getType(), SequenceOntologyI.NONSYNONYMOUS_VARIANT);
682    // assertEquals(sf.getDescription(), "p.Glu4Gly");
683   
684    /*
685    * verify variant feature(s) added to transcript4
686    * at columns 13 (2) and 17 (2), positions 7 and 11
687    */
688  0 transcriptFeatures = al.findName("transcript4").getSequenceFeatures();
689  0 SequenceFeatures.sortFeatures(transcriptFeatures, true);
690  0 assertEquals(transcriptFeatures.size(), 4);
691  0 sf = transcriptFeatures.get(0);
692  0 assertEquals(sf.getBegin(), 7);
693  0 assertEquals(sf.getEnd(), 7);
694  0 assertEquals(sf.getScore(), 0f);
695  0 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.5f, DELTA);
696  0 assertEquals(sf.getValue("alleles"), "C,T");
697  0 assertEquals(map.size(), 9);
698  0 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript4");
699   
700  0 sf = transcriptFeatures.get(1);
701  0 assertEquals(sf.getBegin(), 7);
702  0 assertEquals(sf.getEnd(), 7);
703  0 assertEquals(sf.getScore(), 0f);
704  0 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.4f, DELTA);
705  0 assertEquals(sf.getValue("alleles"), "C,G");
706  0 assertEquals(map.size(), 9);
707  0 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript4");
708   
709  0 sf = transcriptFeatures.get(2);
710  0 assertEquals(sf.getBegin(), 11);
711  0 assertEquals(sf.getEnd(), 11);
712  0 assertEquals(sf.getScore(), 0f);
713  0 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.7f, DELTA);
714  0 assertEquals(sf.getValue("alleles"), "A,G");
715  0 assertEquals(map.size(), 9);
716  0 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript4");
717   
718  0 sf = transcriptFeatures.get(3);
719  0 assertEquals(sf.getBegin(), 11);
720  0 assertEquals(sf.getEnd(), 11);
721  0 assertEquals(sf.getScore(), 0f);
722  0 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.6f, DELTA);
723  0 assertEquals(sf.getValue("alleles"), "A,AC");
724  0 assertEquals(map.size(), 9);
725  0 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript4");
726    }
727   
728    /**
729    * A test that demonstrates loading a contig sequence from an indexed sequence
730    * database which is the reference for a VCF file
731    *
732    * @throws IOException
733    */
 
734  0 toggle @Test(groups = "Functional")
735    public void testLoadVCFContig() throws IOException
736    {
737  0 VCFLoader loader = new VCFLoader("test/jalview/io/vcf/testVcf2.vcf");
738   
739  0 SequenceI seq = loader.loadVCFContig("contig123");
740  0 assertEquals(seq.getLength(), 15);
741  0 assertEquals(seq.getSequenceAsString(), "AAAAACCCCCGGGGG");
742  0 List<SequenceFeature> features = seq.getSequenceFeatures();
743  0 SequenceFeatures.sortFeatures(features, true);
744  0 assertEquals(features.size(), 2);
745  0 SequenceFeature sf = features.get(0);
746  0 assertEquals(sf.getBegin(), 8);
747  0 assertEquals(sf.getEnd(), 8);
748  0 assertEquals(sf.getDescription(), "C,A");
749  0 sf = features.get(1);
750  0 assertEquals(sf.getBegin(), 12);
751  0 assertEquals(sf.getEnd(), 12);
752  0 assertEquals(sf.getDescription(), "G,T");
753   
754  0 seq = loader.loadVCFContig("contig789");
755  0 assertEquals(seq.getLength(), 25);
756  0 assertEquals(seq.getSequenceAsString(), "GGGGGTTTTTAAAAACCCCCGGGGG");
757  0 features = seq.getSequenceFeatures();
758  0 SequenceFeatures.sortFeatures(features, true);
759  0 assertEquals(features.size(), 2);
760  0 sf = features.get(0);
761  0 assertEquals(sf.getBegin(), 2);
762  0 assertEquals(sf.getEnd(), 2);
763  0 assertEquals(sf.getDescription(), "G,T");
764  0 sf = features.get(1);
765  0 assertEquals(sf.getBegin(), 21);
766  0 assertEquals(sf.getEnd(), 21);
767  0 assertEquals(sf.getDescription(), "G,A");
768   
769  0 seq = loader.loadVCFContig("contig456");
770  0 assertEquals(seq.getLength(), 20);
771  0 assertEquals(seq.getSequenceAsString(), "CCCCCGGGGGTTTTTAAAAA");
772  0 features = seq.getSequenceFeatures();
773  0 SequenceFeatures.sortFeatures(features, true);
774  0 assertEquals(features.size(), 1);
775  0 sf = features.get(0);
776  0 assertEquals(sf.getBegin(), 15);
777  0 assertEquals(sf.getEnd(), 15);
778  0 assertEquals(sf.getDescription(), "T,C");
779    }
780    }