Clover icon

Coverage Report

  1. Project Clover database Mon Nov 18 2024 09:56:54 GMT
  2. Package jalview.io.vcf

File VCFLoaderTest.java

 

Code metrics

6
379
8
1
780
501
11
0.03
47.38
8
1.38

Classes

Class Line # Actions
VCFLoaderTest 54 379 11
1.0100%
 

Contributing tests

This file is covered by 4 tests. .

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.io.vcf;
22   
23    import static jalview.io.gff.SequenceOntologyI.SEQUENCE_VARIANT;
24    import static org.testng.Assert.assertEquals;
25    import static org.testng.Assert.assertNull;
26    import static org.testng.Assert.assertTrue;
27   
28    import jalview.bin.Cache;
29    import jalview.bin.Console;
30    import jalview.datamodel.AlignmentI;
31    import jalview.datamodel.DBRefEntry;
32    import jalview.datamodel.Mapping;
33    import jalview.datamodel.Sequence;
34    import jalview.datamodel.SequenceFeature;
35    import jalview.datamodel.SequenceI;
36    import jalview.datamodel.features.FeatureAttributes;
37    import jalview.datamodel.features.SequenceFeatures;
38    import jalview.gui.AlignFrame;
39    import jalview.io.DataSourceType;
40    import jalview.io.FileLoader;
41    import jalview.io.gff.Gff3Helper;
42    import jalview.util.MapList;
43   
44    import java.io.File;
45    import java.io.IOException;
46    import java.io.PrintWriter;
47    import java.util.List;
48    import java.util.Map;
49   
50    import org.testng.annotations.BeforeClass;
51    import org.testng.annotations.BeforeTest;
52    import org.testng.annotations.Test;
53   
 
54    public class VCFLoaderTest
55    {
56    private static final float DELTA = 0.00001f;
57   
58    // columns 9717- of gene P30419 from Ensembl (much modified)
59    private static final String FASTA = "" +
60    /*
61    * forward strand 'gene' and 'transcript' with two exons
62    */
63    ">gene1/1-25 chromosome:GRCh38:17:45051610:45051634:1\n"
64    + "CAAGCTGGCGGACGAGAGTGTGACA\n"
65    + ">transcript1/1-18\n--AGCTGGCG----AGAGTGTGAC-\n"
66   
67    /*
68    * reverse strand gene and transcript (reverse complement alleles!)
69    */
70    + ">gene2/1-25 chromosome:GRCh38:17:45051610:45051634:-1\n"
71    + "TGTCACACTCTCGTCCGCCAGCTTG\n" + ">transcript2/1-18\n"
72    + "-GTCACACTCT----CGCCAGCT--\n"
73   
74    /*
75    * 'gene' on chromosome 5 with two transcripts
76    */
77    + ">gene3/1-25 chromosome:GRCh38:5:45051610:45051634:1\n"
78    + "CAAGCTGGCGGACGAGAGTGTGACA\n"
79    + ">transcript3/1-18\n--AGCTGGCG----AGAGTGTGAC-\n"
80    + ">transcript4/1-18\n-----TGG-GGACGAGAGTGTGA-A\n";
81   
82    private static final String[] VCF = { "##fileformat=VCFv4.2",
83    // fields other than AF are ignored when parsing as they have no INFO
84    // definition
85    "##INFO=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency, for each ALT allele, in the same order as listed\">",
86    "##INFO=<ID=AC_Female,Number=A,Type=Integer,Description=\"Allele count in Female genotypes\"",
87    "##INFO=<ID=AF_AFR,Number=A,Type=Float,Description=\"Allele Frequency among African/African American genotypes\"",
88    "##reference=Homo_sapiens/GRCh38",
89    "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO",
90    // A/T,C variants in position 2 of gene sequence (precedes transcript)
91    // should create 2 variant features with respective AF values
92    // malformed values for AC_Female and AF_AFR should be ignored
93    "17\t45051611\trs384765\tA\tT,C\t1666.64\tRF;XYZ\tAC=15;AF=5.0e-03,4.0e-03;AC_Female=12,3d;AF_AFR=low,2.3e-4",
94    // SNP G/C in position 4 of gene sequence, position 2 of transcript
95    // insertion G/GA is transferred to nucleotide but not to peptide
96    "17\t45051613\t.\tG\tGA,C\t1666.65\t.\tAC=15;AF=3.0e-03,2.0e-03",
97    // '.' in INFO field should be ignored
98    "17\t45051615\t.\tG\tC\t1666.66\tRF\tAC=16;AF=." };
99   
 
100  1 toggle @BeforeClass(alwaysRun = true)
101    public void setUp()
102    {
103    /*
104    * configure to capture all available VCF and VEP (CSQ) fields
105    */
106  1 Cache.loadProperties("test/jalview/io/testProps.jvprops");
107  1 Cache.setProperty("VCF_FIELDS", ".*");
108  1 Cache.setProperty("VEP_FIELDS", ".*");
109  1 Cache.setProperty("VCF_ASSEMBLY", "GRCh38=GRCh38");
110  1 Console.initLogger();
111    }
112   
 
113  4 toggle @BeforeTest(alwaysRun = true)
114    public void setUpBeforeTest()
115    {
116    /*
117    * clear down feature attributes metadata
118    */
119  4 FeatureAttributes.getInstance().clear();
120    }
121   
 
122  1 toggle @Test(groups = "Functional")
123    public void testDoLoad() throws IOException
124    {
125  1 AlignmentI al = buildAlignment();
126   
127  1 File f = makeVcfFile();
128  1 VCFLoader loader = new VCFLoader(f.getPath());
129   
130  1 loader.doLoad(al.getSequencesArray(), null);
131   
132    /*
133    * verify variant feature(s) added to gene
134    * NB alleles at a locus may not be processed, and features added,
135    * in the order in which they appear in the VCF record as method
136    * VariantContext.getAlternateAlleles() does not guarantee order
137    * - order of assertions here matches what we find (is not important)
138    */
139  1 List<SequenceFeature> geneFeatures = al.getSequenceAt(0)
140    .getSequenceFeatures();
141  1 SequenceFeatures.sortFeatures(geneFeatures, true);
142  1 assertEquals(geneFeatures.size(), 5);
143  1 SequenceFeature sf = geneFeatures.get(0);
144  1 assertEquals(sf.getFeatureGroup(), "VCF");
145  1 assertEquals(sf.getBegin(), 2);
146  1 assertEquals(sf.getEnd(), 2);
147  1 assertEquals(sf.getScore(), 0f);
148  1 assertEquals(sf.getValue("AF"), "4.0e-03");
149  1 assertEquals(sf.getValue("AF_AFR"), "2.3e-4");
150  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "A,C");
151  1 assertEquals(sf.getType(), SEQUENCE_VARIANT);
152  1 assertEquals(sf.getValue("POS"), "45051611");
153  1 assertEquals(sf.getValue("ID"), "rs384765");
154  1 assertEquals(sf.getValue("QUAL"), "1666.64");
155  1 assertEquals(sf.getValue("FILTER"), "RF;XYZ");
156    // malformed integer for AC_Female is ignored (JAL-3375)
157  1 assertNull(sf.getValue("AC_Female"));
158   
159  1 sf = geneFeatures.get(1);
160  1 assertEquals(sf.getFeatureGroup(), "VCF");
161  1 assertEquals(sf.getBegin(), 2);
162  1 assertEquals(sf.getEnd(), 2);
163  1 assertEquals(sf.getType(), SEQUENCE_VARIANT);
164  1 assertEquals(sf.getScore(), 0f);
165  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 5.0e-03,
166    DELTA);
167  1 assertEquals(sf.getValue("AC_Female"), "12");
168    // malformed float for AF_AFR is ignored (JAL-3375)
169  1 assertNull(sf.getValue("AC_AFR"));
170  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "A,T");
171   
172  1 sf = geneFeatures.get(2);
173  1 assertEquals(sf.getFeatureGroup(), "VCF");
174  1 assertEquals(sf.getBegin(), 4);
175  1 assertEquals(sf.getEnd(), 4);
176  1 assertEquals(sf.getType(), SEQUENCE_VARIANT);
177  1 assertEquals(sf.getScore(), 0f);
178  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 2.0e-03,
179    DELTA);
180  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,C");
181   
182  1 sf = geneFeatures.get(3);
183  1 assertEquals(sf.getFeatureGroup(), "VCF");
184  1 assertEquals(sf.getBegin(), 4);
185  1 assertEquals(sf.getEnd(), 4);
186  1 assertEquals(sf.getType(), SEQUENCE_VARIANT);
187  1 assertEquals(sf.getScore(), 0f);
188  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 3.0e-03,
189    DELTA);
190  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GA");
191  1 assertNull(sf.getValue("ID")); // '.' is ignored
192  1 assertNull(sf.getValue("FILTER")); // '.' is ignored
193   
194  1 sf = geneFeatures.get(4);
195  1 assertEquals(sf.getFeatureGroup(), "VCF");
196  1 assertEquals(sf.getBegin(), 6);
197  1 assertEquals(sf.getEnd(), 6);
198  1 assertEquals(sf.getType(), SEQUENCE_VARIANT);
199  1 assertEquals(sf.getScore(), 0f);
200    // AF=. should not have been captured
201  1 assertNull(sf.getValue("AF"));
202  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,C");
203   
204    /*
205    * verify variant feature(s) added to transcript
206    */
207  1 List<SequenceFeature> transcriptFeatures = al.getSequenceAt(1)
208    .getSequenceFeatures();
209  1 assertEquals(transcriptFeatures.size(), 3);
210  1 sf = transcriptFeatures.get(0);
211  1 assertEquals(sf.getFeatureGroup(), "VCF");
212  1 assertEquals(sf.getBegin(), 2);
213  1 assertEquals(sf.getEnd(), 2);
214  1 assertEquals(sf.getType(), SEQUENCE_VARIANT);
215  1 assertEquals(sf.getScore(), 0f);
216  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 2.0e-03,
217    DELTA);
218  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,C");
219  1 sf = transcriptFeatures.get(1);
220  1 assertEquals(sf.getFeatureGroup(), "VCF");
221  1 assertEquals(sf.getBegin(), 2);
222  1 assertEquals(sf.getEnd(), 2);
223  1 assertEquals(sf.getType(), SEQUENCE_VARIANT);
224  1 assertEquals(sf.getScore(), 0f);
225  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 3.0e-03,
226    DELTA);
227  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GA");
228   
229    /*
230    * verify SNP variant feature(s) computed and added to protein
231    * first codon AGC varies to ACC giving S/T
232    */
233  1 List<DBRefEntry> dbRefs = al.getSequenceAt(1).getDBRefs();
234  1 SequenceI peptide = null;
235  1 for (DBRefEntry dbref : dbRefs)
236    {
237  2 if (dbref.getMap().getMap().getFromRatio() == 3)
238    {
239  1 peptide = dbref.getMap().getTo();
240    }
241    }
242  1 List<SequenceFeature> proteinFeatures = peptide.getSequenceFeatures();
243   
244    /*
245    * JAL-3187 don't precompute protein features, do dynamically instead
246    */
247  1 assertTrue(proteinFeatures.isEmpty());
248    }
249   
 
250  2 toggle private File makeVcfFile() throws IOException
251    {
252  2 File f = File.createTempFile("Test", ".vcf");
253  2 f.deleteOnExit();
254  2 PrintWriter pw = new PrintWriter(f);
255  2 for (String vcfLine : VCF)
256    {
257  18 pw.println(vcfLine);
258    }
259  2 pw.close();
260  2 return f;
261    }
262   
263    /**
264    * Make a simple alignment with one 'gene' and one 'transcript'
265    *
266    * @return
267    */
 
268  3 toggle private AlignmentI buildAlignment()
269    {
270  3 AlignFrame af = new FileLoader().LoadFileWaitTillLoaded(FASTA,
271    DataSourceType.PASTE);
272   
273    /*
274    * map gene1 sequence to chromosome (normally done when the sequence is fetched
275    * from Ensembl and transcripts computed)
276    */
277  3 AlignmentI alignment = af.getViewport().getAlignment();
278  3 SequenceI gene1 = alignment.findName("gene1");
279  3 int[] to = new int[] { 45051610, 45051634 };
280  3 int[] from = new int[] { gene1.getStart(), gene1.getEnd() };
281  3 gene1.setGeneLoci("homo_sapiens", "GRCh38", "17",
282    new MapList(from, to, 1, 1));
283   
284    /*
285    * map 'transcript1' to chromosome via 'gene1'
286    * transcript1/1-18 is gene1/3-10,15-24
287    * which is chromosome 45051612-45051619,45051624-45051633
288    */
289  3 to = new int[] { 45051612, 45051619, 45051624, 45051633 };
290  3 SequenceI transcript1 = alignment.findName("transcript1");
291  3 from = new int[] { transcript1.getStart(), transcript1.getEnd() };
292  3 transcript1.setGeneLoci("homo_sapiens", "GRCh38", "17",
293    new MapList(from, to, 1, 1));
294   
295    /*
296    * map gene2 to chromosome reverse strand
297    */
298  3 SequenceI gene2 = alignment.findName("gene2");
299  3 to = new int[] { 45051634, 45051610 };
300  3 from = new int[] { gene2.getStart(), gene2.getEnd() };
301  3 gene2.setGeneLoci("homo_sapiens", "GRCh38", "17",
302    new MapList(from, to, 1, 1));
303   
304    /*
305    * map 'transcript2' to chromosome via 'gene2'
306    * transcript2/1-18 is gene2/2-11,16-23
307    * which is chromosome 45051633-45051624,45051619-45051612
308    */
309  3 to = new int[] { 45051633, 45051624, 45051619, 45051612 };
310  3 SequenceI transcript2 = alignment.findName("transcript2");
311  3 from = new int[] { transcript2.getStart(), transcript2.getEnd() };
312  3 transcript2.setGeneLoci("homo_sapiens", "GRCh38", "17",
313    new MapList(from, to, 1, 1));
314   
315    /*
316    * add a protein product as a DBRef on transcript1
317    */
318  3 SequenceI peptide1 = new Sequence("ENSP001", "SWRECD");
319  3 MapList mapList = new MapList(new int[] { 1, 18 }, new int[] { 1, 6 },
320    3, 1);
321  3 Mapping map = new Mapping(peptide1, mapList);
322  3 DBRefEntry product = new DBRefEntry("", "", "ENSP001", map);
323  3 transcript1.addDBRef(product);
324   
325    /*
326    * add a protein product as a DBRef on transcript2
327    */
328  3 SequenceI peptide2 = new Sequence("ENSP002", "VTLSPA");
329  3 mapList = new MapList(new int[] { 1, 18 }, new int[] { 1, 6 }, 3, 1);
330  3 map = new Mapping(peptide2, mapList);
331  3 product = new DBRefEntry("", "", "ENSP002", map);
332  3 transcript2.addDBRef(product);
333   
334    /*
335    * map gene3 to chromosome
336    */
337  3 SequenceI gene3 = alignment.findName("gene3");
338  3 to = new int[] { 45051610, 45051634 };
339  3 from = new int[] { gene3.getStart(), gene3.getEnd() };
340  3 gene3.setGeneLoci("homo_sapiens", "GRCh38", "5",
341    new MapList(from, to, 1, 1));
342   
343    /*
344    * map 'transcript3' to chromosome
345    */
346  3 SequenceI transcript3 = alignment.findName("transcript3");
347  3 to = new int[] { 45051612, 45051619, 45051624, 45051633 };
348  3 from = new int[] { transcript3.getStart(), transcript3.getEnd() };
349  3 transcript3.setGeneLoci("homo_sapiens", "GRCh38", "5",
350    new MapList(from, to, 1, 1));
351   
352    /*
353    * map 'transcript4' to chromosome
354    */
355  3 SequenceI transcript4 = alignment.findName("transcript4");
356  3 to = new int[] { 45051615, 45051617, 45051619, 45051632, 45051634,
357    45051634 };
358  3 from = new int[] { transcript4.getStart(), transcript4.getEnd() };
359  3 transcript4.setGeneLoci("homo_sapiens", "GRCh38", "5",
360    new MapList(from, to, 1, 1));
361   
362    /*
363    * add a protein product as a DBRef on transcript3
364    */
365  3 SequenceI peptide3 = new Sequence("ENSP003", "SWRECD");
366  3 mapList = new MapList(new int[] { 1, 18 }, new int[] { 1, 6 }, 3, 1);
367  3 map = new Mapping(peptide3, mapList);
368  3 product = new DBRefEntry("", "", "ENSP003", map);
369  3 transcript3.addDBRef(product);
370   
371  3 return alignment;
372    }
373   
374    /**
375    * Test with 'gene' and 'transcript' mapped to the reverse strand of the
376    * chromosome. The VCF variant positions (in forward coordinates) should get
377    * correctly located on sequence positions.
378    *
379    * @throws IOException
380    */
 
381  1 toggle @Test(groups = "Functional")
382    public void testDoLoad_reverseStrand() throws IOException
383    {
384  1 AlignmentI al = buildAlignment();
385   
386  1 File f = makeVcfFile();
387   
388  1 VCFLoader loader = new VCFLoader(f.getPath());
389   
390  1 loader.doLoad(al.getSequencesArray(), null);
391   
392    /*
393    * verify variant feature(s) added to gene2
394    * gene2/1-25 maps to chromosome 45051634- reverse strand
395    */
396  1 List<SequenceFeature> geneFeatures = al.getSequenceAt(2)
397    .getSequenceFeatures();
398  1 SequenceFeatures.sortFeatures(geneFeatures, true);
399  1 assertEquals(geneFeatures.size(), 5);
400  1 SequenceFeature sf;
401   
402    /*
403    * insertion G/GA at 45051613 maps to an insertion at
404    * the preceding position (21) on reverse strand gene
405    * reference: CAAGC -> GCTTG/21-25
406    * genomic variant: CAAGAC (G/GA)
407    * gene variant: GTCTTG (G/GT at 21)
408    */
409  1 sf = geneFeatures.get(1);
410  1 assertEquals(sf.getFeatureGroup(), "VCF");
411  1 assertEquals(sf.getBegin(), 21);
412  1 assertEquals(sf.getEnd(), 21);
413  1 assertEquals(sf.getType(), SEQUENCE_VARIANT);
414  1 assertEquals(sf.getScore(), 0f);
415  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GT");
416  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 3.0e-03,
417    DELTA);
418   
419    /*
420    * variant G/C at 45051613 maps to C/G at gene position 22
421    */
422  1 sf = geneFeatures.get(2);
423  1 assertEquals(sf.getFeatureGroup(), "VCF");
424  1 assertEquals(sf.getBegin(), 22);
425  1 assertEquals(sf.getEnd(), 22);
426  1 assertEquals(sf.getType(), SEQUENCE_VARIANT);
427  1 assertEquals(sf.getScore(), 0f);
428  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "C,G");
429  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 2.0e-03,
430    DELTA);
431   
432    /*
433    * variant A/C at 45051611 maps to T/G at gene position 24
434    */
435  1 sf = geneFeatures.get(3);
436  1 assertEquals(sf.getFeatureGroup(), "VCF");
437  1 assertEquals(sf.getBegin(), 24);
438  1 assertEquals(sf.getEnd(), 24);
439  1 assertEquals(sf.getType(), SEQUENCE_VARIANT);
440  1 assertEquals(sf.getScore(), 0f);
441  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "T,G");
442  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 4.0e-03,
443    DELTA);
444   
445    /*
446    * variant A/T at 45051611 maps to T/A at gene position 24
447    */
448  1 sf = geneFeatures.get(4);
449  1 assertEquals(sf.getFeatureGroup(), "VCF");
450  1 assertEquals(sf.getBegin(), 24);
451  1 assertEquals(sf.getEnd(), 24);
452  1 assertEquals(sf.getType(), SEQUENCE_VARIANT);
453  1 assertEquals(sf.getScore(), 0f);
454  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "T,A");
455  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 5.0e-03,
456    DELTA);
457   
458    /*
459    * verify 3 variant features added to transcript2
460    */
461  1 List<SequenceFeature> transcriptFeatures = al.getSequenceAt(3)
462    .getSequenceFeatures();
463  1 assertEquals(transcriptFeatures.size(), 3);
464   
465    /*
466    * insertion G/GT at position 21 of gene maps to position 16 of transcript
467    */
468  1 sf = transcriptFeatures.get(1);
469  1 assertEquals(sf.getFeatureGroup(), "VCF");
470  1 assertEquals(sf.getBegin(), 16);
471  1 assertEquals(sf.getEnd(), 16);
472  1 assertEquals(sf.getType(), SEQUENCE_VARIANT);
473  1 assertEquals(sf.getScore(), 0f);
474  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "G,GT");
475  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 3.0e-03,
476    DELTA);
477   
478    /*
479    * SNP C/G at position 22 of gene maps to position 17 of transcript
480    */
481  1 sf = transcriptFeatures.get(2);
482  1 assertEquals(sf.getFeatureGroup(), "VCF");
483  1 assertEquals(sf.getBegin(), 17);
484  1 assertEquals(sf.getEnd(), 17);
485  1 assertEquals(sf.getType(), SEQUENCE_VARIANT);
486  1 assertEquals(sf.getScore(), 0f);
487  1 assertEquals(sf.getValue(Gff3Helper.ALLELES), "C,G");
488  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 2.0e-03,
489    DELTA);
490   
491    /*
492    * verify variant feature(s) computed and added to protein
493    * last codon GCT varies to GGT giving A/G in the last peptide position
494    */
495  1 List<DBRefEntry> dbRefs = al.getSequenceAt(3).getDBRefs();
496  1 SequenceI peptide = null;
497  1 for (DBRefEntry dbref : dbRefs)
498    {
499  2 if (dbref.getMap().getMap().getFromRatio() == 3)
500    {
501  1 peptide = dbref.getMap().getTo();
502    }
503    }
504  1 List<SequenceFeature> proteinFeatures = peptide.getSequenceFeatures();
505   
506    /*
507    * JAL-3187 don't precompute protein features, do dynamically instead
508    */
509  1 assertTrue(proteinFeatures.isEmpty());
510    }
511   
512    /**
513    * Tests that if VEP consequence (CSQ) data is present in the VCF data, then
514    * it is added to the variant feature, but restricted where possible to the
515    * consequences for a specific transcript
516    *
517    * @throws IOException
518    */
 
519  1 toggle @Test(groups = "Functional")
520    public void testDoLoad_vepCsq() throws IOException
521    {
522  1 AlignmentI al = buildAlignment();
523   
524  1 VCFLoader loader = new VCFLoader("test/jalview/io/vcf/testVcf.vcf");
525   
526    /*
527    * VCF data file with variants at gene3 positions
528    * 1 C/A
529    * 5 C/T
530    * 9 CGT/C (deletion)
531    * 13 C/G, C/T
532    * 17 A/AC (insertion), A/G
533    */
534  1 loader.doLoad(al.getSequencesArray(), null);
535   
536    /*
537    * verify variant feature(s) added to gene3
538    */
539  1 List<SequenceFeature> geneFeatures = al.findName("gene3")
540    .getSequenceFeatures();
541  1 SequenceFeatures.sortFeatures(geneFeatures, true);
542  1 assertEquals(geneFeatures.size(), 7);
543  1 SequenceFeature sf = geneFeatures.get(0);
544  1 assertEquals(sf.getBegin(), 1);
545  1 assertEquals(sf.getEnd(), 1);
546  1 assertEquals(sf.getScore(), 0f);
547  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.1f, DELTA);
548  1 assertEquals(sf.getValue("alleles"), "C,A");
549    // gene features include Consequence for all transcripts
550  1 Map map = (Map) sf.getValue("CSQ");
551  1 assertEquals(map.size(), 9);
552  1 assertEquals(map.get("PolyPhen"), "Bad");
553   
554  1 sf = geneFeatures.get(1);
555  1 assertEquals(sf.getBegin(), 5);
556  1 assertEquals(sf.getEnd(), 5);
557  1 assertEquals(sf.getScore(), 0f);
558  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.2f, DELTA);
559  1 assertEquals(sf.getValue("alleles"), "C,T");
560  1 map = (Map) sf.getValue("CSQ");
561  1 assertEquals(map.size(), 9);
562  1 assertEquals(map.get("PolyPhen"), "Bad;;"); // %3B%3B decoded
563   
564  1 sf = geneFeatures.get(2);
565  1 assertEquals(sf.getBegin(), 9);
566  1 assertEquals(sf.getEnd(), 11); // deletion over 3 positions
567  1 assertEquals(sf.getScore(), 0f);
568  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.3f, DELTA);
569  1 assertEquals(sf.getValue("alleles"), "CGG,C");
570  1 map = (Map) sf.getValue("CSQ");
571  1 assertEquals(map.size(), 9);
572   
573  1 sf = geneFeatures.get(3);
574  1 assertEquals(sf.getBegin(), 13);
575  1 assertEquals(sf.getEnd(), 13);
576  1 assertEquals(sf.getScore(), 0f);
577  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.5f, DELTA);
578  1 assertEquals(sf.getValue("alleles"), "C,T");
579  1 map = (Map) sf.getValue("CSQ");
580  1 assertEquals(map.size(), 9);
581   
582  1 sf = geneFeatures.get(4);
583  1 assertEquals(sf.getBegin(), 13);
584  1 assertEquals(sf.getEnd(), 13);
585  1 assertEquals(sf.getScore(), 0f);
586  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.4f, DELTA);
587  1 assertEquals(sf.getValue("alleles"), "C,G");
588  1 map = (Map) sf.getValue("CSQ");
589  1 assertEquals(map.size(), 9);
590   
591  1 sf = geneFeatures.get(5);
592  1 assertEquals(sf.getBegin(), 17);
593  1 assertEquals(sf.getEnd(), 17);
594  1 assertEquals(sf.getScore(), 0f);
595  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.7f, DELTA);
596  1 assertEquals(sf.getValue("alleles"), "A,G");
597  1 map = (Map) sf.getValue("CSQ");
598  1 assertEquals(map.size(), 9);
599   
600  1 sf = geneFeatures.get(6);
601  1 assertEquals(sf.getBegin(), 17);
602  1 assertEquals(sf.getEnd(), 17); // insertion
603  1 assertEquals(sf.getScore(), 0f);
604  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.6f, DELTA);
605  1 assertEquals(sf.getValue("alleles"), "A,AC");
606  1 map = (Map) sf.getValue("CSQ");
607  1 assertEquals(map.size(), 9);
608   
609    /*
610    * verify variant feature(s) added to transcript3
611    * at columns 5 (1), 17 (2), positions 3, 11
612    * note the deletion at columns 9-11 is not transferred since col 11
613    * has no mapping to transcript 3
614    */
615  1 List<SequenceFeature> transcriptFeatures = al.findName("transcript3")
616    .getSequenceFeatures();
617  1 SequenceFeatures.sortFeatures(transcriptFeatures, true);
618  1 assertEquals(transcriptFeatures.size(), 3);
619  1 sf = transcriptFeatures.get(0);
620  1 assertEquals(sf.getBegin(), 3);
621  1 assertEquals(sf.getEnd(), 3);
622  1 assertEquals(sf.getScore(), 0f);
623  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.2f, DELTA);
624  1 assertEquals(sf.getValue("alleles"), "C,T");
625    // transcript features only have Consequence for that transcripts
626  1 map = (Map) sf.getValue("CSQ");
627  1 assertEquals(map.size(), 9);
628  1 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript3");
629   
630  1 sf = transcriptFeatures.get(1);
631  1 assertEquals(sf.getBegin(), 11);
632  1 assertEquals(sf.getEnd(), 11);
633  1 assertEquals(sf.getScore(), 0f);
634  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.7f, DELTA);
635  1 assertEquals(sf.getValue("alleles"), "A,G");
636  1 assertEquals(map.size(), 9);
637  1 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript3");
638   
639  1 sf = transcriptFeatures.get(2);
640  1 assertEquals(sf.getBegin(), 11);
641  1 assertEquals(sf.getEnd(), 11);
642  1 assertEquals(sf.getScore(), 0f);
643  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.6f, DELTA);
644  1 assertEquals(sf.getValue("alleles"), "A,AC");
645  1 assertEquals(map.size(), 9);
646  1 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript3");
647   
648    /*
649    * verify variants computed on protein product for transcript3
650    * peptide is SWRECD
651    * codon variants are AGC/AGT position 1 which is synonymous
652    * and GAG/GGG which is E/G in position 4
653    * the insertion variant is not transferred to the peptide
654    */
655  1 List<DBRefEntry> dbRefs = al.findName("transcript3").getDBRefs();
656  1 SequenceI peptide = null;
657  1 for (DBRefEntry dbref : dbRefs)
658    {
659  2 if (dbref.getMap().getMap().getFromRatio() == 3)
660    {
661  1 peptide = dbref.getMap().getTo();
662    }
663    }
664  1 List<SequenceFeature> proteinFeatures = peptide.getSequenceFeatures();
665    /*
666    * JAL-3187 don't precompute protein features, do dynamically instead
667    */
668  1 assertTrue(proteinFeatures.isEmpty());
669    // SequenceFeatures.sortFeatures(proteinFeatures, true);
670    // assertEquals(proteinFeatures.size(), 2);
671    // sf = proteinFeatures.get(0);
672    // assertEquals(sf.getFeatureGroup(), "VCF");
673    // assertEquals(sf.getBegin(), 1);
674    // assertEquals(sf.getEnd(), 1);
675    // assertEquals(sf.getType(), SequenceOntologyI.SYNONYMOUS_VARIANT);
676    // assertEquals(sf.getDescription(), "agC/agT");
677    // sf = proteinFeatures.get(1);
678    // assertEquals(sf.getFeatureGroup(), "VCF");
679    // assertEquals(sf.getBegin(), 4);
680    // assertEquals(sf.getEnd(), 4);
681    // assertEquals(sf.getType(), SequenceOntologyI.NONSYNONYMOUS_VARIANT);
682    // assertEquals(sf.getDescription(), "p.Glu4Gly");
683   
684    /*
685    * verify variant feature(s) added to transcript4
686    * at columns 13 (2) and 17 (2), positions 7 and 11
687    */
688  1 transcriptFeatures = al.findName("transcript4").getSequenceFeatures();
689  1 SequenceFeatures.sortFeatures(transcriptFeatures, true);
690  1 assertEquals(transcriptFeatures.size(), 4);
691  1 sf = transcriptFeatures.get(0);
692  1 assertEquals(sf.getBegin(), 7);
693  1 assertEquals(sf.getEnd(), 7);
694  1 assertEquals(sf.getScore(), 0f);
695  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.5f, DELTA);
696  1 assertEquals(sf.getValue("alleles"), "C,T");
697  1 assertEquals(map.size(), 9);
698  1 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript4");
699   
700  1 sf = transcriptFeatures.get(1);
701  1 assertEquals(sf.getBegin(), 7);
702  1 assertEquals(sf.getEnd(), 7);
703  1 assertEquals(sf.getScore(), 0f);
704  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.4f, DELTA);
705  1 assertEquals(sf.getValue("alleles"), "C,G");
706  1 assertEquals(map.size(), 9);
707  1 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript4");
708   
709  1 sf = transcriptFeatures.get(2);
710  1 assertEquals(sf.getBegin(), 11);
711  1 assertEquals(sf.getEnd(), 11);
712  1 assertEquals(sf.getScore(), 0f);
713  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.7f, DELTA);
714  1 assertEquals(sf.getValue("alleles"), "A,G");
715  1 assertEquals(map.size(), 9);
716  1 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript4");
717   
718  1 sf = transcriptFeatures.get(3);
719  1 assertEquals(sf.getBegin(), 11);
720  1 assertEquals(sf.getEnd(), 11);
721  1 assertEquals(sf.getScore(), 0f);
722  1 assertEquals(Float.parseFloat((String) sf.getValue("AF")), 0.6f, DELTA);
723  1 assertEquals(sf.getValue("alleles"), "A,AC");
724  1 assertEquals(map.size(), 9);
725  1 assertEquals(sf.getValueAsString("CSQ", "Feature"), "transcript4");
726    }
727   
728    /**
729    * A test that demonstrates loading a contig sequence from an indexed sequence
730    * database which is the reference for a VCF file
731    *
732    * @throws IOException
733    */
 
734  1 toggle @Test(groups = "Functional")
735    public void testLoadVCFContig() throws IOException
736    {
737  1 VCFLoader loader = new VCFLoader("test/jalview/io/vcf/testVcf2.vcf");
738   
739  1 SequenceI seq = loader.loadVCFContig("contig123");
740  1 assertEquals(seq.getLength(), 15);
741  1 assertEquals(seq.getSequenceAsString(), "AAAAACCCCCGGGGG");
742  1 List<SequenceFeature> features = seq.getSequenceFeatures();
743  1 SequenceFeatures.sortFeatures(features, true);
744  1 assertEquals(features.size(), 2);
745  1 SequenceFeature sf = features.get(0);
746  1 assertEquals(sf.getBegin(), 8);
747  1 assertEquals(sf.getEnd(), 8);
748  1 assertEquals(sf.getDescription(), "C,A");
749  1 sf = features.get(1);
750  1 assertEquals(sf.getBegin(), 12);
751  1 assertEquals(sf.getEnd(), 12);
752  1 assertEquals(sf.getDescription(), "G,T");
753   
754  1 seq = loader.loadVCFContig("contig789");
755  1 assertEquals(seq.getLength(), 25);
756  1 assertEquals(seq.getSequenceAsString(), "GGGGGTTTTTAAAAACCCCCGGGGG");
757  1 features = seq.getSequenceFeatures();
758  1 SequenceFeatures.sortFeatures(features, true);
759  1 assertEquals(features.size(), 2);
760  1 sf = features.get(0);
761  1 assertEquals(sf.getBegin(), 2);
762  1 assertEquals(sf.getEnd(), 2);
763  1 assertEquals(sf.getDescription(), "G,T");
764  1 sf = features.get(1);
765  1 assertEquals(sf.getBegin(), 21);
766  1 assertEquals(sf.getEnd(), 21);
767  1 assertEquals(sf.getDescription(), "G,A");
768   
769  1 seq = loader.loadVCFContig("contig456");
770  1 assertEquals(seq.getLength(), 20);
771  1 assertEquals(seq.getSequenceAsString(), "CCCCCGGGGGTTTTTAAAAA");
772  1 features = seq.getSequenceFeatures();
773  1 SequenceFeatures.sortFeatures(features, true);
774  1 assertEquals(features.size(), 1);
775  1 sf = features.get(0);
776  1 assertEquals(sf.getBegin(), 15);
777  1 assertEquals(sf.getEnd(), 15);
778  1 assertEquals(sf.getDescription(), "T,C");
779    }
780    }