Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
VCFReaderTest | 37 | 77 | 5 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | package jalview.ext.htsjdk; | |
22 | ||
23 | import static org.testng.Assert.assertEquals; | |
24 | import static org.testng.Assert.assertFalse; | |
25 | import static org.testng.Assert.assertTrue; | |
26 | import htsjdk.samtools.util.CloseableIterator; | |
27 | import htsjdk.variant.variantcontext.Allele; | |
28 | import htsjdk.variant.variantcontext.VariantContext; | |
29 | ||
30 | import java.io.File; | |
31 | import java.io.IOException; | |
32 | import java.io.PrintWriter; | |
33 | import java.util.List; | |
34 | ||
35 | import org.testng.annotations.Test; | |
36 | ||
37 | public class VCFReaderTest | |
38 | { | |
39 | private static final String[] VCF = new String[] { "##fileformat=VCFv4.2", | |
40 | "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO", | |
41 | "20\t3\t.\tC\tG\t.\tPASS\tDP=100", // SNP C/G | |
42 | "20\t7\t.\tG\tGA\t.\tPASS\tDP=100", // insertion G/GA | |
43 | "18\t2\t.\tACG\tA\t.\tPASS\tDP=100" }; // deletion ACG/A | |
44 | ||
45 | // gnomAD exome variant dataset | |
46 | private static final String VCF_PATH = "/Volumes/gjb/smacgowan/NOBACK/resources/gnomad/gnomad.exomes.r2.0.1.sites.vcf.gz"; | |
47 | ||
48 | // "https://storage.cloud.google.com/gnomad-public/release/2.0.1/vcf/exomes/gnomad.exomes.r2.0.1.sites.vcf.gz"; | |
49 | ||
50 | /** | |
51 | * A test to exercise some basic functionality of the htsjdk VCF reader, | |
52 | * reading from a non-index VCF file | |
53 | * | |
54 | * @throws IOException | |
55 | */ | |
56 | 1 | @Test(groups = "Functional") |
57 | public void testReadVcf_plain() throws IOException | |
58 | { | |
59 | 1 | File f = writeVcfFile(); |
60 | 1 | VCFReader reader = new VCFReader(f.getAbsolutePath()); |
61 | 1 | CloseableIterator<VariantContext> variants = reader.iterator(); |
62 | ||
63 | /* | |
64 | * SNP C/G variant | |
65 | */ | |
66 | 1 | VariantContext vc = variants.next(); |
67 | 1 | assertTrue(vc.isSNP()); |
68 | 1 | Allele ref = vc.getReference(); |
69 | 1 | assertEquals(ref.getBaseString(), "C"); |
70 | 1 | List<Allele> alleles = vc.getAlleles(); |
71 | 1 | assertEquals(alleles.size(), 2); |
72 | 1 | assertTrue(alleles.get(0).isReference()); |
73 | 1 | assertEquals(alleles.get(0).getBaseString(), "C"); |
74 | 1 | assertFalse(alleles.get(1).isReference()); |
75 | 1 | assertEquals(alleles.get(1).getBaseString(), "G"); |
76 | ||
77 | /* | |
78 | * Insertion G -> GA | |
79 | */ | |
80 | 1 | vc = variants.next(); |
81 | 1 | assertFalse(vc.isSNP()); |
82 | 1 | assertTrue(vc.isSimpleInsertion()); |
83 | 1 | ref = vc.getReference(); |
84 | 1 | assertEquals(ref.getBaseString(), "G"); |
85 | 1 | alleles = vc.getAlleles(); |
86 | 1 | assertEquals(alleles.size(), 2); |
87 | 1 | assertTrue(alleles.get(0).isReference()); |
88 | 1 | assertEquals(alleles.get(0).getBaseString(), "G"); |
89 | 1 | assertFalse(alleles.get(1).isReference()); |
90 | 1 | assertEquals(alleles.get(1).getBaseString(), "GA"); |
91 | ||
92 | /* | |
93 | * Deletion ACG -> A | |
94 | */ | |
95 | 1 | vc = variants.next(); |
96 | 1 | assertFalse(vc.isSNP()); |
97 | 1 | assertTrue(vc.isSimpleDeletion()); |
98 | 1 | ref = vc.getReference(); |
99 | 1 | assertEquals(ref.getBaseString(), "ACG"); |
100 | 1 | alleles = vc.getAlleles(); |
101 | 1 | assertEquals(alleles.size(), 2); |
102 | 1 | assertTrue(alleles.get(0).isReference()); |
103 | 1 | assertEquals(alleles.get(0).getBaseString(), "ACG"); |
104 | 1 | assertFalse(alleles.get(1).isReference()); |
105 | 1 | assertEquals(alleles.get(1).getBaseString(), "A"); |
106 | ||
107 | 1 | assertFalse(variants.hasNext()); |
108 | ||
109 | 1 | variants.close(); |
110 | 1 | reader.close(); |
111 | } | |
112 | ||
113 | /** | |
114 | * Creates a temporary file to be read by the htsjdk VCF reader | |
115 | * | |
116 | * @return | |
117 | * @throws IOException | |
118 | */ | |
119 | 2 | protected File writeVcfFile() throws IOException |
120 | { | |
121 | 2 | File f = File.createTempFile("Test", "vcf"); |
122 | 2 | f.deleteOnExit(); |
123 | 2 | PrintWriter pw = new PrintWriter(f); |
124 | 2 | for (String vcfLine : VCF) |
125 | { | |
126 | 10 | pw.println(vcfLine); |
127 | } | |
128 | 2 | pw.close(); |
129 | 2 | return f; |
130 | } | |
131 | ||
132 | /** | |
133 | * A 'test' that demonstrates querying an indexed VCF file for features in a | |
134 | * specified interval | |
135 | * | |
136 | * @throws IOException | |
137 | */ | |
138 | 0 | @Test |
139 | public void testQuery_indexed() throws IOException | |
140 | { | |
141 | /* | |
142 | * if not specified, assumes index file is filename.tbi | |
143 | */ | |
144 | 0 | VCFReader reader = new VCFReader(VCF_PATH); |
145 | ||
146 | /* | |
147 | * gene NMT1 (human) is on chromosome 17 | |
148 | * GCHR38 (Ensembl): 45051610-45109016 | |
149 | * GCHR37 (gnoMAD): 43128978-43186384 | |
150 | * CDS begins at offset 9720, first CDS variant at offset 9724 | |
151 | */ | |
152 | 0 | CloseableIterator<VariantContext> features = reader.query("17", |
153 | 43128978 + 9724, 43128978 + 9734); // first 11 CDS positions | |
154 | ||
155 | 0 | assertEquals(printNext(features), 43138702); |
156 | 0 | assertEquals(printNext(features), 43138704); |
157 | 0 | assertEquals(printNext(features), 43138707); |
158 | 0 | assertEquals(printNext(features), 43138708); |
159 | 0 | assertEquals(printNext(features), 43138710); |
160 | 0 | assertEquals(printNext(features), 43138711); |
161 | 0 | assertFalse(features.hasNext()); |
162 | ||
163 | 0 | features.close(); |
164 | 0 | reader.close(); |
165 | } | |
166 | ||
167 | /** | |
168 | * Prints the toString value of the next variant, and returns its start | |
169 | * location | |
170 | * | |
171 | * @param features | |
172 | * @return | |
173 | */ | |
174 | 0 | protected int printNext(CloseableIterator<VariantContext> features) |
175 | { | |
176 | 0 | VariantContext next = features.next(); |
177 | 0 | System.out.println(next.toString()); |
178 | 0 | return next.getStart(); |
179 | } | |
180 | ||
181 | // "https://storage.cloud.google.com/gnomad-public/release/2.0.1/vcf/exomes/gnomad.exomes.r2.0.1.sites.vcf.gz"; | |
182 | ||
183 | /** | |
184 | * Test the query method that wraps a non-indexed VCF file | |
185 | * | |
186 | * @throws IOException | |
187 | */ | |
188 | 1 | @Test(groups = "Functional") |
189 | public void testQuery_plain() throws IOException | |
190 | { | |
191 | 1 | File f = writeVcfFile(); |
192 | 1 | VCFReader reader = new VCFReader(f.getAbsolutePath()); |
193 | ||
194 | /* | |
195 | * query for overlap of 5-8 - should find variant at 7 | |
196 | */ | |
197 | 1 | CloseableIterator<VariantContext> variants = reader.query("20", 5, 8); |
198 | ||
199 | /* | |
200 | * INDEL G/GA variant | |
201 | */ | |
202 | 1 | VariantContext vc = variants.next(); |
203 | 1 | assertTrue(vc.isIndel()); |
204 | 1 | assertEquals(vc.getStart(), 7); |
205 | 1 | assertEquals(vc.getEnd(), 7); |
206 | 1 | Allele ref = vc.getReference(); |
207 | 1 | assertEquals(ref.getBaseString(), "G"); |
208 | 1 | List<Allele> alleles = vc.getAlleles(); |
209 | 1 | assertEquals(alleles.size(), 2); |
210 | 1 | assertTrue(alleles.get(0).isReference()); |
211 | 1 | assertEquals(alleles.get(0).getBaseString(), "G"); |
212 | 1 | assertFalse(alleles.get(1).isReference()); |
213 | 1 | assertEquals(alleles.get(1).getBaseString(), "GA"); |
214 | ||
215 | 1 | assertFalse(variants.hasNext()); |
216 | ||
217 | 1 | variants.close(); |
218 | 1 | reader.close(); |
219 | } | |
220 | } |