Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
VCFReader | 37 | 46 | 29 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | package jalview.ext.htsjdk; | |
22 | ||
23 | import java.io.Closeable; | |
24 | import java.io.File; | |
25 | import java.io.IOException; | |
26 | ||
27 | import htsjdk.samtools.util.CloseableIterator; | |
28 | import htsjdk.variant.variantcontext.VariantContext; | |
29 | import htsjdk.variant.vcf.VCFFileReader; | |
30 | import htsjdk.variant.vcf.VCFHeader; | |
31 | import jalview.bin.Console; | |
32 | ||
33 | /** | |
34 | * A thin wrapper for htsjdk classes to read either plain, or compressed, or | |
35 | * compressed and indexed VCF files | |
36 | */ | |
37 | public class VCFReader implements Closeable, Iterable<VariantContext> | |
38 | { | |
39 | private static final String GZ = "gz"; | |
40 | ||
41 | private static final String TBI_EXTENSION = ".tbi"; | |
42 | ||
43 | private static final String CSI_EXTENSION = ".csi"; | |
44 | ||
45 | private boolean indexed; | |
46 | ||
47 | private VCFFileReader reader; | |
48 | ||
49 | /** | |
50 | * Constructor given a raw or compressed VCF file or a (csi or tabix) index | |
51 | * file | |
52 | * <p> | |
53 | * If the file path ends in ".tbi" or ".csi", <em>or</em> appending one of | |
54 | * these extensions gives a valid file path, open as indexed, else as | |
55 | * unindexed. | |
56 | * | |
57 | * @param f | |
58 | * @throws IOException | |
59 | */ | |
60 | 6 | public VCFReader(String filePath) throws IOException |
61 | { | |
62 | 6 | indexed = false; |
63 | 6 | if (filePath.endsWith(TBI_EXTENSION) |
64 | || filePath.endsWith(CSI_EXTENSION)) | |
65 | { | |
66 | 0 | indexed = true; |
67 | 0 | filePath = filePath.substring(0, filePath.length() - 4); |
68 | } | |
69 | 6 | else if (new File(filePath + TBI_EXTENSION).exists()) |
70 | { | |
71 | 0 | indexed = true; |
72 | } | |
73 | 6 | else if (new File(filePath + CSI_EXTENSION).exists()) |
74 | { | |
75 | 0 | indexed = true; |
76 | } | |
77 | ||
78 | /* | |
79 | * we pass the name of the unindexed file to htsjdk, | |
80 | * with a flag to assert whether it is indexed | |
81 | */ | |
82 | 6 | File file = new File(filePath); |
83 | 6 | if (file.exists()) |
84 | { | |
85 | 6 | reader = new VCFFileReader(file, indexed); |
86 | } | |
87 | else | |
88 | { | |
89 | 0 | Console.error("File not found: " + filePath); |
90 | } | |
91 | } | |
92 | ||
93 | 5 | @Override |
94 | public void close() throws IOException | |
95 | { | |
96 | 5 | if (reader != null) |
97 | { | |
98 | 5 | reader.close(); |
99 | } | |
100 | } | |
101 | ||
102 | /** | |
103 | * Returns an iterator over VCF variants in the file. The client should call | |
104 | * close() on the iterator when finished with it. | |
105 | */ | |
106 | 1 | @Override |
107 | public CloseableIterator<VariantContext> iterator() | |
108 | { | |
109 | 1 | return reader == null ? null : reader.iterator(); |
110 | } | |
111 | ||
112 | /** | |
113 | * Queries for records overlapping the region specified. Note that this method | |
114 | * is performant if the VCF file is indexed, and may be very slow if it is | |
115 | * not. | |
116 | * <p> | |
117 | * Client code should call close() on the iterator when finished with it. | |
118 | * | |
119 | * @param chrom | |
120 | * the chromosome to query | |
121 | * @param start | |
122 | * query interval start | |
123 | * @param end | |
124 | * query interval end | |
125 | * @return | |
126 | */ | |
127 | 40 | public CloseableIterator<VariantContext> query(final String chrom, |
128 | final int start, final int end) | |
129 | { | |
130 | 40 | if (reader == null) |
131 | { | |
132 | 0 | return null; |
133 | } | |
134 | 40 | if (indexed) |
135 | { | |
136 | 0 | return reader.query(chrom, start, end); |
137 | } | |
138 | else | |
139 | { | |
140 | 40 | return queryUnindexed(chrom, start, end); |
141 | } | |
142 | } | |
143 | ||
144 | /** | |
145 | * Returns an iterator over variant records read from a flat file which | |
146 | * overlap the specified chromosomal positions. Call close() on the iterator | |
147 | * when finished with it! | |
148 | * | |
149 | * @param chrom | |
150 | * @param start | |
151 | * @param end | |
152 | * @return | |
153 | */ | |
154 | 40 | protected CloseableIterator<VariantContext> queryUnindexed( |
155 | final String chrom, final int start, final int end) | |
156 | { | |
157 | 40 | final CloseableIterator<VariantContext> it = reader.iterator(); |
158 | ||
159 | 40 | return new CloseableIterator<VariantContext>() |
160 | { | |
161 | boolean atEnd = false; | |
162 | ||
163 | // prime look-ahead buffer with next matching record | |
164 | private VariantContext next = findNext(); | |
165 | ||
166 | 77 | private VariantContext findNext() |
167 | { | |
168 | 77 | if (atEnd) |
169 | { | |
170 | 0 | return null; |
171 | } | |
172 | 77 | VariantContext variant = null; |
173 | 179 | while (it.hasNext()) |
174 | { | |
175 | 145 | variant = it.next(); |
176 | 145 | int vstart = variant.getStart(); |
177 | ||
178 | 145 | if (vstart > end) |
179 | { | |
180 | 6 | atEnd = true; |
181 | 6 | close(); |
182 | 6 | return null; |
183 | } | |
184 | ||
185 | 139 | int vend = variant.getEnd(); |
186 | // todo what is the undeprecated way to get | |
187 | // the chromosome for the variant? | |
188 | 139 | if (chrom.equals(variant.getContig()) && (vstart <= end) |
189 | && (vend >= start)) | |
190 | { | |
191 | 37 | return variant; |
192 | } | |
193 | } | |
194 | 34 | return null; |
195 | } | |
196 | ||
197 | 76 | @Override |
198 | public boolean hasNext() | |
199 | { | |
200 | 76 | boolean hasNext = !atEnd && (next != null); |
201 | 76 | if (!hasNext) |
202 | { | |
203 | 40 | close(); |
204 | } | |
205 | 76 | return hasNext; |
206 | } | |
207 | ||
208 | 37 | @Override |
209 | public VariantContext next() | |
210 | { | |
211 | /* | |
212 | * return the next match, and then re-prime | |
213 | * it with the following one (if any) | |
214 | */ | |
215 | 37 | VariantContext temp = next; |
216 | 37 | next = findNext(); |
217 | 37 | return temp; |
218 | } | |
219 | ||
220 | 0 | @Override |
221 | public void remove() | |
222 | { | |
223 | // not implemented | |
224 | } | |
225 | ||
226 | 86 | @Override |
227 | public void close() | |
228 | { | |
229 | 86 | it.close(); |
230 | } | |
231 | }; | |
232 | } | |
233 | ||
234 | /** | |
235 | * Returns an object that models the VCF file headers | |
236 | * | |
237 | * @return | |
238 | */ | |
239 | 4 | public VCFHeader getFileHeader() |
240 | { | |
241 | 4 | return reader == null ? null : reader.getFileHeader(); |
242 | } | |
243 | ||
244 | /** | |
245 | * Answers true if we are processing a tab-indexed VCF file, false if it is a | |
246 | * plain text (uncompressed) file. | |
247 | * | |
248 | * @return | |
249 | */ | |
250 | 0 | public boolean isIndex() |
251 | { | |
252 | 0 | return indexed; |
253 | } | |
254 | } |