1 |
|
|
2 |
|
|
3 |
|
|
4 |
|
|
5 |
|
|
6 |
|
|
7 |
|
|
8 |
|
|
9 |
|
|
10 |
|
|
11 |
|
|
12 |
|
|
13 |
|
|
14 |
|
|
15 |
|
|
16 |
|
|
17 |
|
|
18 |
|
|
19 |
|
|
20 |
|
|
21 |
|
package jalview.io.vcf; |
22 |
|
|
23 |
|
import jalview.analysis.Dna; |
24 |
|
import jalview.api.AlignViewControllerGuiI; |
25 |
|
import jalview.bin.Cache; |
26 |
|
import jalview.datamodel.DBRefEntry; |
27 |
|
import jalview.datamodel.GeneLociI; |
28 |
|
import jalview.datamodel.Mapping; |
29 |
|
import jalview.datamodel.SequenceFeature; |
30 |
|
import jalview.datamodel.SequenceI; |
31 |
|
import jalview.datamodel.features.FeatureAttributeType; |
32 |
|
import jalview.datamodel.features.FeatureSource; |
33 |
|
import jalview.datamodel.features.FeatureSources; |
34 |
|
import jalview.ext.ensembl.EnsemblMap; |
35 |
|
import jalview.ext.htsjdk.HtsContigDb; |
36 |
|
import jalview.ext.htsjdk.VCFReader; |
37 |
|
import jalview.io.gff.Gff3Helper; |
38 |
|
import jalview.io.gff.SequenceOntologyI; |
39 |
|
import jalview.util.MapList; |
40 |
|
import jalview.util.MappingUtils; |
41 |
|
import jalview.util.MessageManager; |
42 |
|
import jalview.util.StringUtils; |
43 |
|
|
44 |
|
import java.io.File; |
45 |
|
import java.io.IOException; |
46 |
|
import java.util.ArrayList; |
47 |
|
import java.util.HashMap; |
48 |
|
import java.util.HashSet; |
49 |
|
import java.util.Iterator; |
50 |
|
import java.util.List; |
51 |
|
import java.util.Map; |
52 |
|
import java.util.Map.Entry; |
53 |
|
import java.util.Set; |
54 |
|
import java.util.regex.Pattern; |
55 |
|
import java.util.regex.PatternSyntaxException; |
56 |
|
|
57 |
|
import htsjdk.samtools.SAMException; |
58 |
|
import htsjdk.samtools.SAMSequenceDictionary; |
59 |
|
import htsjdk.samtools.SAMSequenceRecord; |
60 |
|
import htsjdk.samtools.util.CloseableIterator; |
61 |
|
import htsjdk.tribble.TribbleException; |
62 |
|
import htsjdk.variant.variantcontext.Allele; |
63 |
|
import htsjdk.variant.variantcontext.VariantContext; |
64 |
|
import htsjdk.variant.vcf.VCFConstants; |
65 |
|
import htsjdk.variant.vcf.VCFHeader; |
66 |
|
import htsjdk.variant.vcf.VCFHeaderLine; |
67 |
|
import htsjdk.variant.vcf.VCFHeaderLineCount; |
68 |
|
import htsjdk.variant.vcf.VCFHeaderLineType; |
69 |
|
import htsjdk.variant.vcf.VCFInfoHeaderLine; |
70 |
|
|
71 |
|
|
72 |
|
|
73 |
|
|
74 |
|
|
75 |
|
@author |
76 |
|
|
|
|
| 70% |
Uncovered Elements: 185 (617) |
Complexity: 150 |
Complexity Density: 0.37 |
|
77 |
|
public class VCFLoader |
78 |
|
{ |
79 |
|
private static final String VCF_ENCODABLE = ":;=%,"; |
80 |
|
|
81 |
|
|
82 |
|
|
83 |
|
|
84 |
|
private static final String VCF_POS = "POS"; |
85 |
|
|
86 |
|
private static final String VCF_ID = "ID"; |
87 |
|
|
88 |
|
private static final String VCF_QUAL = "QUAL"; |
89 |
|
|
90 |
|
private static final String VCF_FILTER = "FILTER"; |
91 |
|
|
92 |
|
private static final String NO_VALUE = VCFConstants.MISSING_VALUE_v4; |
93 |
|
|
94 |
|
private static final String DEFAULT_SPECIES = "homo_sapiens"; |
95 |
|
|
96 |
|
|
97 |
|
|
98 |
|
|
99 |
|
|
100 |
|
|
101 |
|
|
102 |
|
|
103 |
|
|
104 |
|
|
105 |
|
|
|
|
| 60% |
Uncovered Elements: 2 (5) |
Complexity: 2 |
Complexity Density: 0.67 |
|
106 |
|
class VCFMap |
107 |
|
{ |
108 |
|
final String chromosome; |
109 |
|
|
110 |
|
final MapList map; |
111 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (2) |
Complexity: 1 |
Complexity Density: 0.5 |
|
112 |
24 |
VCFMap(String chr, MapList m)... |
113 |
|
{ |
114 |
24 |
chromosome = chr; |
115 |
24 |
map = m; |
116 |
|
} |
117 |
|
|
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
118 |
0 |
@Override... |
119 |
|
public String toString() |
120 |
|
{ |
121 |
0 |
return chromosome + ":" + map.toString(); |
122 |
|
} |
123 |
|
} |
124 |
|
|
125 |
|
|
126 |
|
|
127 |
|
|
128 |
|
|
129 |
|
private static final String VEP_FIELDS_PREF = "VEP_FIELDS"; |
130 |
|
|
131 |
|
private static final String VCF_FIELDS_PREF = "VCF_FIELDS"; |
132 |
|
|
133 |
|
private static final String DEFAULT_VCF_FIELDS = ".*"; |
134 |
|
|
135 |
|
private static final String DEFAULT_VEP_FIELDS = ".*"; |
136 |
|
|
137 |
|
|
138 |
|
|
139 |
|
|
140 |
|
|
141 |
|
private static final String VCF_ASSEMBLY = "VCF_ASSEMBLY"; |
142 |
|
|
143 |
|
private static final String DEFAULT_VCF_ASSEMBLY = "assembly19=GRCh37,hs37=GRCh37,grch37=GRCh37,grch38=GRCh38"; |
144 |
|
|
145 |
|
private static final String VCF_SPECIES = "VCF_SPECIES"; |
146 |
|
|
147 |
|
private static final String DEFAULT_REFERENCE = "grch37"; |
148 |
|
|
149 |
|
|
150 |
|
|
151 |
|
|
152 |
|
|
153 |
|
private static final String CSQ_CONSEQUENCE_KEY = "Consequence"; |
154 |
|
private static final String CSQ_ALLELE_KEY = "Allele"; |
155 |
|
private static final String CSQ_ALLELE_NUM_KEY = "ALLELE_NUM"; |
156 |
|
private static final String CSQ_FEATURE_KEY = "Feature"; |
157 |
|
|
158 |
|
|
159 |
|
|
160 |
|
|
161 |
|
|
162 |
|
|
163 |
|
private static final String CSQ_FIELD = "CSQ"; |
164 |
|
|
165 |
|
|
166 |
|
|
167 |
|
|
168 |
|
private static final String PIPE_REGEX = "\\|"; |
169 |
|
|
170 |
|
|
171 |
|
|
172 |
|
|
173 |
|
private static final String COMMA = ","; |
174 |
|
|
175 |
|
|
176 |
|
|
177 |
|
|
178 |
|
private static final String FEATURE_GROUP_VCF = "VCF"; |
179 |
|
|
180 |
|
|
181 |
|
|
182 |
|
|
183 |
|
|
184 |
|
private static final String EXCL = "!"; |
185 |
|
|
186 |
|
|
187 |
|
|
188 |
|
|
189 |
|
protected String vcfFilePath; |
190 |
|
|
191 |
|
|
192 |
|
|
193 |
|
|
194 |
|
|
195 |
|
|
196 |
|
private Map<String, Map<int[], int[]>> assemblyMappings; |
197 |
|
|
198 |
|
private VCFReader reader; |
199 |
|
|
200 |
|
|
201 |
|
|
202 |
|
|
203 |
|
private VCFHeader header; |
204 |
|
|
205 |
|
|
206 |
|
|
207 |
|
|
208 |
|
private String vcfSpecies; |
209 |
|
|
210 |
|
|
211 |
|
|
212 |
|
|
213 |
|
private String vcfAssembly; |
214 |
|
|
215 |
|
|
216 |
|
|
217 |
|
|
218 |
|
private SAMSequenceDictionary dictionary; |
219 |
|
|
220 |
|
|
221 |
|
|
222 |
|
|
223 |
|
|
224 |
|
|
225 |
|
private int csqConsequenceFieldIndex = -1; |
226 |
|
private int csqAlleleFieldIndex = -1; |
227 |
|
private int csqAlleleNumberFieldIndex = -1; |
228 |
|
private int csqFeatureFieldIndex = -1; |
229 |
|
|
230 |
|
|
231 |
|
|
232 |
|
|
233 |
|
|
234 |
|
|
235 |
|
|
236 |
|
|
237 |
|
private String sourceId; |
238 |
|
|
239 |
|
|
240 |
|
|
241 |
|
|
242 |
|
|
243 |
|
List<String> vcfFieldsOfInterest; |
244 |
|
|
245 |
|
|
246 |
|
|
247 |
|
|
248 |
|
|
249 |
|
|
250 |
|
Map<Integer, String> vepFieldsOfInterest; |
251 |
|
|
252 |
|
|
253 |
|
|
254 |
|
|
255 |
|
|
256 |
|
private Set<String> badData; |
257 |
|
|
258 |
|
|
259 |
|
|
260 |
|
|
261 |
|
@param |
262 |
|
|
|
|
| 75% |
Uncovered Elements: 1 (4) |
Complexity: 2 |
Complexity Density: 0.5 |
|
263 |
4 |
/**
 * Constructor given the path to a VCF file. The file is opened and its header
 * read immediately. A failure to open the file is reported on stderr but does
 * not prevent construction.
 * 
 * @param vcfFile
 *          path of the VCF file to load from
 */
public VCFLoader(String vcfFile)
{
  this.assemblyMappings = new HashMap<>();

  try
  {
    initialise(vcfFile);
  } catch (IOException ioe)
  {
    // deliberately not rethrown: the loader is left without reader or header
    System.err.println("Error opening VCF file: " + ioe.getMessage());
  }
}
276 |
|
|
277 |
|
|
278 |
|
|
279 |
|
|
280 |
|
|
281 |
|
|
282 |
|
|
283 |
|
|
284 |
|
@param |
285 |
|
@param |
286 |
|
|
|
|
| 0% |
Uncovered Elements: 5 (5) |
Complexity: 2 |
Complexity Density: 0.67 |
|
287 |
0 |
public void loadVCF(SequenceI[] seqs, final AlignViewControllerGuiI gui)... |
288 |
|
{ |
289 |
0 |
if (gui != null) |
290 |
|
{ |
291 |
0 |
gui.setStatus(MessageManager.getString("label.searching_vcf")); |
292 |
|
} |
293 |
|
|
294 |
0 |
new Thread() |
295 |
|
{ |
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
296 |
0 |
@Override... |
297 |
|
public void run() |
298 |
|
{ |
299 |
0 |
VCFLoader.this.doLoad(seqs, gui); |
300 |
|
} |
301 |
|
}.start(); |
302 |
|
} |
303 |
|
|
304 |
|
|
305 |
|
|
306 |
|
|
307 |
|
@param |
308 |
|
|
309 |
|
@return |
310 |
|
|
|
|
| 69.6% |
Uncovered Elements: 7 (23) |
Complexity: 4 |
Complexity Density: 0.24 |
|
311 |
3 |
public SequenceI loadVCFContig(String contig)... |
312 |
|
{ |
313 |
3 |
VCFHeaderLine headerLine = header.getOtherHeaderLine(VCFHeader.REFERENCE_KEY); |
314 |
3 |
if (headerLine == null) |
315 |
|
{ |
316 |
0 |
Cache.log.error("VCF reference header not found"); |
317 |
0 |
return null; |
318 |
|
} |
319 |
3 |
String ref = headerLine.getValue(); |
320 |
3 |
if (ref.startsWith("file://")) |
321 |
|
{ |
322 |
0 |
ref = ref.substring(7); |
323 |
|
} |
324 |
3 |
setSpeciesAndAssembly(ref); |
325 |
|
|
326 |
3 |
SequenceI seq = null; |
327 |
3 |
File dbFile = new File(ref); |
328 |
|
|
329 |
3 |
if (dbFile.exists()) |
330 |
|
{ |
331 |
3 |
HtsContigDb db = new HtsContigDb("", dbFile); |
332 |
3 |
seq = db.getSequenceProxy(contig); |
333 |
3 |
loadSequenceVCF(seq); |
334 |
3 |
db.close(); |
335 |
|
} |
336 |
|
else |
337 |
|
{ |
338 |
0 |
Cache.log.error("VCF reference not found: " + ref); |
339 |
|
} |
340 |
|
|
341 |
3 |
return seq; |
342 |
|
} |
343 |
|
|
344 |
|
|
345 |
|
|
346 |
|
|
347 |
|
@param |
348 |
|
@param |
349 |
|
|
350 |
|
|
|
|
| 60.5% |
Uncovered Elements: 15 (38) |
Complexity: 9 |
Complexity Density: 0.35 |
|
351 |
3 |
protected void doLoad(SequenceI[] seqs, AlignViewControllerGuiI gui)... |
352 |
|
{ |
353 |
3 |
try |
354 |
|
{ |
355 |
3 |
VCFHeaderLine ref = header |
356 |
|
.getOtherHeaderLine(VCFHeader.REFERENCE_KEY); |
357 |
3 |
String reference = ref == null ? null : ref.getValue(); |
358 |
|
|
359 |
3 |
setSpeciesAndAssembly(reference); |
360 |
|
|
361 |
3 |
int varCount = 0; |
362 |
3 |
int seqCount = 0; |
363 |
|
|
364 |
|
|
365 |
|
|
366 |
|
|
367 |
3 |
for (SequenceI seq : seqs) |
368 |
|
{ |
369 |
21 |
int added = loadSequenceVCF(seq); |
370 |
21 |
if (added > 0) |
371 |
|
{ |
372 |
11 |
seqCount++; |
373 |
11 |
varCount += added; |
374 |
11 |
transferAddedFeatures(seq); |
375 |
|
} |
376 |
|
} |
377 |
3 |
if (gui != null) |
378 |
|
{ |
379 |
0 |
String msg = MessageManager.formatMessage("label.added_vcf", |
380 |
|
varCount, seqCount); |
381 |
0 |
gui.setStatus(msg); |
382 |
0 |
if (gui.getFeatureSettingsUI() != null) |
383 |
|
{ |
384 |
0 |
gui.getFeatureSettingsUI().discoverAllFeatureData(); |
385 |
|
} |
386 |
|
} |
387 |
|
} catch (Throwable e) |
388 |
|
{ |
389 |
0 |
System.err.println("Error processing VCF: " + e.getMessage()); |
390 |
0 |
e.printStackTrace(); |
391 |
0 |
if (gui != null) |
392 |
|
{ |
393 |
0 |
gui.setStatus("Error occurred - see console for details"); |
394 |
|
} |
395 |
|
} finally |
396 |
|
{ |
397 |
3 |
if (reader != null) |
398 |
|
{ |
399 |
3 |
try |
400 |
|
{ |
401 |
3 |
reader.close(); |
402 |
|
} catch (IOException e) |
403 |
|
{ |
404 |
|
|
405 |
|
} |
406 |
|
} |
407 |
3 |
header = null; |
408 |
3 |
dictionary = null; |
409 |
|
} |
410 |
|
} |
411 |
|
|
412 |
|
|
413 |
|
|
414 |
|
|
415 |
|
|
416 |
|
|
417 |
|
|
418 |
|
|
419 |
|
|
420 |
|
|
421 |
|
|
422 |
|
@param |
423 |
|
@see |
424 |
|
@see |
425 |
|
@see |
426 |
|
|
|
|
| 53.1% |
Uncovered Elements: 15 (32) |
Complexity: 7 |
Complexity Density: 0.35 |
|
427 |
6 |
protected void setSpeciesAndAssembly(String reference)... |
428 |
|
{ |
429 |
6 |
if (reference == null) |
430 |
|
{ |
431 |
0 |
Cache.log.error("No VCF ##reference found, defaulting to " |
432 |
|
+ DEFAULT_REFERENCE + ":" + DEFAULT_SPECIES); |
433 |
0 |
reference = DEFAULT_REFERENCE; |
434 |
|
} |
435 |
6 |
reference = reference.toLowerCase(); |
436 |
|
|
437 |
|
|
438 |
|
|
439 |
|
|
440 |
|
|
441 |
|
|
442 |
|
|
443 |
|
|
444 |
6 |
String prop = Cache.getDefault(VCF_ASSEMBLY, DEFAULT_VCF_ASSEMBLY); |
445 |
6 |
for (String token : prop.split(",")) |
446 |
|
{ |
447 |
6 |
String[] tokens = token.split("="); |
448 |
6 |
if (tokens.length == 2) |
449 |
|
{ |
450 |
6 |
if (reference.contains(tokens[0].trim().toLowerCase())) |
451 |
|
{ |
452 |
3 |
vcfAssembly = tokens[1].trim(); |
453 |
3 |
break; |
454 |
|
} |
455 |
|
} |
456 |
|
} |
457 |
|
|
458 |
6 |
vcfSpecies = DEFAULT_SPECIES; |
459 |
6 |
prop = Cache.getProperty(VCF_SPECIES); |
460 |
6 |
if (prop != null) |
461 |
|
{ |
462 |
0 |
for (String token : prop.split(",")) |
463 |
|
{ |
464 |
0 |
String[] tokens = token.split("="); |
465 |
0 |
if (tokens.length == 2) |
466 |
|
{ |
467 |
0 |
if (reference.contains(tokens[0].trim().toLowerCase())) |
468 |
|
{ |
469 |
0 |
vcfSpecies = tokens[1].trim(); |
470 |
0 |
break; |
471 |
|
} |
472 |
|
} |
473 |
|
} |
474 |
|
} |
475 |
|
} |
476 |
|
|
477 |
|
|
478 |
|
|
479 |
|
|
480 |
|
@param |
481 |
|
@throws |
482 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (8) |
Complexity: 2 |
Complexity Density: 0.25 |
|
483 |
4 |
private void initialise(String filePath) throws IOException... |
484 |
|
{ |
485 |
4 |
vcfFilePath = filePath; |
486 |
|
|
487 |
4 |
reader = new VCFReader(filePath); |
488 |
|
|
489 |
4 |
header = reader.getFileHeader(); |
490 |
|
|
491 |
4 |
try |
492 |
|
{ |
493 |
4 |
dictionary = header.getSequenceDictionary(); |
494 |
|
} catch (SAMException e) |
495 |
|
{ |
496 |
|
|
497 |
|
} |
498 |
|
|
499 |
4 |
sourceId = filePath; |
500 |
|
|
501 |
4 |
saveMetadata(sourceId); |
502 |
|
|
503 |
|
|
504 |
|
|
505 |
|
|
506 |
4 |
parseCsqHeader(); |
507 |
|
} |
508 |
|
|
509 |
|
|
510 |
|
|
511 |
|
|
512 |
|
|
513 |
|
@param |
514 |
|
|
|
|
| 77.4% |
Uncovered Elements: 7 (31) |
Complexity: 7 |
Complexity Density: 0.24 |
|
515 |
4 |
void saveMetadata(String theSourceId)... |
516 |
|
{ |
517 |
4 |
List<Pattern> vcfFieldPatterns = getFieldMatchers(VCF_FIELDS_PREF, |
518 |
|
DEFAULT_VCF_FIELDS); |
519 |
4 |
vcfFieldsOfInterest = new ArrayList<>(); |
520 |
|
|
521 |
4 |
FeatureSource metadata = new FeatureSource(theSourceId); |
522 |
|
|
523 |
4 |
for (VCFInfoHeaderLine info : header.getInfoHeaderLines()) |
524 |
|
{ |
525 |
13 |
String attributeId = info.getID(); |
526 |
13 |
String desc = info.getDescription(); |
527 |
13 |
VCFHeaderLineType type = info.getType(); |
528 |
13 |
FeatureAttributeType attType = null; |
529 |
13 |
switch (type) |
530 |
|
{ |
531 |
0 |
case Character: |
532 |
0 |
attType = FeatureAttributeType.Character; |
533 |
0 |
break; |
534 |
0 |
case Flag: |
535 |
0 |
attType = FeatureAttributeType.Flag; |
536 |
0 |
break; |
537 |
7 |
case Float: |
538 |
7 |
attType = FeatureAttributeType.Float; |
539 |
7 |
break; |
540 |
5 |
case Integer: |
541 |
5 |
attType = FeatureAttributeType.Integer; |
542 |
5 |
break; |
543 |
1 |
case String: |
544 |
1 |
attType = FeatureAttributeType.String; |
545 |
1 |
break; |
546 |
|
} |
547 |
13 |
metadata.setAttributeName(attributeId, desc); |
548 |
13 |
metadata.setAttributeType(attributeId, attType); |
549 |
|
|
550 |
13 |
if (isFieldWanted(attributeId, vcfFieldPatterns)) |
551 |
|
{ |
552 |
13 |
vcfFieldsOfInterest.add(attributeId); |
553 |
|
} |
554 |
|
} |
555 |
|
|
556 |
4 |
FeatureSources.getInstance().addSource(theSourceId, metadata); |
557 |
|
} |
558 |
|
|
559 |
|
|
560 |
|
|
561 |
|
|
562 |
|
|
563 |
|
|
564 |
|
@param |
565 |
|
@param |
566 |
|
@return |
567 |
|
|
|
|
| 66.7% |
Uncovered Elements: 2 (6) |
Complexity: 2 |
Complexity Density: 0.5 |
|
568 |
22 |
private boolean isFieldWanted(String id, List<Pattern> filters)... |
569 |
|
{ |
570 |
22 |
for (Pattern p : filters) |
571 |
|
{ |
572 |
22 |
if (p.matcher(id.toUpperCase()).matches()) |
573 |
|
{ |
574 |
22 |
return true; |
575 |
|
} |
576 |
|
} |
577 |
0 |
return false; |
578 |
|
} |
579 |
|
|
580 |
|
|
581 |
|
|
582 |
|
|
583 |
|
|
584 |
|
|
585 |
|
|
586 |
|
|
587 |
|
|
588 |
|
|
|
|
| 89.2% |
Uncovered Elements: 4 (37) |
Complexity: 8 |
Complexity Density: 0.35 |
|
589 |
4 |
protected void parseCsqHeader()... |
590 |
|
{ |
591 |
4 |
List<Pattern> vepFieldFilters = getFieldMatchers(VEP_FIELDS_PREF, |
592 |
|
DEFAULT_VEP_FIELDS); |
593 |
4 |
vepFieldsOfInterest = new HashMap<>(); |
594 |
|
|
595 |
4 |
VCFInfoHeaderLine csqInfo = header.getInfoHeaderLine(CSQ_FIELD); |
596 |
4 |
if (csqInfo == null) |
597 |
|
{ |
598 |
3 |
return; |
599 |
|
} |
600 |
|
|
601 |
|
|
602 |
|
|
603 |
|
|
604 |
|
|
605 |
1 |
String desc = csqInfo.getDescription(); |
606 |
1 |
int spacePos = desc.lastIndexOf(" "); |
607 |
1 |
desc = desc.substring(spacePos + 1); |
608 |
|
|
609 |
1 |
if (desc != null) |
610 |
|
{ |
611 |
1 |
String[] format = desc.split(PIPE_REGEX); |
612 |
1 |
int index = 0; |
613 |
1 |
for (String field : format) |
614 |
|
{ |
615 |
9 |
if (CSQ_CONSEQUENCE_KEY.equals(field)) |
616 |
|
{ |
617 |
1 |
csqConsequenceFieldIndex = index; |
618 |
|
} |
619 |
9 |
if (CSQ_ALLELE_NUM_KEY.equals(field)) |
620 |
|
{ |
621 |
0 |
csqAlleleNumberFieldIndex = index; |
622 |
|
} |
623 |
9 |
if (CSQ_ALLELE_KEY.equals(field)) |
624 |
|
{ |
625 |
1 |
csqAlleleFieldIndex = index; |
626 |
|
} |
627 |
9 |
if (CSQ_FEATURE_KEY.equals(field)) |
628 |
|
{ |
629 |
1 |
csqFeatureFieldIndex = index; |
630 |
|
} |
631 |
|
|
632 |
9 |
if (isFieldWanted(field, vepFieldFilters)) |
633 |
|
{ |
634 |
9 |
vepFieldsOfInterest.put(index, field); |
635 |
|
} |
636 |
|
|
637 |
9 |
index++; |
638 |
|
} |
639 |
|
} |
640 |
|
} |
641 |
|
|
642 |
|
|
643 |
|
|
644 |
|
|
645 |
|
|
646 |
|
|
647 |
|
|
648 |
|
|
649 |
|
|
650 |
|
|
651 |
|
|
652 |
|
|
653 |
|
@param |
654 |
|
@param |
655 |
|
@return |
656 |
|
|
|
|
| 87.5% |
Uncovered Elements: 1 (8) |
Complexity: 2 |
Complexity Density: 0.25 |
|
657 |
8 |
private List<Pattern> getFieldMatchers(String key, String def)... |
658 |
|
{ |
659 |
8 |
String pref = Cache.getDefault(key, def); |
660 |
8 |
List<Pattern> patterns = new ArrayList<>(); |
661 |
8 |
String[] tokens = pref.split(","); |
662 |
8 |
for (String token : tokens) |
663 |
|
{ |
664 |
8 |
try |
665 |
|
{ |
666 |
8 |
patterns.add(Pattern.compile(token.toUpperCase())); |
667 |
|
} catch (PatternSyntaxException e) |
668 |
|
{ |
669 |
0 |
System.err.println("Invalid pattern ignored: " + token); |
670 |
|
} |
671 |
|
} |
672 |
8 |
return patterns; |
673 |
|
} |
674 |
|
|
675 |
|
|
676 |
|
|
677 |
|
|
678 |
|
|
679 |
|
@param |
680 |
|
|
|
|
| 59.1% |
Uncovered Elements: 9 (22) |
Complexity: 6 |
Complexity Density: 0.43 |
|
681 |
11 |
protected void transferAddedFeatures(SequenceI seq)... |
682 |
|
{ |
683 |
11 |
List<DBRefEntry> dbrefs = seq.getDBRefs(); |
684 |
11 |
if (dbrefs == null) |
685 |
|
{ |
686 |
0 |
return; |
687 |
|
} |
688 |
11 |
for (DBRefEntry dbref : dbrefs) |
689 |
|
{ |
690 |
16 |
Mapping mapping = dbref.getMap(); |
691 |
16 |
if (mapping == null || mapping.getTo() == null) |
692 |
|
{ |
693 |
11 |
continue; |
694 |
|
} |
695 |
|
|
696 |
5 |
SequenceI mapTo = mapping.getTo(); |
697 |
5 |
MapList map = mapping.getMap(); |
698 |
5 |
if (map.getFromRatio() == 3) |
699 |
|
{ |
700 |
|
|
701 |
|
|
702 |
|
|
703 |
|
|
704 |
|
|
705 |
|
} |
706 |
|
else |
707 |
|
{ |
708 |
|
|
709 |
|
|
710 |
|
|
711 |
0 |
List<SequenceFeature> features = seq.getFeatures() |
712 |
|
.getPositionalFeatures(SequenceOntologyI.SEQUENCE_VARIANT); |
713 |
0 |
for (SequenceFeature sf : features) |
714 |
|
{ |
715 |
0 |
if (FEATURE_GROUP_VCF.equals(sf.getFeatureGroup())) |
716 |
|
{ |
717 |
0 |
transferFeature(sf, mapTo, map); |
718 |
|
} |
719 |
|
} |
720 |
|
} |
721 |
|
} |
722 |
|
} |
723 |
|
|
724 |
|
|
725 |
|
|
726 |
|
|
727 |
|
|
728 |
|
@param |
729 |
|
@return |
730 |
|
|
|
|
| 81.8% |
Uncovered Elements: 2 (11) |
Complexity: 3 |
Complexity Density: 0.43 |
|
731 |
24 |
protected int loadSequenceVCF(SequenceI seq)... |
732 |
|
{ |
733 |
24 |
VCFMap vcfMap = getVcfMap(seq); |
734 |
24 |
if (vcfMap == null) |
735 |
|
{ |
736 |
0 |
return 0; |
737 |
|
} |
738 |
|
|
739 |
|
|
740 |
|
|
741 |
|
|
742 |
24 |
SequenceI dss = seq.getDatasetSequence(); |
743 |
24 |
if (dss == null) |
744 |
|
{ |
745 |
3 |
dss = seq; |
746 |
|
} |
747 |
24 |
return addVcfVariants(dss, vcfMap); |
748 |
|
} |
749 |
|
|
750 |
|
|
751 |
|
|
752 |
|
|
753 |
|
@param |
754 |
|
@return |
755 |
|
|
|
|
| 46.7% |
Uncovered Elements: 24 (45) |
Complexity: 8 |
Complexity Density: 0.26 |
|
756 |
24 |
private VCFMap getVcfMap(SequenceI seq)... |
757 |
|
{ |
758 |
|
|
759 |
|
|
760 |
|
|
761 |
24 |
VCFMap vcfMap = null; |
762 |
24 |
if (dictionary != null) |
763 |
|
{ |
764 |
3 |
vcfMap = getContigMap(seq); |
765 |
|
} |
766 |
24 |
if (vcfMap != null) |
767 |
|
{ |
768 |
3 |
return vcfMap; |
769 |
|
} |
770 |
|
|
771 |
|
|
772 |
|
|
773 |
|
|
774 |
|
|
775 |
21 |
GeneLociI seqCoords = seq.getGeneLoci(); |
776 |
21 |
if (seqCoords == null) |
777 |
|
{ |
778 |
0 |
Cache.log.warn(String.format( |
779 |
|
"Can't query VCF for %s as chromosome coordinates not known", |
780 |
|
seq.getName())); |
781 |
0 |
return null; |
782 |
|
} |
783 |
|
|
784 |
21 |
String species = seqCoords.getSpeciesId(); |
785 |
21 |
String chromosome = seqCoords.getChromosomeId(); |
786 |
21 |
String seqRef = seqCoords.getAssemblyId(); |
787 |
21 |
MapList map = seqCoords.getMapping(); |
788 |
|
|
789 |
|
|
790 |
|
|
791 |
21 |
if (!vcfSpecies.equalsIgnoreCase(species)) |
792 |
|
{ |
793 |
0 |
Cache.log.warn("No VCF loaded to " + seq.getName() |
794 |
|
+ " as species not matched"); |
795 |
0 |
return null; |
796 |
|
} |
797 |
|
|
798 |
21 |
if (seqRef.equalsIgnoreCase(vcfAssembly)) |
799 |
|
{ |
800 |
21 |
return new VCFMap(chromosome, map); |
801 |
|
} |
802 |
|
|
803 |
|
|
804 |
|
|
805 |
|
|
806 |
|
|
807 |
0 |
List<int[]> toVcfRanges = new ArrayList<>(); |
808 |
0 |
List<int[]> fromSequenceRanges = new ArrayList<>(); |
809 |
|
|
810 |
0 |
for (int[] range : map.getToRanges()) |
811 |
|
{ |
812 |
0 |
int[] fromRange = map.locateInFrom(range[0], range[1]); |
813 |
0 |
if (fromRange == null) |
814 |
|
{ |
815 |
|
|
816 |
0 |
continue; |
817 |
|
} |
818 |
|
|
819 |
0 |
int[] newRange = mapReferenceRange(range, chromosome, "human", seqRef, |
820 |
|
vcfAssembly); |
821 |
0 |
if (newRange == null) |
822 |
|
{ |
823 |
0 |
Cache.log.error( |
824 |
|
String.format("Failed to map %s:%s:%s:%d:%d to %s", species, |
825 |
|
chromosome, seqRef, range[0], range[1], |
826 |
|
vcfAssembly)); |
827 |
0 |
continue; |
828 |
|
} |
829 |
|
else |
830 |
|
{ |
831 |
0 |
toVcfRanges.add(newRange); |
832 |
0 |
fromSequenceRanges.add(fromRange); |
833 |
|
} |
834 |
|
} |
835 |
|
|
836 |
0 |
return new VCFMap(chromosome, |
837 |
|
new MapList(fromSequenceRanges, toVcfRanges, 1, 1)); |
838 |
|
} |
839 |
|
|
840 |
|
|
841 |
|
|
842 |
|
|
843 |
|
|
844 |
|
|
845 |
|
@param |
846 |
|
@return |
847 |
|
|
|
|
| 75% |
Uncovered Elements: 3 (12) |
Complexity: 3 |
Complexity Density: 0.38 |
|
848 |
3 |
private VCFMap getContigMap(SequenceI seq)... |
849 |
|
{ |
850 |
3 |
String id = seq.getName(); |
851 |
3 |
SAMSequenceRecord contig = dictionary.getSequence(id); |
852 |
3 |
if (contig != null) |
853 |
|
{ |
854 |
3 |
int len = seq.getLength(); |
855 |
3 |
if (len == contig.getSequenceLength()) |
856 |
|
{ |
857 |
3 |
MapList map = new MapList(new int[] { 1, len }, |
858 |
|
new int[] |
859 |
|
{ 1, len }, 1, 1); |
860 |
3 |
return new VCFMap(id, map); |
861 |
|
} |
862 |
|
} |
863 |
0 |
return null; |
864 |
|
} |
865 |
|
|
866 |
|
|
867 |
|
|
868 |
|
|
869 |
|
|
870 |
|
|
871 |
|
@param |
872 |
|
@param |
873 |
|
|
874 |
|
@return |
875 |
|
|
|
|
| 90.9% |
Uncovered Elements: 2 (22) |
Complexity: 4 |
Complexity Density: 0.22 |
|
876 |
24 |
protected int addVcfVariants(SequenceI seq, VCFMap map)... |
877 |
|
{ |
878 |
24 |
boolean forwardStrand = map.map.isToForwardStrand(); |
879 |
|
|
880 |
|
|
881 |
|
|
882 |
|
|
883 |
24 |
int count = 0; |
884 |
|
|
885 |
24 |
for (int[] range : map.map.getToRanges()) |
886 |
|
{ |
887 |
39 |
int vcfStart = Math.min(range[0], range[1]); |
888 |
39 |
int vcfEnd = Math.max(range[0], range[1]); |
889 |
39 |
try |
890 |
|
{ |
891 |
39 |
CloseableIterator<VariantContext> variants = reader |
892 |
|
.query(map.chromosome, vcfStart, vcfEnd); |
893 |
75 |
while (variants.hasNext()) |
894 |
|
{ |
895 |
36 |
VariantContext variant = variants.next(); |
896 |
|
|
897 |
36 |
int[] featureRange = map.map.locateInFrom(variant.getStart(), |
898 |
|
variant.getEnd()); |
899 |
|
|
900 |
36 |
if (featureRange != null) |
901 |
|
{ |
902 |
34 |
int featureStart = Math.min(featureRange[0], featureRange[1]); |
903 |
34 |
int featureEnd = Math.max(featureRange[0], featureRange[1]); |
904 |
34 |
count += addAlleleFeatures(seq, variant, featureStart, |
905 |
|
featureEnd, forwardStrand); |
906 |
|
} |
907 |
|
} |
908 |
39 |
variants.close(); |
909 |
|
} catch (TribbleException e) |
910 |
|
{ |
911 |
|
|
912 |
|
|
913 |
|
|
914 |
0 |
String msg = String.format("Error reading VCF for %s:%d-%d: %s ", |
915 |
|
map.chromosome, vcfStart, vcfEnd,e.getLocalizedMessage()); |
916 |
0 |
Cache.log.error(msg); |
917 |
|
} |
918 |
|
} |
919 |
|
|
920 |
24 |
return count; |
921 |
|
} |
922 |
|
|
923 |
|
|
924 |
|
|
925 |
|
|
926 |
|
@param |
927 |
|
@param |
928 |
|
@param |
929 |
|
@return |
930 |
|
|
|
|
| 80% |
Uncovered Elements: 2 (10) |
Complexity: 3 |
Complexity Density: 0.5 |
|
931 |
100 |
protected String getAttributeValue(VariantContext variant,... |
932 |
|
String attributeName, int alleleIndex) |
933 |
|
{ |
934 |
100 |
Object att = variant.getAttribute(attributeName); |
935 |
|
|
936 |
100 |
if (att instanceof String) |
937 |
|
{ |
938 |
39 |
return (String) att; |
939 |
|
} |
940 |
61 |
else if (att instanceof ArrayList) |
941 |
|
{ |
942 |
61 |
return ((List<String>) att).get(alleleIndex); |
943 |
|
} |
944 |
|
|
945 |
0 |
return null; |
946 |
|
} |
947 |
|
|
948 |
|
|
949 |
|
|
950 |
|
|
951 |
|
|
952 |
|
@param |
953 |
|
@param |
954 |
|
@param |
955 |
|
@param |
956 |
|
@param |
957 |
|
@return |
958 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (7) |
Complexity: 2 |
Complexity Density: 0.4 |
|
959 |
34 |
protected int addAlleleFeatures(SequenceI seq, VariantContext variant,... |
960 |
|
int featureStart, int featureEnd, boolean forwardStrand) |
961 |
|
{ |
962 |
34 |
int added = 0; |
963 |
|
|
964 |
|
|
965 |
|
|
966 |
|
|
967 |
|
|
968 |
34 |
int altAlleleCount = variant.getAlternateAlleles().size(); |
969 |
85 |
for (int i = 0; i < altAlleleCount; i++) |
970 |
|
{ |
971 |
51 |
added += addAlleleFeature(seq, variant, i, featureStart, featureEnd, |
972 |
|
forwardStrand); |
973 |
|
} |
974 |
34 |
return added; |
975 |
|
} |
976 |
|
|
977 |
|
|
978 |
|
|
979 |
|
|
980 |
|
|
981 |
|
|
982 |
|
|
983 |
|
@param |
984 |
|
@param |
985 |
|
@param |
986 |
|
|
987 |
|
@param |
988 |
|
@param |
989 |
|
@param |
990 |
|
@return |
991 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (37) |
Complexity: 7 |
Complexity Density: 0.24 |
|
992 |
51 |
protected int addAlleleFeature(SequenceI seq, VariantContext variant,... |
993 |
|
int altAlleleIndex, int featureStart, int featureEnd, |
994 |
|
boolean forwardStrand) |
995 |
|
{ |
996 |
51 |
String reference = variant.getReference().getBaseString(); |
997 |
51 |
Allele alt = variant.getAlternateAllele(altAlleleIndex); |
998 |
51 |
String allele = alt.getBaseString(); |
999 |
|
|
1000 |
|
|
1001 |
|
|
1002 |
|
|
1003 |
|
|
1004 |
51 |
int referenceLength = reference.length(); |
1005 |
51 |
if (!forwardStrand && allele.length() > referenceLength |
1006 |
|
&& allele.startsWith(reference)) |
1007 |
|
{ |
1008 |
4 |
featureStart -= referenceLength; |
1009 |
4 |
featureEnd = featureStart; |
1010 |
4 |
char insertAfter = seq.getCharAt(featureStart - seq.getStart()); |
1011 |
4 |
reference = Dna.reverseComplement(String.valueOf(insertAfter)); |
1012 |
4 |
allele = allele.substring(referenceLength) + reference; |
1013 |
|
} |
1014 |
|
|
1015 |
|
|
1016 |
|
|
1017 |
|
|
1018 |
|
|
1019 |
51 |
StringBuilder sb = new StringBuilder(); |
1020 |
51 |
sb.append(forwardStrand ? reference : Dna.reverseComplement(reference)); |
1021 |
51 |
sb.append(COMMA); |
1022 |
51 |
sb.append(forwardStrand ? allele : Dna.reverseComplement(allele)); |
1023 |
51 |
String alleles = sb.toString(); |
1024 |
|
|
1025 |
|
|
1026 |
|
|
1027 |
|
|
1028 |
|
|
1029 |
51 |
String consequence = getConsequenceForAlleleAndFeature(variant, CSQ_FIELD, |
1030 |
|
altAlleleIndex, csqAlleleFieldIndex, |
1031 |
|
csqAlleleNumberFieldIndex, seq.getName().toLowerCase(), |
1032 |
|
csqFeatureFieldIndex); |
1033 |
|
|
1034 |
|
|
1035 |
|
|
1036 |
|
|
1037 |
51 |
String type = SequenceOntologyI.SEQUENCE_VARIANT; |
1038 |
51 |
if (consequence != null) |
1039 |
|
{ |
1040 |
7 |
type = getOntologyTerm(consequence); |
1041 |
|
} |
1042 |
|
|
1043 |
51 |
SequenceFeature sf = new SequenceFeature(type, alleles, featureStart, |
1044 |
|
featureEnd, FEATURE_GROUP_VCF); |
1045 |
51 |
sf.setSource(sourceId); |
1046 |
|
|
1047 |
|
|
1048 |
|
|
1049 |
|
|
1050 |
|
|
1051 |
51 |
addFeatureAttribute(sf, Gff3Helper.ALLELES, alleles); |
1052 |
|
|
1053 |
|
|
1054 |
|
|
1055 |
|
|
1056 |
51 |
addFeatureAttribute(sf, VCF_POS, String.valueOf(variant.getStart())); |
1057 |
51 |
addFeatureAttribute(sf, VCF_ID, variant.getID()); |
1058 |
51 |
addFeatureAttribute(sf, VCF_QUAL, |
1059 |
|
String.valueOf(variant.getPhredScaledQual())); |
1060 |
51 |
addFeatureAttribute(sf, VCF_FILTER, getFilter(variant)); |
1061 |
|
|
1062 |
51 |
addAlleleProperties(variant, sf, altAlleleIndex, consequence); |
1063 |
|
|
1064 |
51 |
seq.addSequenceFeature(sf); |
1065 |
|
|
1066 |
51 |
return 1; |
1067 |
|
} |
1068 |
|
|
1069 |
|
|
1070 |
|
|
1071 |
|
|
1072 |
|
|
1073 |
|
|
1074 |
|
|
1075 |
|
@param |
1076 |
|
@return |
1077 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (17) |
Complexity: 4 |
Complexity Density: 0.36 |
|
1078 |
51 |
String getFilter(VariantContext variant)... |
1079 |
|
{ |
1080 |
51 |
Set<String> filters = variant.getFilters(); |
1081 |
51 |
if (filters.isEmpty()) |
1082 |
|
{ |
1083 |
21 |
return NO_VALUE; |
1084 |
|
} |
1085 |
30 |
Iterator<String> iterator = filters.iterator(); |
1086 |
30 |
String first = iterator.next(); |
1087 |
30 |
if (filters.size() == 1) |
1088 |
|
{ |
1089 |
11 |
return first; |
1090 |
|
} |
1091 |
|
|
1092 |
19 |
StringBuilder sb = new StringBuilder(first); |
1093 |
38 |
while (iterator.hasNext()) |
1094 |
|
{ |
1095 |
19 |
sb.append(";").append(iterator.next()); |
1096 |
|
} |
1097 |
|
|
1098 |
19 |
return sb.toString(); |
1099 |
|
} |
1100 |
|
|
1101 |
|
|
1102 |
|
|
1103 |
|
|
1104 |
|
@param |
1105 |
|
@param |
1106 |
|
@param |
1107 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (4) |
Complexity: 4 |
Complexity Density: 2 |
|
1108 |
347 |
void addFeatureAttribute(SequenceFeature sf, String key, String value)... |
1109 |
|
{ |
1110 |
347 |
if (value != null && !value.isEmpty() && !NO_VALUE.equals(value)) |
1111 |
|
{ |
1112 |
275 |
sf.setValue(key, value); |
1113 |
|
} |
1114 |
|
} |
1115 |
|
|
1116 |
|
|
1117 |
|
|
1118 |
|
|
1119 |
|
|
1120 |
|
|
1121 |
|
|
1122 |
|
|
1123 |
|
|
1124 |
|
|
1125 |
|
@param |
1126 |
|
@return |
1127 |
|
@see |
1128 |
|
|
|
|
| 68.2% |
Uncovered Elements: 7 (22) |
Complexity: 6 |
Complexity Density: 0.5 |
|
1129 |
7 |
String getOntologyTerm(String consequence)... |
1130 |
|
{ |
1131 |
7 |
String type = SequenceOntologyI.SEQUENCE_VARIANT; |
1132 |
|
|
1133 |
|
|
1134 |
|
|
1135 |
|
|
1136 |
|
|
1137 |
7 |
if (csqAlleleFieldIndex == -1) |
1138 |
|
{ |
1139 |
|
|
1140 |
|
|
1141 |
|
|
1142 |
0 |
return type; |
1143 |
|
} |
1144 |
|
|
1145 |
7 |
if (consequence != null) |
1146 |
|
{ |
1147 |
7 |
String[] csqFields = consequence.split(PIPE_REGEX); |
1148 |
7 |
if (csqFields.length > csqConsequenceFieldIndex) |
1149 |
|
{ |
1150 |
7 |
type = csqFields[csqConsequenceFieldIndex]; |
1151 |
|
} |
1152 |
|
} |
1153 |
|
else |
1154 |
|
{ |
1155 |
|
|
1156 |
|
} |
1157 |
|
|
1158 |
|
|
1159 |
|
|
1160 |
|
|
1161 |
|
|
1162 |
7 |
if (type != null) |
1163 |
|
{ |
1164 |
7 |
int pos = type.indexOf('&'); |
1165 |
7 |
if (pos > 0) |
1166 |
|
{ |
1167 |
0 |
type = type.substring(0, pos); |
1168 |
|
} |
1169 |
|
} |
1170 |
7 |
return type; |
1171 |
|
} |
1172 |
|
|
1173 |
|
|
1174 |
|
|
1175 |
|
|
1176 |
|
|
1177 |
|
|
1178 |
|
|
1179 |
|
|
1180 |
|
|
1181 |
|
|
1182 |
|
|
1183 |
|
|
1184 |
|
|
1185 |
|
|
1186 |
|
@param |
1187 |
|
@param |
1188 |
|
@param |
1189 |
|
@param |
1190 |
|
@param |
1191 |
|
@param |
1192 |
|
@param |
1193 |
|
@return |
1194 |
|
|
|
|
| 87.5% |
Uncovered Elements: 3 (24) |
Complexity: 9 |
Complexity Density: 0.64 |
|
1195 |
51 |
private String getConsequenceForAlleleAndFeature(VariantContext variant,... |
1196 |
|
String vcfInfoId, int altAlleleIndex, int alleleFieldIndex, |
1197 |
|
int alleleNumberFieldIndex, |
1198 |
|
String seqName, int featureFieldIndex) |
1199 |
|
{ |
1200 |
51 |
if (alleleFieldIndex == -1 || featureFieldIndex == -1) |
1201 |
|
{ |
1202 |
37 |
return null; |
1203 |
|
} |
1204 |
14 |
Object value = variant.getAttribute(vcfInfoId); |
1205 |
|
|
1206 |
14 |
if (value == null || !(value instanceof List<?>)) |
1207 |
|
{ |
1208 |
0 |
return null; |
1209 |
|
} |
1210 |
|
|
1211 |
|
|
1212 |
|
|
1213 |
|
|
1214 |
|
|
1215 |
14 |
List<String> consequences = (List<String>) value; |
1216 |
|
|
1217 |
14 |
for (String consequence : consequences) |
1218 |
|
{ |
1219 |
41 |
String[] csqFields = consequence.split(PIPE_REGEX); |
1220 |
41 |
if (csqFields.length > featureFieldIndex) |
1221 |
|
{ |
1222 |
41 |
String featureIdentifier = csqFields[featureFieldIndex]; |
1223 |
41 |
if (featureIdentifier.length() > 4 |
1224 |
|
&& seqName.indexOf(featureIdentifier.toLowerCase()) > -1) |
1225 |
|
{ |
1226 |
|
|
1227 |
|
|
1228 |
|
|
1229 |
10 |
if (matchAllele(variant, altAlleleIndex, csqFields, |
1230 |
|
alleleFieldIndex, alleleNumberFieldIndex)) |
1231 |
|
{ |
1232 |
7 |
return consequence; |
1233 |
|
} |
1234 |
|
} |
1235 |
|
} |
1236 |
|
} |
1237 |
7 |
return null; |
1238 |
|
} |
1239 |
|
|
|
|
| 43.8% |
Uncovered Elements: 9 (16) |
Complexity: 5 |
Complexity Density: 0.5 |
|
1240 |
10 |
private boolean matchAllele(VariantContext variant, int altAlleleIndex,... |
1241 |
|
String[] csqFields, int alleleFieldIndex, |
1242 |
|
int alleleNumberFieldIndex) |
1243 |
|
{ |
1244 |
|
|
1245 |
|
|
1246 |
|
|
1247 |
|
|
1248 |
10 |
if (alleleNumberFieldIndex > -1) |
1249 |
|
{ |
1250 |
0 |
if (csqFields.length <= alleleNumberFieldIndex) |
1251 |
|
{ |
1252 |
0 |
return false; |
1253 |
|
} |
1254 |
0 |
String alleleNum = csqFields[alleleNumberFieldIndex]; |
1255 |
0 |
return String.valueOf(altAlleleIndex + 1).equals(alleleNum); |
1256 |
|
} |
1257 |
|
|
1258 |
|
|
1259 |
|
|
1260 |
|
|
1261 |
10 |
if (alleleFieldIndex > -1 && csqFields.length > alleleFieldIndex) |
1262 |
|
{ |
1263 |
10 |
String csqAllele = csqFields[alleleFieldIndex]; |
1264 |
10 |
String vcfAllele = variant.getAlternateAllele(altAlleleIndex) |
1265 |
|
.getBaseString(); |
1266 |
10 |
return csqAllele.equals(vcfAllele); |
1267 |
|
} |
1268 |
0 |
return false; |
1269 |
|
} |
1270 |
|
|
1271 |
|
|
1272 |
|
|
1273 |
|
|
1274 |
|
@param |
1275 |
|
@param |
1276 |
|
@param |
1277 |
|
|
1278 |
|
@param |
1279 |
|
|
1280 |
|
|
1281 |
|
|
|
|
| 93.9% |
Uncovered Elements: 2 (33) |
Complexity: 8 |
Complexity Density: 0.38 |
|
1282 |
51 |
protected void addAlleleProperties(VariantContext variant,... |
1283 |
|
SequenceFeature sf, final int altAlelleIndex, String consequence) |
1284 |
|
{ |
1285 |
51 |
Map<String, Object> atts = variant.getAttributes(); |
1286 |
|
|
1287 |
51 |
for (Entry<String, Object> att : atts.entrySet()) |
1288 |
|
{ |
1289 |
174 |
String key = att.getKey(); |
1290 |
|
|
1291 |
|
|
1292 |
|
|
1293 |
|
|
1294 |
|
|
1295 |
174 |
if (CSQ_FIELD.equals(key)) |
1296 |
|
{ |
1297 |
14 |
addConsequences(variant, sf, consequence); |
1298 |
14 |
continue; |
1299 |
|
} |
1300 |
|
|
1301 |
|
|
1302 |
|
|
1303 |
|
|
1304 |
160 |
if (!vcfFieldsOfInterest.contains(key)) |
1305 |
|
{ |
1306 |
46 |
continue; |
1307 |
|
} |
1308 |
|
|
1309 |
|
|
1310 |
|
|
1311 |
|
|
1312 |
|
|
1313 |
|
|
1314 |
114 |
VCFInfoHeaderLine infoHeader = header.getInfoHeaderLine(key); |
1315 |
114 |
if (infoHeader == null) |
1316 |
|
{ |
1317 |
|
|
1318 |
|
|
1319 |
|
|
1320 |
|
|
1321 |
0 |
continue; |
1322 |
|
} |
1323 |
|
|
1324 |
114 |
VCFHeaderLineCount number = infoHeader.getCountType(); |
1325 |
114 |
int index = altAlelleIndex; |
1326 |
114 |
if (number == VCFHeaderLineCount.R) |
1327 |
|
{ |
1328 |
|
|
1329 |
|
|
1330 |
|
|
1331 |
|
|
1332 |
14 |
index++; |
1333 |
|
} |
1334 |
100 |
else if (number != VCFHeaderLineCount.A) |
1335 |
|
{ |
1336 |
|
|
1337 |
|
|
1338 |
|
|
1339 |
14 |
continue; |
1340 |
|
} |
1341 |
|
|
1342 |
|
|
1343 |
|
|
1344 |
|
|
1345 |
100 |
String value = getAttributeValue(variant, key, index); |
1346 |
100 |
if (value != null && isValid(variant, key, value)) |
1347 |
|
{ |
1348 |
|
|
1349 |
|
|
1350 |
|
|
1351 |
|
|
1352 |
92 |
value = StringUtils.urlDecode(value, VCF_ENCODABLE); |
1353 |
92 |
addFeatureAttribute(sf, key, value); |
1354 |
|
} |
1355 |
|
} |
1356 |
|
} |
1357 |
|
|
1358 |
|
|
1359 |
|
|
1360 |
|
|
1361 |
|
|
1362 |
|
|
1363 |
|
@param |
1364 |
|
@param |
1365 |
|
@param |
1366 |
|
@return |
1367 |
|
|
|
|
| 82.6% |
Uncovered Elements: 4 (23) |
Complexity: 8 |
Complexity Density: 0.53 |
|
1368 |
100 |
protected boolean isValid(VariantContext variant, String infoId,... |
1369 |
|
String value) |
1370 |
|
{ |
1371 |
100 |
if (value == null || value.isEmpty() || NO_VALUE.equals(value)) |
1372 |
|
{ |
1373 |
8 |
return true; |
1374 |
|
} |
1375 |
92 |
VCFInfoHeaderLine infoHeader = header.getInfoHeaderLine(infoId); |
1376 |
92 |
if (infoHeader == null) |
1377 |
|
{ |
1378 |
0 |
Cache.log.error("Field " + infoId + " has no INFO header"); |
1379 |
0 |
return false; |
1380 |
|
} |
1381 |
92 |
VCFHeaderLineType infoType = infoHeader.getType(); |
1382 |
92 |
try |
1383 |
|
{ |
1384 |
92 |
if (infoType == VCFHeaderLineType.Integer) |
1385 |
|
{ |
1386 |
27 |
Integer.parseInt(value); |
1387 |
|
} |
1388 |
65 |
else if (infoType == VCFHeaderLineType.Float) |
1389 |
|
{ |
1390 |
65 |
Float.parseFloat(value); |
1391 |
|
} |
1392 |
|
} catch (NumberFormatException e) |
1393 |
|
{ |
1394 |
8 |
logInvalidValue(variant, infoId, value); |
1395 |
8 |
return false; |
1396 |
|
} |
1397 |
84 |
return true; |
1398 |
|
} |
1399 |
|
|
1400 |
|
|
1401 |
|
|
1402 |
|
|
1403 |
|
|
1404 |
|
@param |
1405 |
|
@param |
1406 |
|
@param |
1407 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (10) |
Complexity: 3 |
Complexity Density: 0.5 |
|
1408 |
8 |
private void logInvalidValue(VariantContext variant, String infoId,... |
1409 |
|
String value) |
1410 |
|
{ |
1411 |
8 |
if (badData == null) |
1412 |
|
{ |
1413 |
2 |
badData = new HashSet<>(); |
1414 |
|
} |
1415 |
8 |
String token = infoId + ":" + value; |
1416 |
8 |
if (!badData.contains(token)) |
1417 |
|
{ |
1418 |
4 |
badData.add(token); |
1419 |
4 |
Cache.log.error(String.format("Invalid VCF data at %s:%d %s=%s", |
1420 |
|
variant.getContig(), variant.getStart(), infoId, value)); |
1421 |
|
} |
1422 |
|
} |
1423 |
|
|
1424 |
|
|
1425 |
|
|
1426 |
|
|
1427 |
|
|
1428 |
|
|
1429 |
|
|
1430 |
|
|
1431 |
|
|
1432 |
|
@param |
1433 |
|
@param |
1434 |
|
@param |
1435 |
|
|
|
|
| 82.1% |
Uncovered Elements: 5 (28) |
Complexity: 9 |
Complexity Density: 0.5 |
|
1436 |
14 |
protected void addConsequences(VariantContext variant, SequenceFeature sf,... |
1437 |
|
String myConsequence) |
1438 |
|
{ |
1439 |
14 |
Object value = variant.getAttribute(CSQ_FIELD); |
1440 |
|
|
1441 |
14 |
if (value == null || !(value instanceof List<?>)) |
1442 |
|
{ |
1443 |
0 |
return; |
1444 |
|
} |
1445 |
|
|
1446 |
14 |
List<String> consequences = (List<String>) value; |
1447 |
|
|
1448 |
|
|
1449 |
|
|
1450 |
|
|
1451 |
|
|
1452 |
14 |
Map<String, String> csqValues = new HashMap<>(); |
1453 |
|
|
1454 |
14 |
for (String consequence : consequences) |
1455 |
|
{ |
1456 |
50 |
if (myConsequence == null || myConsequence.equals(consequence)) |
1457 |
|
{ |
1458 |
31 |
String[] csqFields = consequence.split(PIPE_REGEX); |
1459 |
|
|
1460 |
|
|
1461 |
|
|
1462 |
|
|
1463 |
|
|
1464 |
31 |
int i = 0; |
1465 |
31 |
for (String field : csqFields) |
1466 |
|
{ |
1467 |
279 |
if (field != null && field.length() > 0) |
1468 |
|
{ |
1469 |
279 |
String id = vepFieldsOfInterest.get(i); |
1470 |
279 |
if (id != null) |
1471 |
|
{ |
1472 |
|
|
1473 |
|
|
1474 |
|
|
1475 |
|
|
1476 |
279 |
field = StringUtils.urlDecode(field, VCF_ENCODABLE); |
1477 |
279 |
csqValues.put(id, field); |
1478 |
|
} |
1479 |
|
} |
1480 |
279 |
i++; |
1481 |
|
} |
1482 |
|
} |
1483 |
|
} |
1484 |
|
|
1485 |
14 |
if (!csqValues.isEmpty()) |
1486 |
|
{ |
1487 |
14 |
sf.setValue(CSQ_FIELD, csqValues); |
1488 |
|
} |
1489 |
|
} |
1490 |
|
|
1491 |
|
|
1492 |
|
|
1493 |
|
|
1494 |
|
|
1495 |
|
@param |
1496 |
|
@return |
1497 |
|
|
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
1498 |
0 |
protected String complement(byte[] reference)... |
1499 |
|
{ |
1500 |
0 |
return String.valueOf(Dna.getComplement((char) reference[0])); |
1501 |
|
} |
1502 |
|
|
1503 |
|
|
1504 |
|
|
1505 |
|
|
1506 |
|
|
1507 |
|
|
1508 |
|
|
1509 |
|
|
1510 |
|
|
1511 |
|
|
1512 |
|
|
1513 |
|
|
1514 |
|
@param |
1515 |
|
|
1516 |
|
@param |
1517 |
|
@param |
1518 |
|
@param |
1519 |
|
|
1520 |
|
@param |
1521 |
|
|
1522 |
|
@return |
1523 |
|
|
|
|
| 0% |
Uncovered Elements: 18 (18) |
Complexity: 4 |
Complexity Density: 0.33 |
|
1524 |
0 |
protected int[] mapReferenceRange(int[] queryRange, String chromosome,... |
1525 |
|
String species, String fromRef, String toRef) |
1526 |
|
{ |
1527 |
|
|
1528 |
|
|
1529 |
|
|
1530 |
|
|
1531 |
0 |
int[] mappedRange = findSubsumedRangeMapping(queryRange, chromosome, |
1532 |
|
species, fromRef, toRef); |
1533 |
0 |
if (mappedRange != null) |
1534 |
|
{ |
1535 |
0 |
return mappedRange; |
1536 |
|
} |
1537 |
|
|
1538 |
|
|
1539 |
|
|
1540 |
|
|
1541 |
0 |
EnsemblMap mapper = new EnsemblMap(); |
1542 |
0 |
int[] mapping = mapper.getAssemblyMapping(species, chromosome, fromRef, |
1543 |
|
toRef, queryRange); |
1544 |
|
|
1545 |
0 |
if (mapping == null) |
1546 |
|
{ |
1547 |
|
|
1548 |
0 |
return null; |
1549 |
|
} |
1550 |
|
|
1551 |
|
|
1552 |
|
|
1553 |
|
|
1554 |
0 |
String key = makeRangesKey(chromosome, species, fromRef, toRef); |
1555 |
0 |
if (!assemblyMappings.containsKey(key)) |
1556 |
|
{ |
1557 |
0 |
assemblyMappings.put(key, new HashMap<int[], int[]>()); |
1558 |
|
} |
1559 |
|
|
1560 |
0 |
assemblyMappings.get(key).put(queryRange, mapping); |
1561 |
|
|
1562 |
0 |
return mapping; |
1563 |
|
} |
1564 |
|
|
1565 |
|
|
1566 |
|
|
1567 |
|
|
1568 |
|
|
1569 |
|
|
1570 |
|
|
1571 |
|
|
1572 |
|
|
1573 |
|
|
1574 |
|
|
1575 |
|
|
1576 |
|
|
1577 |
|
|
1578 |
|
|
1579 |
|
@param |
1580 |
|
@param |
1581 |
|
@param |
1582 |
|
@param |
1583 |
|
@param |
1584 |
|
@return |
1585 |
|
|
|
|
| 0% |
Uncovered Elements: 19 (19) |
Complexity: 4 |
Complexity Density: 0.31 |
|
1586 |
0 |
protected int[] findSubsumedRangeMapping(int[] queryRange, String chromosome,... |
1587 |
|
String species, String fromRef, String toRef) |
1588 |
|
{ |
1589 |
0 |
String key = makeRangesKey(chromosome, species, fromRef, toRef); |
1590 |
0 |
if (assemblyMappings.containsKey(key)) |
1591 |
|
{ |
1592 |
0 |
Map<int[], int[]> mappedRanges = assemblyMappings.get(key); |
1593 |
0 |
for (Entry<int[], int[]> mappedRange : mappedRanges.entrySet()) |
1594 |
|
{ |
1595 |
0 |
int[] fromRange = mappedRange.getKey(); |
1596 |
0 |
int[] toRange = mappedRange.getValue(); |
1597 |
0 |
if (fromRange[1] - fromRange[0] == toRange[1] - toRange[0]) |
1598 |
|
{ |
1599 |
|
|
1600 |
|
|
1601 |
|
|
1602 |
0 |
if (MappingUtils.rangeContains(fromRange, queryRange)) |
1603 |
|
{ |
1604 |
|
|
1605 |
|
|
1606 |
|
|
1607 |
0 |
int offset = queryRange[0] - fromRange[0]; |
1608 |
0 |
int mappedRangeFrom = toRange[0] + offset; |
1609 |
0 |
int mappedRangeTo = mappedRangeFrom + (queryRange[1] - queryRange[0]); |
1610 |
0 |
return new int[] { mappedRangeFrom, mappedRangeTo }; |
1611 |
|
} |
1612 |
|
} |
1613 |
|
} |
1614 |
|
} |
1615 |
0 |
return null; |
1616 |
|
} |
1617 |
|
|
1618 |
|
|
1619 |
|
|
1620 |
|
|
1621 |
|
|
1622 |
|
|
1623 |
|
@param |
1624 |
|
@param |
1625 |
|
@param |
1626 |
|
|
1627 |
|
|
|
|
| 0% |
Uncovered Elements: 9 (9) |
Complexity: 2 |
Complexity Density: 0.29 |
|
1628 |
0 |
protected void transferFeature(SequenceFeature sf,... |
1629 |
|
SequenceI targetSequence, MapList mapping) |
1630 |
|
{ |
1631 |
0 |
int[] mappedRange = mapping.locateInTo(sf.getBegin(), sf.getEnd()); |
1632 |
|
|
1633 |
0 |
if (mappedRange != null) |
1634 |
|
{ |
1635 |
0 |
String group = sf.getFeatureGroup(); |
1636 |
0 |
int newBegin = Math.min(mappedRange[0], mappedRange[1]); |
1637 |
0 |
int newEnd = Math.max(mappedRange[0], mappedRange[1]); |
1638 |
0 |
SequenceFeature copy = new SequenceFeature(sf, newBegin, newEnd, |
1639 |
|
group, sf.getScore()); |
1640 |
0 |
targetSequence.addSequenceFeature(copy); |
1641 |
|
} |
1642 |
|
} |
1643 |
|
|
1644 |
|
|
1645 |
|
|
1646 |
|
|
1647 |
|
@param |
1648 |
|
@param |
1649 |
|
@param |
1650 |
|
@param |
1651 |
|
@return |
1652 |
|
|
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
1653 |
0 |
protected static String makeRangesKey(String chromosome, String species,... |
1654 |
|
String fromRef, String toRef) |
1655 |
|
{ |
1656 |
0 |
return species + EXCL + chromosome + EXCL + fromRef + EXCL |
1657 |
|
+ toRef; |
1658 |
|
} |
1659 |
|
} |