1 |
|
|
2 |
|
|
3 |
|
|
4 |
|
|
5 |
|
|
6 |
|
|
7 |
|
|
8 |
|
|
9 |
|
|
10 |
|
|
11 |
|
|
12 |
|
|
13 |
|
|
14 |
|
|
15 |
|
|
16 |
|
|
17 |
|
|
18 |
|
|
19 |
|
|
20 |
|
|
21 |
|
package jalview.io.vcf; |
22 |
|
|
23 |
|
import java.util.Locale; |
24 |
|
|
25 |
|
import java.io.File; |
26 |
|
import java.io.IOException; |
27 |
|
import java.util.ArrayList; |
28 |
|
import java.util.HashMap; |
29 |
|
import java.util.HashSet; |
30 |
|
import java.util.Iterator; |
31 |
|
import java.util.List; |
32 |
|
import java.util.Map; |
33 |
|
import java.util.Map.Entry; |
34 |
|
import java.util.Set; |
35 |
|
import java.util.regex.Pattern; |
36 |
|
import java.util.regex.PatternSyntaxException; |
37 |
|
|
38 |
|
import htsjdk.samtools.SAMException; |
39 |
|
import htsjdk.samtools.SAMSequenceDictionary; |
40 |
|
import htsjdk.samtools.SAMSequenceRecord; |
41 |
|
import htsjdk.samtools.util.CloseableIterator; |
42 |
|
import htsjdk.tribble.TribbleException; |
43 |
|
import htsjdk.variant.variantcontext.Allele; |
44 |
|
import htsjdk.variant.variantcontext.VariantContext; |
45 |
|
import htsjdk.variant.vcf.VCFConstants; |
46 |
|
import htsjdk.variant.vcf.VCFHeader; |
47 |
|
import htsjdk.variant.vcf.VCFHeaderLine; |
48 |
|
import htsjdk.variant.vcf.VCFHeaderLineCount; |
49 |
|
import htsjdk.variant.vcf.VCFHeaderLineType; |
50 |
|
import htsjdk.variant.vcf.VCFInfoHeaderLine; |
51 |
|
import jalview.analysis.Dna; |
52 |
|
import jalview.api.AlignViewControllerGuiI; |
53 |
|
import jalview.bin.Cache; |
54 |
|
import jalview.bin.Console; |
55 |
|
import jalview.datamodel.DBRefEntry; |
56 |
|
import jalview.datamodel.GeneLociI; |
57 |
|
import jalview.datamodel.Mapping; |
58 |
|
import jalview.datamodel.SequenceFeature; |
59 |
|
import jalview.datamodel.SequenceI; |
60 |
|
import jalview.datamodel.features.FeatureAttributeType; |
61 |
|
import jalview.datamodel.features.FeatureSource; |
62 |
|
import jalview.datamodel.features.FeatureSources; |
63 |
|
import jalview.ext.ensembl.EnsemblMap; |
64 |
|
import jalview.ext.htsjdk.HtsContigDb; |
65 |
|
import jalview.ext.htsjdk.VCFReader; |
66 |
|
import jalview.io.gff.Gff3Helper; |
67 |
|
import jalview.io.gff.SequenceOntologyI; |
68 |
|
import jalview.util.MapList; |
69 |
|
import jalview.util.MappingUtils; |
70 |
|
import jalview.util.MessageManager; |
71 |
|
import jalview.util.StringUtils; |
72 |
|
|
73 |
|
|
74 |
|
|
75 |
|
|
76 |
|
|
77 |
|
@author |
78 |
|
|
|
|
| 70% |
Uncovered Elements: 186 (620) |
Complexity: 151 |
Complexity Density: 0.37 |
|
79 |
|
public class VCFLoader |
80 |
|
{ |
81 |
|
private static final String VCF_ENCODABLE = ":;=%,"; |
82 |
|
|
83 |
|
|
84 |
|
|
85 |
|
|
86 |
|
private static final String VCF_POS = "POS"; |
87 |
|
|
88 |
|
private static final String VCF_ID = "ID"; |
89 |
|
|
90 |
|
private static final String VCF_QUAL = "QUAL"; |
91 |
|
|
92 |
|
private static final String VCF_FILTER = "FILTER"; |
93 |
|
|
94 |
|
private static final String NO_VALUE = VCFConstants.MISSING_VALUE_v4; |
95 |
|
|
96 |
|
private static final String DEFAULT_SPECIES = "homo_sapiens"; |
97 |
|
|
98 |
|
|
99 |
|
|
100 |
|
|
101 |
|
|
102 |
|
|
103 |
|
|
104 |
|
|
105 |
|
|
106 |
|
|
107 |
|
|
108 |
|
|
|
|
| 60% |
Uncovered Elements: 2 (5) |
Complexity: 2 |
Complexity Density: 0.67 |
|
109 |
|
class VCFMap |
110 |
|
{ |
111 |
|
final String chromosome; |
112 |
|
|
113 |
|
final MapList map; |
114 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (2) |
Complexity: 1 |
Complexity Density: 0.5 |
|
115 |
24 |
VCFMap(String chr, MapList m)... |
116 |
|
{ |
117 |
24 |
chromosome = chr; |
118 |
24 |
map = m; |
119 |
|
} |
120 |
|
|
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
121 |
0 |
@Override... |
122 |
|
public String toString() |
123 |
|
{ |
124 |
0 |
return chromosome + ":" + map.toString(); |
125 |
|
} |
126 |
|
} |
127 |
|
|
128 |
|
|
129 |
|
|
130 |
|
|
131 |
|
|
132 |
|
private static final String VEP_FIELDS_PREF = "VEP_FIELDS"; |
133 |
|
|
134 |
|
private static final String VCF_FIELDS_PREF = "VCF_FIELDS"; |
135 |
|
|
136 |
|
private static final String DEFAULT_VCF_FIELDS = ".*"; |
137 |
|
|
138 |
|
private static final String DEFAULT_VEP_FIELDS = ".*"; |
139 |
|
|
140 |
|
|
141 |
|
|
142 |
|
|
143 |
|
|
144 |
|
private static final String VCF_ASSEMBLY = "VCF_ASSEMBLY"; |
145 |
|
|
146 |
|
private static final String DEFAULT_VCF_ASSEMBLY = "assembly19=GRCh37,hs37=GRCh37,grch37=GRCh37,grch38=GRCh38"; |
147 |
|
|
148 |
|
private static final String VCF_SPECIES = "VCF_SPECIES"; |
149 |
|
|
150 |
|
private static final String DEFAULT_REFERENCE = "grch37"; |
151 |
|
|
152 |
|
|
153 |
|
|
154 |
|
|
155 |
|
|
156 |
|
|
157 |
|
private static final String CSQ_CONSEQUENCE_KEY = "Consequence"; |
158 |
|
|
159 |
|
private static final String CSQ_ALLELE_KEY = "Allele"; |
160 |
|
|
161 |
|
private static final String CSQ_ALLELE_NUM_KEY = "ALLELE_NUM"; |
162 |
|
|
163 |
|
|
164 |
|
private static final String CSQ_FEATURE_KEY = "Feature"; |
165 |
|
|
166 |
|
|
167 |
|
|
168 |
|
|
169 |
|
|
170 |
|
|
171 |
|
private static final String CSQ_FIELD = "CSQ"; |
172 |
|
|
173 |
|
|
174 |
|
|
175 |
|
|
176 |
|
private static final String PIPE_REGEX = "\\|"; |
177 |
|
|
178 |
|
|
179 |
|
|
180 |
|
|
181 |
|
private static final String COMMA = ","; |
182 |
|
|
183 |
|
|
184 |
|
|
185 |
|
|
186 |
|
private static final String FEATURE_GROUP_VCF = "VCF"; |
187 |
|
|
188 |
|
|
189 |
|
|
190 |
|
|
191 |
|
|
192 |
|
private static final String EXCL = "!"; |
193 |
|
|
194 |
|
|
195 |
|
|
196 |
|
|
197 |
|
protected String vcfFilePath; |
198 |
|
|
199 |
|
|
200 |
|
|
201 |
|
|
202 |
|
|
203 |
|
|
204 |
|
private Map<String, Map<int[], int[]>> assemblyMappings; |
205 |
|
|
206 |
|
private VCFReader reader; |
207 |
|
|
208 |
|
|
209 |
|
|
210 |
|
|
211 |
|
private VCFHeader header; |
212 |
|
|
213 |
|
|
214 |
|
|
215 |
|
|
216 |
|
private String vcfSpecies; |
217 |
|
|
218 |
|
|
219 |
|
|
220 |
|
|
221 |
|
private String vcfAssembly; |
222 |
|
|
223 |
|
|
224 |
|
|
225 |
|
|
226 |
|
private SAMSequenceDictionary dictionary; |
227 |
|
|
228 |
|
|
229 |
|
|
230 |
|
|
231 |
|
|
232 |
|
|
233 |
|
private int csqConsequenceFieldIndex = -1; |
234 |
|
|
235 |
|
private int csqAlleleFieldIndex = -1; |
236 |
|
|
237 |
|
private int csqAlleleNumberFieldIndex = -1; |
238 |
|
|
239 |
|
private int csqFeatureFieldIndex = -1; |
240 |
|
|
241 |
|
|
242 |
|
|
243 |
|
|
244 |
|
|
245 |
|
|
246 |
|
|
247 |
|
|
248 |
|
private String sourceId; |
249 |
|
|
250 |
|
|
251 |
|
|
252 |
|
|
253 |
|
|
254 |
|
List<String> vcfFieldsOfInterest; |
255 |
|
|
256 |
|
|
257 |
|
|
258 |
|
|
259 |
|
|
260 |
|
|
261 |
|
Map<Integer, String> vepFieldsOfInterest; |
262 |
|
|
263 |
|
|
264 |
|
|
265 |
|
|
266 |
|
|
267 |
|
private Set<String> badData; |
268 |
|
|
269 |
|
|
270 |
|
|
271 |
|
|
272 |
|
@param |
273 |
|
|
|
|
| 75% |
Uncovered Elements: 1 (4) |
Complexity: 2 |
Complexity Density: 0.5 |
|
274 |
4 |
/**
 * Creates a loader for the given VCF file. A failure to open the file is
 * reported on the error console and not propagated, so the loader may be
 * left without a reader or header.
 *
 * @param vcfFile
 *          path of the VCF file to read
 */
public VCFLoader(String vcfFile)
{
  try
  {
    initialise(vcfFile);
  } catch (IOException e)
  {
    String message = "Error opening VCF file: " + e.getMessage();
    Console.errPrintln(message);
  }

  // starts empty; nothing in the constructor populates it
  assemblyMappings = new HashMap<>();
}
288 |
|
|
289 |
|
|
290 |
|
|
291 |
|
|
292 |
|
|
293 |
|
|
294 |
|
|
295 |
|
|
296 |
|
@param |
297 |
|
@param |
298 |
|
|
|
|
| 0% |
Uncovered Elements: 5 (5) |
Complexity: 2 |
Complexity Density: 0.67 |
|
299 |
0 |
public void loadVCF(SequenceI[] seqs, final AlignViewControllerGuiI gui)... |
300 |
|
{ |
301 |
0 |
if (gui != null) |
302 |
|
{ |
303 |
0 |
gui.setStatus(MessageManager.getString("label.searching_vcf")); |
304 |
|
} |
305 |
|
|
306 |
0 |
new Thread() |
307 |
|
{ |
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
308 |
0 |
@Override... |
309 |
|
public void run() |
310 |
|
{ |
311 |
0 |
VCFLoader.this.doLoad(seqs, gui); |
312 |
|
} |
313 |
|
}.start(); |
314 |
|
} |
315 |
|
|
316 |
|
|
317 |
|
|
318 |
|
|
319 |
|
@param |
320 |
|
|
321 |
|
@return |
322 |
|
|
|
|
| 69.6% |
Uncovered Elements: 7 (23) |
Complexity: 4 |
Complexity Density: 0.24 |
|
323 |
3 |
public SequenceI loadVCFContig(String contig)... |
324 |
|
{ |
325 |
3 |
VCFHeaderLine headerLine = header |
326 |
|
.getOtherHeaderLine(VCFHeader.REFERENCE_KEY); |
327 |
3 |
if (headerLine == null) |
328 |
|
{ |
329 |
0 |
Console.error("VCF reference header not found"); |
330 |
0 |
return null; |
331 |
|
} |
332 |
3 |
String ref = headerLine.getValue(); |
333 |
3 |
if (ref.startsWith("file://")) |
334 |
|
{ |
335 |
0 |
ref = ref.substring(7); |
336 |
|
} |
337 |
3 |
setSpeciesAndAssembly(ref); |
338 |
|
|
339 |
3 |
SequenceI seq = null; |
340 |
3 |
File dbFile = new File(ref); |
341 |
|
|
342 |
3 |
if (dbFile.exists()) |
343 |
|
{ |
344 |
3 |
HtsContigDb db = new HtsContigDb("", dbFile); |
345 |
3 |
seq = db.getSequenceProxy(contig); |
346 |
3 |
loadSequenceVCF(seq); |
347 |
3 |
db.close(); |
348 |
|
} |
349 |
|
else |
350 |
|
{ |
351 |
0 |
Console.error("VCF reference not found: " + ref); |
352 |
|
} |
353 |
|
|
354 |
3 |
return seq; |
355 |
|
} |
356 |
|
|
357 |
|
|
358 |
|
|
359 |
|
|
360 |
|
@param |
361 |
|
@param |
362 |
|
|
363 |
|
|
|
|
| 60.5% |
Uncovered Elements: 15 (38) |
Complexity: 9 |
Complexity Density: 0.35 |
|
364 |
3 |
protected void doLoad(SequenceI[] seqs, AlignViewControllerGuiI gui)... |
365 |
|
{ |
366 |
3 |
try |
367 |
|
{ |
368 |
3 |
VCFHeaderLine ref = header |
369 |
|
.getOtherHeaderLine(VCFHeader.REFERENCE_KEY); |
370 |
3 |
String reference = ref == null ? null : ref.getValue(); |
371 |
|
|
372 |
3 |
setSpeciesAndAssembly(reference); |
373 |
|
|
374 |
3 |
int varCount = 0; |
375 |
3 |
int seqCount = 0; |
376 |
|
|
377 |
|
|
378 |
|
|
379 |
|
|
380 |
3 |
for (SequenceI seq : seqs) |
381 |
|
{ |
382 |
21 |
int added = loadSequenceVCF(seq); |
383 |
21 |
if (added > 0) |
384 |
|
{ |
385 |
11 |
seqCount++; |
386 |
11 |
varCount += added; |
387 |
11 |
transferAddedFeatures(seq); |
388 |
|
} |
389 |
|
} |
390 |
3 |
if (gui != null) |
391 |
|
{ |
392 |
0 |
String msg = MessageManager.formatMessage("label.added_vcf", |
393 |
|
varCount, seqCount); |
394 |
0 |
gui.setStatus(msg); |
395 |
0 |
if (gui.getFeatureSettingsUI() != null) |
396 |
|
{ |
397 |
0 |
gui.getFeatureSettingsUI().discoverAllFeatureData(); |
398 |
|
} |
399 |
|
} |
400 |
|
} catch (Throwable e) |
401 |
|
{ |
402 |
0 |
jalview.bin.Console |
403 |
|
.errPrintln("Error processing VCF: " + e.getMessage()); |
404 |
0 |
e.printStackTrace(); |
405 |
0 |
if (gui != null) |
406 |
|
{ |
407 |
0 |
gui.setStatus("Error occurred - see console for details"); |
408 |
|
} |
409 |
|
} finally |
410 |
|
{ |
411 |
3 |
if (reader != null) |
412 |
|
{ |
413 |
3 |
try |
414 |
|
{ |
415 |
3 |
reader.close(); |
416 |
|
} catch (IOException e) |
417 |
|
{ |
418 |
|
|
419 |
|
} |
420 |
|
} |
421 |
3 |
header = null; |
422 |
3 |
dictionary = null; |
423 |
|
} |
424 |
|
} |
425 |
|
|
426 |
|
|
427 |
|
|
428 |
|
|
429 |
|
|
430 |
|
|
431 |
|
|
432 |
|
|
433 |
|
|
434 |
|
|
435 |
|
|
436 |
|
@param |
437 |
|
@see |
438 |
|
@see |
439 |
|
@see |
440 |
|
|
|
|
| 53.1% |
Uncovered Elements: 15 (32) |
Complexity: 7 |
Complexity Density: 0.35 |
|
441 |
6 |
protected void setSpeciesAndAssembly(String reference)... |
442 |
|
{ |
443 |
6 |
if (reference == null) |
444 |
|
{ |
445 |
0 |
Console.error("No VCF ##reference found, defaulting to " |
446 |
|
+ DEFAULT_REFERENCE + ":" + DEFAULT_SPECIES); |
447 |
0 |
reference = DEFAULT_REFERENCE; |
448 |
|
} |
449 |
6 |
reference = reference.toLowerCase(Locale.ROOT); |
450 |
|
|
451 |
|
|
452 |
|
|
453 |
|
|
454 |
|
|
455 |
|
|
456 |
|
|
457 |
|
|
458 |
6 |
String prop = Cache.getDefault(VCF_ASSEMBLY, DEFAULT_VCF_ASSEMBLY); |
459 |
6 |
for (String token : prop.split(",")) |
460 |
|
{ |
461 |
6 |
String[] tokens = token.split("="); |
462 |
6 |
if (tokens.length == 2) |
463 |
|
{ |
464 |
6 |
if (reference.contains(tokens[0].trim().toLowerCase(Locale.ROOT))) |
465 |
|
{ |
466 |
3 |
vcfAssembly = tokens[1].trim(); |
467 |
3 |
break; |
468 |
|
} |
469 |
|
} |
470 |
|
} |
471 |
|
|
472 |
6 |
vcfSpecies = DEFAULT_SPECIES; |
473 |
6 |
prop = Cache.getProperty(VCF_SPECIES); |
474 |
6 |
if (prop != null) |
475 |
|
{ |
476 |
0 |
for (String token : prop.split(",")) |
477 |
|
{ |
478 |
0 |
String[] tokens = token.split("="); |
479 |
0 |
if (tokens.length == 2) |
480 |
|
{ |
481 |
0 |
if (reference.contains(tokens[0].trim().toLowerCase(Locale.ROOT))) |
482 |
|
{ |
483 |
0 |
vcfSpecies = tokens[1].trim(); |
484 |
0 |
break; |
485 |
|
} |
486 |
|
} |
487 |
|
} |
488 |
|
} |
489 |
|
} |
490 |
|
|
491 |
|
|
492 |
|
|
493 |
|
|
494 |
|
@param |
495 |
|
@throws |
496 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (8) |
Complexity: 2 |
Complexity Density: 0.25 |
|
497 |
4 |
private void initialise(String filePath) throws IOException... |
498 |
|
{ |
499 |
4 |
vcfFilePath = filePath; |
500 |
|
|
501 |
4 |
reader = new VCFReader(filePath); |
502 |
|
|
503 |
4 |
header = reader.getFileHeader(); |
504 |
|
|
505 |
4 |
try |
506 |
|
{ |
507 |
4 |
dictionary = header.getSequenceDictionary(); |
508 |
|
} catch (SAMException e) |
509 |
|
{ |
510 |
|
|
511 |
|
} |
512 |
|
|
513 |
4 |
sourceId = filePath; |
514 |
|
|
515 |
4 |
saveMetadata(sourceId); |
516 |
|
|
517 |
|
|
518 |
|
|
519 |
|
|
520 |
4 |
parseCsqHeader(); |
521 |
|
} |
522 |
|
|
523 |
|
|
524 |
|
|
525 |
|
|
526 |
|
|
527 |
|
@param |
528 |
|
|
|
|
| 77.4% |
Uncovered Elements: 7 (31) |
Complexity: 7 |
Complexity Density: 0.24 |
|
529 |
4 |
void saveMetadata(String theSourceId)... |
530 |
|
{ |
531 |
4 |
List<Pattern> vcfFieldPatterns = getFieldMatchers(VCF_FIELDS_PREF, |
532 |
|
DEFAULT_VCF_FIELDS); |
533 |
4 |
vcfFieldsOfInterest = new ArrayList<>(); |
534 |
|
|
535 |
4 |
FeatureSource metadata = new FeatureSource(theSourceId); |
536 |
|
|
537 |
4 |
for (VCFInfoHeaderLine info : header.getInfoHeaderLines()) |
538 |
|
{ |
539 |
13 |
String attributeId = info.getID(); |
540 |
13 |
String desc = info.getDescription(); |
541 |
13 |
VCFHeaderLineType type = info.getType(); |
542 |
13 |
FeatureAttributeType attType = null; |
543 |
13 |
switch (type) |
544 |
|
{ |
545 |
0 |
case Character: |
546 |
0 |
attType = FeatureAttributeType.Character; |
547 |
0 |
break; |
548 |
0 |
case Flag: |
549 |
0 |
attType = FeatureAttributeType.Flag; |
550 |
0 |
break; |
551 |
7 |
case Float: |
552 |
7 |
attType = FeatureAttributeType.Float; |
553 |
7 |
break; |
554 |
5 |
case Integer: |
555 |
5 |
attType = FeatureAttributeType.Integer; |
556 |
5 |
break; |
557 |
1 |
case String: |
558 |
1 |
attType = FeatureAttributeType.String; |
559 |
1 |
break; |
560 |
|
} |
561 |
13 |
metadata.setAttributeName(attributeId, desc); |
562 |
13 |
metadata.setAttributeType(attributeId, attType); |
563 |
|
|
564 |
13 |
if (isFieldWanted(attributeId, vcfFieldPatterns)) |
565 |
|
{ |
566 |
13 |
vcfFieldsOfInterest.add(attributeId); |
567 |
|
} |
568 |
|
} |
569 |
|
|
570 |
4 |
FeatureSources.getInstance().addSource(theSourceId, metadata); |
571 |
|
} |
572 |
|
|
573 |
|
|
574 |
|
|
575 |
|
|
576 |
|
|
577 |
|
|
578 |
|
@param |
579 |
|
@param |
580 |
|
@return |
581 |
|
|
|
|
| 66.7% |
Uncovered Elements: 2 (6) |
Complexity: 2 |
Complexity Density: 0.5 |
|
582 |
22 |
private boolean isFieldWanted(String id, List<Pattern> filters)... |
583 |
|
{ |
584 |
22 |
for (Pattern p : filters) |
585 |
|
{ |
586 |
22 |
if (p.matcher(id.toUpperCase(Locale.ROOT)).matches()) |
587 |
|
{ |
588 |
22 |
return true; |
589 |
|
} |
590 |
|
} |
591 |
0 |
return false; |
592 |
|
} |
593 |
|
|
594 |
|
|
595 |
|
|
596 |
|
|
597 |
|
|
598 |
|
|
599 |
|
|
600 |
|
|
601 |
|
|
602 |
|
|
|
|
| 89.2% |
Uncovered Elements: 4 (37) |
Complexity: 8 |
Complexity Density: 0.35 |
|
603 |
4 |
protected void parseCsqHeader()... |
604 |
|
{ |
605 |
4 |
List<Pattern> vepFieldFilters = getFieldMatchers(VEP_FIELDS_PREF, |
606 |
|
DEFAULT_VEP_FIELDS); |
607 |
4 |
vepFieldsOfInterest = new HashMap<>(); |
608 |
|
|
609 |
4 |
VCFInfoHeaderLine csqInfo = header.getInfoHeaderLine(CSQ_FIELD); |
610 |
4 |
if (csqInfo == null) |
611 |
|
{ |
612 |
3 |
return; |
613 |
|
} |
614 |
|
|
615 |
|
|
616 |
|
|
617 |
|
|
618 |
|
|
619 |
1 |
String desc = csqInfo.getDescription(); |
620 |
1 |
int spacePos = desc.lastIndexOf(" "); |
621 |
1 |
desc = desc.substring(spacePos + 1); |
622 |
|
|
623 |
1 |
if (desc != null) |
624 |
|
{ |
625 |
1 |
String[] format = desc.split(PIPE_REGEX); |
626 |
1 |
int index = 0; |
627 |
1 |
for (String field : format) |
628 |
|
{ |
629 |
9 |
if (CSQ_CONSEQUENCE_KEY.equals(field)) |
630 |
|
{ |
631 |
1 |
csqConsequenceFieldIndex = index; |
632 |
|
} |
633 |
9 |
if (CSQ_ALLELE_NUM_KEY.equals(field)) |
634 |
|
{ |
635 |
0 |
csqAlleleNumberFieldIndex = index; |
636 |
|
} |
637 |
9 |
if (CSQ_ALLELE_KEY.equals(field)) |
638 |
|
{ |
639 |
1 |
csqAlleleFieldIndex = index; |
640 |
|
} |
641 |
9 |
if (CSQ_FEATURE_KEY.equals(field)) |
642 |
|
{ |
643 |
1 |
csqFeatureFieldIndex = index; |
644 |
|
} |
645 |
|
|
646 |
9 |
if (isFieldWanted(field, vepFieldFilters)) |
647 |
|
{ |
648 |
9 |
vepFieldsOfInterest.put(index, field); |
649 |
|
} |
650 |
|
|
651 |
9 |
index++; |
652 |
|
} |
653 |
|
} |
654 |
|
} |
655 |
|
|
656 |
|
|
657 |
|
|
658 |
|
|
659 |
|
|
660 |
|
|
661 |
|
|
662 |
|
|
663 |
|
|
664 |
|
|
665 |
|
|
666 |
|
|
667 |
|
@param |
668 |
|
@param |
669 |
|
@return |
670 |
|
|
|
|
| 87.5% |
Uncovered Elements: 1 (8) |
Complexity: 2 |
Complexity Density: 0.25 |
|
671 |
8 |
private List<Pattern> getFieldMatchers(String key, String def)... |
672 |
|
{ |
673 |
8 |
String pref = Cache.getDefault(key, def); |
674 |
8 |
List<Pattern> patterns = new ArrayList<>(); |
675 |
8 |
String[] tokens = pref.split(","); |
676 |
8 |
for (String token : tokens) |
677 |
|
{ |
678 |
8 |
try |
679 |
|
{ |
680 |
8 |
patterns.add(Pattern.compile(token.toUpperCase(Locale.ROOT))); |
681 |
|
} catch (PatternSyntaxException e) |
682 |
|
{ |
683 |
0 |
jalview.bin.Console.errPrintln("Invalid pattern ignored: " + token); |
684 |
|
} |
685 |
|
} |
686 |
8 |
return patterns; |
687 |
|
} |
688 |
|
|
689 |
|
|
690 |
|
|
691 |
|
|
692 |
|
@param |
693 |
|
|
|
|
| 59.1% |
Uncovered Elements: 9 (22) |
Complexity: 6 |
Complexity Density: 0.43 |
|
694 |
11 |
protected void transferAddedFeatures(SequenceI seq)... |
695 |
|
{ |
696 |
11 |
List<DBRefEntry> dbrefs = seq.getDBRefs(); |
697 |
11 |
if (dbrefs == null) |
698 |
|
{ |
699 |
0 |
return; |
700 |
|
} |
701 |
11 |
for (DBRefEntry dbref : dbrefs) |
702 |
|
{ |
703 |
16 |
Mapping mapping = dbref.getMap(); |
704 |
16 |
if (mapping == null || mapping.getTo() == null) |
705 |
|
{ |
706 |
11 |
continue; |
707 |
|
} |
708 |
|
|
709 |
5 |
SequenceI mapTo = mapping.getTo(); |
710 |
5 |
MapList map = mapping.getMap(); |
711 |
5 |
if (map.getFromRatio() == 3) |
712 |
|
{ |
713 |
|
|
714 |
|
|
715 |
|
|
716 |
|
|
717 |
|
|
718 |
|
} |
719 |
|
else |
720 |
|
{ |
721 |
|
|
722 |
|
|
723 |
|
|
724 |
0 |
List<SequenceFeature> features = seq.getFeatures() |
725 |
|
.getPositionalFeatures(SequenceOntologyI.SEQUENCE_VARIANT); |
726 |
0 |
for (SequenceFeature sf : features) |
727 |
|
{ |
728 |
0 |
if (FEATURE_GROUP_VCF.equals(sf.getFeatureGroup())) |
729 |
|
{ |
730 |
0 |
transferFeature(sf, mapTo, map); |
731 |
|
} |
732 |
|
} |
733 |
|
} |
734 |
|
} |
735 |
|
} |
736 |
|
|
737 |
|
|
738 |
|
|
739 |
|
|
740 |
|
|
741 |
|
@param |
742 |
|
@return |
743 |
|
|
|
|
| 81.8% |
Uncovered Elements: 2 (11) |
Complexity: 3 |
Complexity Density: 0.43 |
|
744 |
24 |
protected int loadSequenceVCF(SequenceI seq)... |
745 |
|
{ |
746 |
24 |
VCFMap vcfMap = getVcfMap(seq); |
747 |
24 |
if (vcfMap == null) |
748 |
|
{ |
749 |
0 |
return 0; |
750 |
|
} |
751 |
|
|
752 |
|
|
753 |
|
|
754 |
|
|
755 |
24 |
SequenceI dss = seq.getDatasetSequence(); |
756 |
24 |
if (dss == null) |
757 |
|
{ |
758 |
3 |
dss = seq; |
759 |
|
} |
760 |
24 |
return addVcfVariants(dss, vcfMap); |
761 |
|
} |
762 |
|
|
763 |
|
|
764 |
|
|
765 |
|
|
766 |
|
@param |
767 |
|
@return |
768 |
|
|
|
|
| 46.7% |
Uncovered Elements: 24 (45) |
Complexity: 8 |
Complexity Density: 0.26 |
|
769 |
24 |
private VCFMap getVcfMap(SequenceI seq)... |
770 |
|
{ |
771 |
|
|
772 |
|
|
773 |
|
|
774 |
24 |
VCFMap vcfMap = null; |
775 |
24 |
if (dictionary != null) |
776 |
|
{ |
777 |
3 |
vcfMap = getContigMap(seq); |
778 |
|
} |
779 |
24 |
if (vcfMap != null) |
780 |
|
{ |
781 |
3 |
return vcfMap; |
782 |
|
} |
783 |
|
|
784 |
|
|
785 |
|
|
786 |
|
|
787 |
|
|
788 |
21 |
GeneLociI seqCoords = seq.getGeneLoci(); |
789 |
21 |
if (seqCoords == null) |
790 |
|
{ |
791 |
0 |
Console.warn(String.format( |
792 |
|
"Can't query VCF for %s as chromosome coordinates not known", |
793 |
|
seq.getName())); |
794 |
0 |
return null; |
795 |
|
} |
796 |
|
|
797 |
21 |
String species = seqCoords.getSpeciesId(); |
798 |
21 |
String chromosome = seqCoords.getChromosomeId(); |
799 |
21 |
String seqRef = seqCoords.getAssemblyId(); |
800 |
21 |
MapList map = seqCoords.getMapping(); |
801 |
|
|
802 |
|
|
803 |
|
|
804 |
21 |
if (!vcfSpecies.equalsIgnoreCase(species)) |
805 |
|
{ |
806 |
0 |
Console.warn("No VCF loaded to " + seq.getName() |
807 |
|
+ " as species not matched"); |
808 |
0 |
return null; |
809 |
|
} |
810 |
|
|
811 |
21 |
if (seqRef.equalsIgnoreCase(vcfAssembly)) |
812 |
|
{ |
813 |
21 |
return new VCFMap(chromosome, map); |
814 |
|
} |
815 |
|
|
816 |
|
|
817 |
|
|
818 |
|
|
819 |
|
|
820 |
0 |
List<int[]> toVcfRanges = new ArrayList<>(); |
821 |
0 |
List<int[]> fromSequenceRanges = new ArrayList<>(); |
822 |
|
|
823 |
0 |
for (int[] range : map.getToRanges()) |
824 |
|
{ |
825 |
0 |
int[] fromRange = map.locateInFrom(range[0], range[1]); |
826 |
0 |
if (fromRange == null) |
827 |
|
{ |
828 |
|
|
829 |
0 |
continue; |
830 |
|
} |
831 |
|
|
832 |
0 |
int[] newRange = mapReferenceRange(range, chromosome, "human", seqRef, |
833 |
|
vcfAssembly); |
834 |
0 |
if (newRange == null) |
835 |
|
{ |
836 |
0 |
Console.error(String.format("Failed to map %s:%s:%s:%d:%d to %s", |
837 |
|
species, chromosome, seqRef, range[0], range[1], |
838 |
|
vcfAssembly)); |
839 |
0 |
continue; |
840 |
|
} |
841 |
|
else |
842 |
|
{ |
843 |
0 |
toVcfRanges.add(newRange); |
844 |
0 |
fromSequenceRanges.add(fromRange); |
845 |
|
} |
846 |
|
} |
847 |
|
|
848 |
0 |
return new VCFMap(chromosome, |
849 |
|
new MapList(fromSequenceRanges, toVcfRanges, 1, 1)); |
850 |
|
} |
851 |
|
|
852 |
|
|
853 |
|
|
854 |
|
|
855 |
|
|
856 |
|
|
857 |
|
@param |
858 |
|
@return |
859 |
|
|
|
|
| 75% |
Uncovered Elements: 3 (12) |
Complexity: 3 |
Complexity Density: 0.38 |
|
860 |
3 |
private VCFMap getContigMap(SequenceI seq)... |
861 |
|
{ |
862 |
3 |
String id = seq.getName(); |
863 |
3 |
SAMSequenceRecord contig = dictionary.getSequence(id); |
864 |
3 |
if (contig != null) |
865 |
|
{ |
866 |
3 |
int len = seq.getLength(); |
867 |
3 |
if (len == contig.getSequenceLength()) |
868 |
|
{ |
869 |
3 |
MapList map = new MapList(new int[] { 1, len }, |
870 |
|
new int[] |
871 |
|
{ 1, len }, 1, 1); |
872 |
3 |
return new VCFMap(id, map); |
873 |
|
} |
874 |
|
} |
875 |
0 |
return null; |
876 |
|
} |
877 |
|
|
878 |
|
|
879 |
|
|
880 |
|
|
881 |
|
|
882 |
|
|
883 |
|
@param |
884 |
|
@param |
885 |
|
|
886 |
|
@return |
887 |
|
|
|
|
| 88% |
Uncovered Elements: 3 (25) |
Complexity: 5 |
Complexity Density: 0.26 |
|
888 |
24 |
protected int addVcfVariants(SequenceI seq, VCFMap map)... |
889 |
|
{ |
890 |
24 |
boolean forwardStrand = map.map.isToForwardStrand(); |
891 |
|
|
892 |
|
|
893 |
|
|
894 |
|
|
895 |
24 |
int count = 0; |
896 |
|
|
897 |
24 |
for (int[] range : map.map.getToRanges()) |
898 |
|
{ |
899 |
39 |
int vcfStart = Math.min(range[0], range[1]); |
900 |
39 |
int vcfEnd = Math.max(range[0], range[1]); |
901 |
39 |
try |
902 |
|
{ |
903 |
39 |
CloseableIterator<VariantContext> variants = reader |
904 |
|
.query(map.chromosome, vcfStart, vcfEnd); |
905 |
75 |
while (variants.hasNext()) |
906 |
|
{ |
907 |
36 |
VariantContext variant = variants.next(); |
908 |
|
|
909 |
36 |
int[] featureRange = map.map.locateInFrom(variant.getStart(), |
910 |
|
variant.getEnd()); |
911 |
|
|
912 |
|
|
913 |
|
|
914 |
|
|
915 |
36 |
if (featureRange != null) |
916 |
|
{ |
917 |
36 |
int featureStart = Math.min(featureRange[0], featureRange[1]); |
918 |
36 |
int featureEnd = Math.max(featureRange[0], featureRange[1]); |
919 |
36 |
if (featureEnd - featureStart == variant.getEnd() |
920 |
|
- variant.getStart()) |
921 |
|
{ |
922 |
34 |
count += addAlleleFeatures(seq, variant, featureStart, |
923 |
|
featureEnd, forwardStrand); |
924 |
|
} |
925 |
|
} |
926 |
|
} |
927 |
39 |
variants.close(); |
928 |
|
} catch (TribbleException e) |
929 |
|
{ |
930 |
|
|
931 |
|
|
932 |
|
|
933 |
0 |
String msg = String.format("Error reading VCF for %s:%d-%d: %s ", |
934 |
|
map.chromosome, vcfStart, vcfEnd, e.getLocalizedMessage()); |
935 |
0 |
Console.error(msg); |
936 |
|
} |
937 |
|
} |
938 |
|
|
939 |
24 |
return count; |
940 |
|
} |
941 |
|
|
942 |
|
|
943 |
|
|
944 |
|
|
945 |
|
@param |
946 |
|
@param |
947 |
|
@param |
948 |
|
@return |
949 |
|
|
|
|
| 80% |
Uncovered Elements: 2 (10) |
Complexity: 3 |
Complexity Density: 0.5 |
|
950 |
100 |
protected String getAttributeValue(VariantContext variant,... |
951 |
|
String attributeName, int alleleIndex) |
952 |
|
{ |
953 |
100 |
Object att = variant.getAttribute(attributeName); |
954 |
|
|
955 |
100 |
if (att instanceof String) |
956 |
|
{ |
957 |
39 |
return (String) att; |
958 |
|
} |
959 |
61 |
else if (att instanceof ArrayList) |
960 |
|
{ |
961 |
61 |
return ((List<String>) att).get(alleleIndex); |
962 |
|
} |
963 |
|
|
964 |
0 |
return null; |
965 |
|
} |
966 |
|
|
967 |
|
|
968 |
|
|
969 |
|
|
970 |
|
|
971 |
|
@param |
972 |
|
@param |
973 |
|
@param |
974 |
|
@param |
975 |
|
@param |
976 |
|
@return |
977 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (7) |
Complexity: 2 |
Complexity Density: 0.4 |
|
978 |
34 |
protected int addAlleleFeatures(SequenceI seq, VariantContext variant,... |
979 |
|
int featureStart, int featureEnd, boolean forwardStrand) |
980 |
|
{ |
981 |
34 |
int added = 0; |
982 |
|
|
983 |
|
|
984 |
|
|
985 |
|
|
986 |
|
|
987 |
34 |
int altAlleleCount = variant.getAlternateAlleles().size(); |
988 |
85 |
for (int i = 0; i < altAlleleCount; i++) |
989 |
|
{ |
990 |
51 |
added += addAlleleFeature(seq, variant, i, featureStart, featureEnd, |
991 |
|
forwardStrand); |
992 |
|
} |
993 |
34 |
return added; |
994 |
|
} |
995 |
|
|
996 |
|
|
997 |
|
|
998 |
|
|
999 |
|
|
1000 |
|
|
1001 |
|
|
1002 |
|
@param |
1003 |
|
@param |
1004 |
|
@param |
1005 |
|
|
1006 |
|
@param |
1007 |
|
@param |
1008 |
|
@param |
1009 |
|
@return |
1010 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (37) |
Complexity: 7 |
Complexity Density: 0.24 |
|
1011 |
51 |
protected int addAlleleFeature(SequenceI seq, VariantContext variant,... |
1012 |
|
int altAlleleIndex, int featureStart, int featureEnd, |
1013 |
|
boolean forwardStrand) |
1014 |
|
{ |
1015 |
51 |
String reference = variant.getReference().getBaseString(); |
1016 |
51 |
Allele alt = variant.getAlternateAllele(altAlleleIndex); |
1017 |
51 |
String allele = alt.getBaseString(); |
1018 |
|
|
1019 |
|
|
1020 |
|
|
1021 |
|
|
1022 |
|
|
1023 |
51 |
int referenceLength = reference.length(); |
1024 |
51 |
if (!forwardStrand && allele.length() > referenceLength |
1025 |
|
&& allele.startsWith(reference)) |
1026 |
|
{ |
1027 |
4 |
featureStart -= referenceLength; |
1028 |
4 |
featureEnd = featureStart; |
1029 |
4 |
char insertAfter = seq.getCharAt(featureStart - seq.getStart()); |
1030 |
4 |
reference = Dna.reverseComplement(String.valueOf(insertAfter)); |
1031 |
4 |
allele = allele.substring(referenceLength) + reference; |
1032 |
|
} |
1033 |
|
|
1034 |
|
|
1035 |
|
|
1036 |
|
|
1037 |
|
|
1038 |
51 |
StringBuilder sb = new StringBuilder(); |
1039 |
51 |
sb.append(forwardStrand ? reference : Dna.reverseComplement(reference)); |
1040 |
51 |
sb.append(COMMA); |
1041 |
51 |
sb.append(forwardStrand ? allele : Dna.reverseComplement(allele)); |
1042 |
51 |
String alleles = sb.toString(); |
1043 |
|
|
1044 |
|
|
1045 |
|
|
1046 |
|
|
1047 |
|
|
1048 |
51 |
String consequence = getConsequenceForAlleleAndFeature(variant, |
1049 |
|
CSQ_FIELD, altAlleleIndex, csqAlleleFieldIndex, |
1050 |
|
csqAlleleNumberFieldIndex, |
1051 |
|
seq.getName().toLowerCase(Locale.ROOT), csqFeatureFieldIndex); |
1052 |
|
|
1053 |
|
|
1054 |
|
|
1055 |
|
|
1056 |
51 |
String type = SequenceOntologyI.SEQUENCE_VARIANT; |
1057 |
51 |
if (consequence != null) |
1058 |
|
{ |
1059 |
7 |
type = getOntologyTerm(consequence); |
1060 |
|
} |
1061 |
|
|
1062 |
51 |
SequenceFeature sf = new SequenceFeature(type, alleles, featureStart, |
1063 |
|
featureEnd, FEATURE_GROUP_VCF); |
1064 |
51 |
sf.setSource(sourceId); |
1065 |
|
|
1066 |
|
|
1067 |
|
|
1068 |
|
|
1069 |
|
|
1070 |
51 |
addFeatureAttribute(sf, Gff3Helper.ALLELES, alleles); |
1071 |
|
|
1072 |
|
|
1073 |
|
|
1074 |
|
|
1075 |
51 |
addFeatureAttribute(sf, VCF_POS, String.valueOf(variant.getStart())); |
1076 |
51 |
addFeatureAttribute(sf, VCF_ID, variant.getID()); |
1077 |
51 |
addFeatureAttribute(sf, VCF_QUAL, |
1078 |
|
String.valueOf(variant.getPhredScaledQual())); |
1079 |
51 |
addFeatureAttribute(sf, VCF_FILTER, getFilter(variant)); |
1080 |
|
|
1081 |
51 |
addAlleleProperties(variant, sf, altAlleleIndex, consequence); |
1082 |
|
|
1083 |
51 |
seq.addSequenceFeature(sf); |
1084 |
|
|
1085 |
51 |
return 1; |
1086 |
|
} |
1087 |
|
|
1088 |
|
|
1089 |
|
|
1090 |
|
|
1091 |
|
|
1092 |
|
|
1093 |
|
|
1094 |
|
@param |
1095 |
|
@return |
1096 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (17) |
Complexity: 4 |
Complexity Density: 0.36 |
|
1097 |
51 |
String getFilter(VariantContext variant)... |
1098 |
|
{ |
1099 |
51 |
Set<String> filters = variant.getFilters(); |
1100 |
51 |
if (filters.isEmpty()) |
1101 |
|
{ |
1102 |
21 |
return NO_VALUE; |
1103 |
|
} |
1104 |
30 |
Iterator<String> iterator = filters.iterator(); |
1105 |
30 |
String first = iterator.next(); |
1106 |
30 |
if (filters.size() == 1) |
1107 |
|
{ |
1108 |
11 |
return first; |
1109 |
|
} |
1110 |
|
|
1111 |
19 |
StringBuilder sb = new StringBuilder(first); |
1112 |
38 |
while (iterator.hasNext()) |
1113 |
|
{ |
1114 |
19 |
sb.append(";").append(iterator.next()); |
1115 |
|
} |
1116 |
|
|
1117 |
19 |
return sb.toString(); |
1118 |
|
} |
1119 |
|
|
1120 |
|
|
1121 |
|
|
1122 |
|
|
1123 |
|
@param |
1124 |
|
@param |
1125 |
|
@param |
1126 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (4) |
Complexity: 4 |
Complexity Density: 2 |
|
1127 |
347 |
void addFeatureAttribute(SequenceFeature sf, String key, String value)... |
1128 |
|
{ |
1129 |
347 |
if (value != null && !value.isEmpty() && !NO_VALUE.equals(value)) |
1130 |
|
{ |
1131 |
275 |
sf.setValue(key, value); |
1132 |
|
} |
1133 |
|
} |
1134 |
|
|
1135 |
|
|
1136 |
|
|
1137 |
|
|
1138 |
|
|
1139 |
|
|
1140 |
|
|
1141 |
|
|
1142 |
|
|
1143 |
|
|
1144 |
|
@param |
1145 |
|
@return |
1146 |
|
@see |
1147 |
|
|
|
|
| 68.2% |
Uncovered Elements: 7 (22) |
Complexity: 6 |
Complexity Density: 0.5 |
|
1148 |
7 |
String getOntologyTerm(String consequence)... |
1149 |
|
{ |
1150 |
7 |
String type = SequenceOntologyI.SEQUENCE_VARIANT; |
1151 |
|
|
1152 |
|
|
1153 |
|
|
1154 |
|
|
1155 |
|
|
1156 |
7 |
if (csqAlleleFieldIndex == -1) |
1157 |
|
{ |
1158 |
|
|
1159 |
|
|
1160 |
|
|
1161 |
0 |
return type; |
1162 |
|
} |
1163 |
|
|
1164 |
7 |
if (consequence != null) |
1165 |
|
{ |
1166 |
7 |
String[] csqFields = consequence.split(PIPE_REGEX); |
1167 |
7 |
if (csqFields.length > csqConsequenceFieldIndex) |
1168 |
|
{ |
1169 |
7 |
type = csqFields[csqConsequenceFieldIndex]; |
1170 |
|
} |
1171 |
|
} |
1172 |
|
else |
1173 |
|
{ |
1174 |
|
|
1175 |
|
} |
1176 |
|
|
1177 |
|
|
1178 |
|
|
1179 |
|
|
1180 |
|
|
1181 |
7 |
if (type != null) |
1182 |
|
{ |
1183 |
7 |
int pos = type.indexOf('&'); |
1184 |
7 |
if (pos > 0) |
1185 |
|
{ |
1186 |
0 |
type = type.substring(0, pos); |
1187 |
|
} |
1188 |
|
} |
1189 |
7 |
return type; |
1190 |
|
} |
1191 |
|
|
1192 |
|
|
1193 |
|
|
1194 |
|
|
1195 |
|
|
1196 |
|
|
1197 |
|
|
1198 |
|
|
1199 |
|
|
1200 |
|
|
1201 |
|
|
1202 |
|
|
1203 |
|
|
1204 |
|
|
1205 |
|
@param |
1206 |
|
@param |
1207 |
|
@param |
1208 |
|
@param |
1209 |
|
@param |
1210 |
|
@param |
1211 |
|
@param |
1212 |
|
@return |
1213 |
|
|
|
|
| 87.5% |
Uncovered Elements: 3 (24) |
Complexity: 9 |
Complexity Density: 0.64 |
|
1214 |
51 |
private String getConsequenceForAlleleAndFeature(VariantContext variant,... |
1215 |
|
String vcfInfoId, int altAlleleIndex, int alleleFieldIndex, |
1216 |
|
int alleleNumberFieldIndex, String seqName, int featureFieldIndex) |
1217 |
|
{ |
1218 |
51 |
if (alleleFieldIndex == -1 || featureFieldIndex == -1) |
1219 |
|
{ |
1220 |
37 |
return null; |
1221 |
|
} |
1222 |
14 |
Object value = variant.getAttribute(vcfInfoId); |
1223 |
|
|
1224 |
14 |
if (value == null || !(value instanceof List<?>)) |
1225 |
|
{ |
1226 |
0 |
return null; |
1227 |
|
} |
1228 |
|
|
1229 |
|
|
1230 |
|
|
1231 |
|
|
1232 |
|
|
1233 |
14 |
List<String> consequences = (List<String>) value; |
1234 |
|
|
1235 |
14 |
for (String consequence : consequences) |
1236 |
|
{ |
1237 |
41 |
String[] csqFields = consequence.split(PIPE_REGEX); |
1238 |
41 |
if (csqFields.length > featureFieldIndex) |
1239 |
|
{ |
1240 |
41 |
String featureIdentifier = csqFields[featureFieldIndex]; |
1241 |
41 |
if (featureIdentifier.length() > 4 && seqName |
1242 |
|
.indexOf(featureIdentifier.toLowerCase(Locale.ROOT)) > -1) |
1243 |
|
{ |
1244 |
|
|
1245 |
|
|
1246 |
|
|
1247 |
10 |
if (matchAllele(variant, altAlleleIndex, csqFields, |
1248 |
|
alleleFieldIndex, alleleNumberFieldIndex)) |
1249 |
|
{ |
1250 |
7 |
return consequence; |
1251 |
|
} |
1252 |
|
} |
1253 |
|
} |
1254 |
|
} |
1255 |
7 |
return null; |
1256 |
|
} |
1257 |
|
|
|
|
| 43.8% |
Uncovered Elements: 9 (16) |
Complexity: 5 |
Complexity Density: 0.5 |
|
1258 |
10 |
private boolean matchAllele(VariantContext variant, int altAlleleIndex,... |
1259 |
|
String[] csqFields, int alleleFieldIndex, |
1260 |
|
int alleleNumberFieldIndex) |
1261 |
|
{ |
1262 |
|
|
1263 |
|
|
1264 |
|
|
1265 |
|
|
1266 |
10 |
if (alleleNumberFieldIndex > -1) |
1267 |
|
{ |
1268 |
0 |
if (csqFields.length <= alleleNumberFieldIndex) |
1269 |
|
{ |
1270 |
0 |
return false; |
1271 |
|
} |
1272 |
0 |
String alleleNum = csqFields[alleleNumberFieldIndex]; |
1273 |
0 |
return String.valueOf(altAlleleIndex + 1).equals(alleleNum); |
1274 |
|
} |
1275 |
|
|
1276 |
|
|
1277 |
|
|
1278 |
|
|
1279 |
10 |
if (alleleFieldIndex > -1 && csqFields.length > alleleFieldIndex) |
1280 |
|
{ |
1281 |
10 |
String csqAllele = csqFields[alleleFieldIndex]; |
1282 |
10 |
String vcfAllele = variant.getAlternateAllele(altAlleleIndex) |
1283 |
|
.getBaseString(); |
1284 |
10 |
return csqAllele.equals(vcfAllele); |
1285 |
|
} |
1286 |
0 |
return false; |
1287 |
|
} |
1288 |
|
|
1289 |
|
|
1290 |
|
|
1291 |
|
|
1292 |
|
@param |
1293 |
|
@param |
1294 |
|
@param |
1295 |
|
|
1296 |
|
@param |
1297 |
|
|
1298 |
|
|
1299 |
|
|
|
|
| 93.9% |
Uncovered Elements: 2 (33) |
Complexity: 8 |
Complexity Density: 0.38 |
|
1300 |
51 |
protected void addAlleleProperties(VariantContext variant,... |
1301 |
|
SequenceFeature sf, final int altAlelleIndex, String consequence) |
1302 |
|
{ |
1303 |
51 |
Map<String, Object> atts = variant.getAttributes(); |
1304 |
|
|
1305 |
51 |
for (Entry<String, Object> att : atts.entrySet()) |
1306 |
|
{ |
1307 |
174 |
String key = att.getKey(); |
1308 |
|
|
1309 |
|
|
1310 |
|
|
1311 |
|
|
1312 |
|
|
1313 |
174 |
if (CSQ_FIELD.equals(key)) |
1314 |
|
{ |
1315 |
14 |
addConsequences(variant, sf, consequence); |
1316 |
14 |
continue; |
1317 |
|
} |
1318 |
|
|
1319 |
|
|
1320 |
|
|
1321 |
|
|
1322 |
160 |
if (!vcfFieldsOfInterest.contains(key)) |
1323 |
|
{ |
1324 |
46 |
continue; |
1325 |
|
} |
1326 |
|
|
1327 |
|
|
1328 |
|
|
1329 |
|
|
1330 |
|
|
1331 |
|
|
1332 |
114 |
VCFInfoHeaderLine infoHeader = header.getInfoHeaderLine(key); |
1333 |
114 |
if (infoHeader == null) |
1334 |
|
{ |
1335 |
|
|
1336 |
|
|
1337 |
|
|
1338 |
|
|
1339 |
0 |
continue; |
1340 |
|
} |
1341 |
|
|
1342 |
114 |
VCFHeaderLineCount number = infoHeader.getCountType(); |
1343 |
114 |
int index = altAlelleIndex; |
1344 |
114 |
if (number == VCFHeaderLineCount.R) |
1345 |
|
{ |
1346 |
|
|
1347 |
|
|
1348 |
|
|
1349 |
|
|
1350 |
14 |
index++; |
1351 |
|
} |
1352 |
100 |
else if (number != VCFHeaderLineCount.A) |
1353 |
|
{ |
1354 |
|
|
1355 |
|
|
1356 |
|
|
1357 |
14 |
continue; |
1358 |
|
} |
1359 |
|
|
1360 |
|
|
1361 |
|
|
1362 |
|
|
1363 |
100 |
String value = getAttributeValue(variant, key, index); |
1364 |
100 |
if (value != null && isValid(variant, key, value)) |
1365 |
|
{ |
1366 |
|
|
1367 |
|
|
1368 |
|
|
1369 |
|
|
1370 |
92 |
value = StringUtils.urlDecode(value, VCF_ENCODABLE); |
1371 |
92 |
addFeatureAttribute(sf, key, value); |
1372 |
|
} |
1373 |
|
} |
1374 |
|
} |
1375 |
|
|
1376 |
|
|
1377 |
|
|
1378 |
|
|
1379 |
|
|
1380 |
|
|
1381 |
|
@param |
1382 |
|
@param |
1383 |
|
@param |
1384 |
|
@return |
1385 |
|
|
|
|
| 82.6% |
Uncovered Elements: 4 (23) |
Complexity: 8 |
Complexity Density: 0.53 |
|
1386 |
100 |
protected boolean isValid(VariantContext variant, String infoId,... |
1387 |
|
String value) |
1388 |
|
{ |
1389 |
100 |
if (value == null || value.isEmpty() || NO_VALUE.equals(value)) |
1390 |
|
{ |
1391 |
8 |
return true; |
1392 |
|
} |
1393 |
92 |
VCFInfoHeaderLine infoHeader = header.getInfoHeaderLine(infoId); |
1394 |
92 |
if (infoHeader == null) |
1395 |
|
{ |
1396 |
0 |
Console.error("Field " + infoId + " has no INFO header"); |
1397 |
0 |
return false; |
1398 |
|
} |
1399 |
92 |
VCFHeaderLineType infoType = infoHeader.getType(); |
1400 |
92 |
try |
1401 |
|
{ |
1402 |
92 |
if (infoType == VCFHeaderLineType.Integer) |
1403 |
|
{ |
1404 |
27 |
Integer.parseInt(value); |
1405 |
|
} |
1406 |
65 |
else if (infoType == VCFHeaderLineType.Float) |
1407 |
|
{ |
1408 |
65 |
Float.parseFloat(value); |
1409 |
|
} |
1410 |
|
} catch (NumberFormatException e) |
1411 |
|
{ |
1412 |
8 |
logInvalidValue(variant, infoId, value); |
1413 |
8 |
return false; |
1414 |
|
} |
1415 |
84 |
return true; |
1416 |
|
} |
1417 |
|
|
1418 |
|
|
1419 |
|
|
1420 |
|
|
1421 |
|
|
1422 |
|
@param |
1423 |
|
@param |
1424 |
|
@param |
1425 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (10) |
Complexity: 3 |
Complexity Density: 0.5 |
|
1426 |
8 |
private void logInvalidValue(VariantContext variant, String infoId,... |
1427 |
|
String value) |
1428 |
|
{ |
1429 |
8 |
if (badData == null) |
1430 |
|
{ |
1431 |
2 |
badData = new HashSet<>(); |
1432 |
|
} |
1433 |
8 |
String token = infoId + ":" + value; |
1434 |
8 |
if (!badData.contains(token)) |
1435 |
|
{ |
1436 |
4 |
badData.add(token); |
1437 |
4 |
Console.error(String.format("Invalid VCF data at %s:%d %s=%s", |
1438 |
|
variant.getContig(), variant.getStart(), infoId, value)); |
1439 |
|
} |
1440 |
|
} |
1441 |
|
|
1442 |
|
|
1443 |
|
|
1444 |
|
|
1445 |
|
|
1446 |
|
|
1447 |
|
|
1448 |
|
|
1449 |
|
|
1450 |
|
@param |
1451 |
|
@param |
1452 |
|
@param |
1453 |
|
|
|
|
| 82.1% |
Uncovered Elements: 5 (28) |
Complexity: 9 |
Complexity Density: 0.5 |
|
1454 |
14 |
protected void addConsequences(VariantContext variant, SequenceFeature sf,... |
1455 |
|
String myConsequence) |
1456 |
|
{ |
1457 |
14 |
Object value = variant.getAttribute(CSQ_FIELD); |
1458 |
|
|
1459 |
14 |
if (value == null || !(value instanceof List<?>)) |
1460 |
|
{ |
1461 |
0 |
return; |
1462 |
|
} |
1463 |
|
|
1464 |
14 |
List<String> consequences = (List<String>) value; |
1465 |
|
|
1466 |
|
|
1467 |
|
|
1468 |
|
|
1469 |
|
|
1470 |
14 |
Map<String, String> csqValues = new HashMap<>(); |
1471 |
|
|
1472 |
14 |
for (String consequence : consequences) |
1473 |
|
{ |
1474 |
50 |
if (myConsequence == null || myConsequence.equals(consequence)) |
1475 |
|
{ |
1476 |
31 |
String[] csqFields = consequence.split(PIPE_REGEX); |
1477 |
|
|
1478 |
|
|
1479 |
|
|
1480 |
|
|
1481 |
|
|
1482 |
31 |
int i = 0; |
1483 |
31 |
for (String field : csqFields) |
1484 |
|
{ |
1485 |
279 |
if (field != null && field.length() > 0) |
1486 |
|
{ |
1487 |
279 |
String id = vepFieldsOfInterest.get(i); |
1488 |
279 |
if (id != null) |
1489 |
|
{ |
1490 |
|
|
1491 |
|
|
1492 |
|
|
1493 |
|
|
1494 |
279 |
field = StringUtils.urlDecode(field, VCF_ENCODABLE); |
1495 |
279 |
csqValues.put(id, field); |
1496 |
|
} |
1497 |
|
} |
1498 |
279 |
i++; |
1499 |
|
} |
1500 |
|
} |
1501 |
|
} |
1502 |
|
|
1503 |
14 |
if (!csqValues.isEmpty()) |
1504 |
|
{ |
1505 |
14 |
sf.setValue(CSQ_FIELD, csqValues); |
1506 |
|
} |
1507 |
|
} |
1508 |
|
|
1509 |
|
|
1510 |
|
|
1511 |
|
|
1512 |
|
|
1513 |
|
@param |
1514 |
|
@return |
1515 |
|
|
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
1516 |
0 |
protected String complement(byte[] reference)... |
1517 |
|
{ |
1518 |
0 |
return String.valueOf(Dna.getComplement((char) reference[0])); |
1519 |
|
} |
1520 |
|
|
1521 |
|
|
1522 |
|
|
1523 |
|
|
1524 |
|
|
1525 |
|
|
1526 |
|
|
1527 |
|
|
1528 |
|
|
1529 |
|
|
1530 |
|
|
1531 |
|
|
1532 |
|
@param |
1533 |
|
|
1534 |
|
@param |
1535 |
|
@param |
1536 |
|
@param |
1537 |
|
|
1538 |
|
@param |
1539 |
|
|
1540 |
|
@return |
1541 |
|
|
|
|
| 0% |
Uncovered Elements: 18 (18) |
Complexity: 4 |
Complexity Density: 0.33 |
|
1542 |
0 |
protected int[] mapReferenceRange(int[] queryRange, String chromosome,... |
1543 |
|
String species, String fromRef, String toRef) |
1544 |
|
{ |
1545 |
|
|
1546 |
|
|
1547 |
|
|
1548 |
|
|
1549 |
0 |
int[] mappedRange = findSubsumedRangeMapping(queryRange, chromosome, |
1550 |
|
species, fromRef, toRef); |
1551 |
0 |
if (mappedRange != null) |
1552 |
|
{ |
1553 |
0 |
return mappedRange; |
1554 |
|
} |
1555 |
|
|
1556 |
|
|
1557 |
|
|
1558 |
|
|
1559 |
0 |
EnsemblMap mapper = new EnsemblMap(); |
1560 |
0 |
int[] mapping = mapper.getAssemblyMapping(species, chromosome, fromRef, |
1561 |
|
toRef, queryRange); |
1562 |
|
|
1563 |
0 |
if (mapping == null) |
1564 |
|
{ |
1565 |
|
|
1566 |
0 |
return null; |
1567 |
|
} |
1568 |
|
|
1569 |
|
|
1570 |
|
|
1571 |
|
|
1572 |
0 |
String key = makeRangesKey(chromosome, species, fromRef, toRef); |
1573 |
0 |
if (!assemblyMappings.containsKey(key)) |
1574 |
|
{ |
1575 |
0 |
assemblyMappings.put(key, new HashMap<int[], int[]>()); |
1576 |
|
} |
1577 |
|
|
1578 |
0 |
assemblyMappings.get(key).put(queryRange, mapping); |
1579 |
|
|
1580 |
0 |
return mapping; |
1581 |
|
} |
1582 |
|
|
1583 |
|
|
1584 |
|
|
1585 |
|
|
1586 |
|
|
1587 |
|
|
1588 |
|
|
1589 |
|
|
1590 |
|
|
1591 |
|
|
1592 |
|
|
1593 |
|
|
1594 |
|
|
1595 |
|
|
1596 |
|
|
1597 |
|
@param |
1598 |
|
@param |
1599 |
|
@param |
1600 |
|
@param |
1601 |
|
@param |
1602 |
|
@return |
1603 |
|
|
|
|
| 0% |
Uncovered Elements: 19 (19) |
Complexity: 4 |
Complexity Density: 0.31 |
|
1604 |
0 |
protected int[] findSubsumedRangeMapping(int[] queryRange,... |
1605 |
|
String chromosome, String species, String fromRef, String toRef) |
1606 |
|
{ |
1607 |
0 |
String key = makeRangesKey(chromosome, species, fromRef, toRef); |
1608 |
0 |
if (assemblyMappings.containsKey(key)) |
1609 |
|
{ |
1610 |
0 |
Map<int[], int[]> mappedRanges = assemblyMappings.get(key); |
1611 |
0 |
for (Entry<int[], int[]> mappedRange : mappedRanges.entrySet()) |
1612 |
|
{ |
1613 |
0 |
int[] fromRange = mappedRange.getKey(); |
1614 |
0 |
int[] toRange = mappedRange.getValue(); |
1615 |
0 |
if (fromRange[1] - fromRange[0] == toRange[1] - toRange[0]) |
1616 |
|
{ |
1617 |
|
|
1618 |
|
|
1619 |
|
|
1620 |
0 |
if (MappingUtils.rangeContains(fromRange, queryRange)) |
1621 |
|
{ |
1622 |
|
|
1623 |
|
|
1624 |
|
|
1625 |
0 |
int offset = queryRange[0] - fromRange[0]; |
1626 |
0 |
int mappedRangeFrom = toRange[0] + offset; |
1627 |
0 |
int mappedRangeTo = mappedRangeFrom |
1628 |
|
+ (queryRange[1] - queryRange[0]); |
1629 |
0 |
return new int[] { mappedRangeFrom, mappedRangeTo }; |
1630 |
|
} |
1631 |
|
} |
1632 |
|
} |
1633 |
|
} |
1634 |
0 |
return null; |
1635 |
|
} |
1636 |
|
|
1637 |
|
|
1638 |
|
|
1639 |
|
|
1640 |
|
|
1641 |
|
|
1642 |
|
@param |
1643 |
|
@param |
1644 |
|
@param |
1645 |
|
|
1646 |
|
|
|
|
| 0% |
Uncovered Elements: 9 (9) |
Complexity: 2 |
Complexity Density: 0.29 |
|
1647 |
0 |
protected void transferFeature(SequenceFeature sf,... |
1648 |
|
SequenceI targetSequence, MapList mapping) |
1649 |
|
{ |
1650 |
0 |
int[] mappedRange = mapping.locateInTo(sf.getBegin(), sf.getEnd()); |
1651 |
|
|
1652 |
0 |
if (mappedRange != null) |
1653 |
|
{ |
1654 |
0 |
String group = sf.getFeatureGroup(); |
1655 |
0 |
int newBegin = Math.min(mappedRange[0], mappedRange[1]); |
1656 |
0 |
int newEnd = Math.max(mappedRange[0], mappedRange[1]); |
1657 |
0 |
SequenceFeature copy = new SequenceFeature(sf, newBegin, newEnd, |
1658 |
|
group, sf.getScore()); |
1659 |
0 |
targetSequence.addSequenceFeature(copy); |
1660 |
|
} |
1661 |
|
} |
1662 |
|
|
1663 |
|
|
1664 |
|
|
1665 |
|
|
1666 |
|
@param |
1667 |
|
@param |
1668 |
|
@param |
1669 |
|
@param |
1670 |
|
@return |
1671 |
|
|
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
1672 |
0 |
protected static String makeRangesKey(String chromosome, String species,... |
1673 |
|
String fromRef, String toRef) |
1674 |
|
{ |
1675 |
0 |
return species + EXCL + chromosome + EXCL + fromRef + EXCL + toRef; |
1676 |
|
} |
1677 |
|
} |