1 |
|
package jalview.io.vcf; |
2 |
|
|
3 |
|
import jalview.analysis.AlignmentUtils; |
4 |
|
import jalview.analysis.Dna; |
5 |
|
import jalview.api.AlignViewControllerGuiI; |
6 |
|
import jalview.bin.Cache; |
7 |
|
import jalview.datamodel.DBRefEntry; |
8 |
|
import jalview.datamodel.GeneLociI; |
9 |
|
import jalview.datamodel.Mapping; |
10 |
|
import jalview.datamodel.SequenceFeature; |
11 |
|
import jalview.datamodel.SequenceI; |
12 |
|
import jalview.datamodel.features.FeatureAttributeType; |
13 |
|
import jalview.datamodel.features.FeatureSource; |
14 |
|
import jalview.datamodel.features.FeatureSources; |
15 |
|
import jalview.ext.ensembl.EnsemblMap; |
16 |
|
import jalview.ext.htsjdk.HtsContigDb; |
17 |
|
import jalview.ext.htsjdk.VCFReader; |
18 |
|
import jalview.io.gff.Gff3Helper; |
19 |
|
import jalview.io.gff.SequenceOntologyI; |
20 |
|
import jalview.util.MapList; |
21 |
|
import jalview.util.MappingUtils; |
22 |
|
import jalview.util.MessageManager; |
23 |
|
|
24 |
|
import java.io.File; |
25 |
|
import java.io.IOException; |
26 |
|
import java.util.ArrayList; |
27 |
|
import java.util.HashMap; |
28 |
|
import java.util.List; |
29 |
|
import java.util.Map; |
30 |
|
import java.util.Map.Entry; |
31 |
|
import java.util.regex.Pattern; |
32 |
|
import java.util.regex.PatternSyntaxException; |
33 |
|
|
34 |
|
import htsjdk.samtools.SAMException; |
35 |
|
import htsjdk.samtools.SAMSequenceDictionary; |
36 |
|
import htsjdk.samtools.SAMSequenceRecord; |
37 |
|
import htsjdk.samtools.util.CloseableIterator; |
38 |
|
import htsjdk.variant.variantcontext.Allele; |
39 |
|
import htsjdk.variant.variantcontext.VariantContext; |
40 |
|
import htsjdk.variant.vcf.VCFHeader; |
41 |
|
import htsjdk.variant.vcf.VCFHeaderLine; |
42 |
|
import htsjdk.variant.vcf.VCFHeaderLineCount; |
43 |
|
import htsjdk.variant.vcf.VCFHeaderLineType; |
44 |
|
import htsjdk.variant.vcf.VCFInfoHeaderLine; |
45 |
|
|
46 |
|
|
47 |
|
|
48 |
|
|
49 |
|
|
50 |
|
@author |
51 |
|
|
|
|
| 67.5% |
Uncovered Elements: 176 (542) |
Complexity: 134 |
Complexity Density: 0.38 |
|
52 |
|
public class VCFLoader |
53 |
|
{ |
54 |
|
|
55 |
|
|
56 |
|
|
57 |
|
|
58 |
|
|
59 |
|
|
60 |
|
|
61 |
|
|
62 |
|
|
63 |
|
|
|
|
| 60% |
Uncovered Elements: 2 (5) |
Complexity: 2 |
Complexity Density: 0.67 |
|
64 |
|
class VCFMap |
65 |
|
{ |
66 |
|
final String chromosome; |
67 |
|
|
68 |
|
final MapList map; |
69 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (2) |
Complexity: 1 |
Complexity Density: 0.5 |
|
70 |
24 |
VCFMap(String chr, MapList m)... |
71 |
|
{ |
72 |
24 |
chromosome = chr; |
73 |
24 |
map = m; |
74 |
|
} |
75 |
|
|
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
76 |
0 |
@Override... |
77 |
|
public String toString() |
78 |
|
{ |
79 |
0 |
return chromosome + ":" + map.toString(); |
80 |
|
} |
81 |
|
} |
82 |
|
|
83 |
|
|
84 |
|
|
85 |
|
|
86 |
|
|
87 |
|
private static final String VEP_FIELDS_PREF = "VEP_FIELDS"; |
88 |
|
|
89 |
|
private static final String VCF_FIELDS_PREF = "VCF_FIELDS"; |
90 |
|
|
91 |
|
private static final String DEFAULT_VCF_FIELDS = ".*"; |
92 |
|
|
93 |
|
private static final String DEFAULT_VEP_FIELDS = ".*"; |
94 |
|
|
95 |
|
|
96 |
|
|
97 |
|
|
98 |
|
|
99 |
|
private static final String CSQ_CONSEQUENCE_KEY = "Consequence"; |
100 |
|
private static final String CSQ_ALLELE_KEY = "Allele"; |
101 |
|
private static final String CSQ_ALLELE_NUM_KEY = "ALLELE_NUM"; |
102 |
|
private static final String CSQ_FEATURE_KEY = "Feature"; |
103 |
|
|
104 |
|
|
105 |
|
|
106 |
|
|
107 |
|
|
108 |
|
|
109 |
|
private static final String CSQ_FIELD = "CSQ"; |
110 |
|
|
111 |
|
|
112 |
|
|
113 |
|
|
114 |
|
private static final String PIPE_REGEX = "\\|"; |
115 |
|
|
116 |
|
|
117 |
|
|
118 |
|
|
119 |
|
|
120 |
|
private static final String ALLELE_FREQUENCY_KEY = "AF"; |
121 |
|
|
122 |
|
|
123 |
|
|
124 |
|
|
125 |
|
private static final String COMMA = ","; |
126 |
|
|
127 |
|
|
128 |
|
|
129 |
|
|
130 |
|
private static final String FEATURE_GROUP_VCF = "VCF"; |
131 |
|
|
132 |
|
|
133 |
|
|
134 |
|
|
135 |
|
|
136 |
|
private static final String EXCL = "!"; |
137 |
|
|
138 |
|
|
139 |
|
|
140 |
|
|
141 |
|
protected String vcfFilePath; |
142 |
|
|
143 |
|
|
144 |
|
|
145 |
|
|
146 |
|
|
147 |
|
|
148 |
|
private Map<String, Map<int[], int[]>> assemblyMappings; |
149 |
|
|
150 |
|
private VCFReader reader; |
151 |
|
|
152 |
|
|
153 |
|
|
154 |
|
|
155 |
|
private VCFHeader header; |
156 |
|
|
157 |
|
|
158 |
|
|
159 |
|
|
160 |
|
private SAMSequenceDictionary dictionary; |
161 |
|
|
162 |
|
|
163 |
|
|
164 |
|
|
165 |
|
|
166 |
|
|
167 |
|
private int csqConsequenceFieldIndex = -1; |
168 |
|
private int csqAlleleFieldIndex = -1; |
169 |
|
private int csqAlleleNumberFieldIndex = -1; |
170 |
|
private int csqFeatureFieldIndex = -1; |
171 |
|
|
172 |
|
|
173 |
|
|
174 |
|
|
175 |
|
|
176 |
|
|
177 |
|
|
178 |
|
|
179 |
|
private String sourceId; |
180 |
|
|
181 |
|
|
182 |
|
|
183 |
|
|
184 |
|
|
185 |
|
List<String> vcfFieldsOfInterest; |
186 |
|
|
187 |
|
|
188 |
|
|
189 |
|
|
190 |
|
|
191 |
|
|
192 |
|
Map<Integer, String> vepFieldsOfInterest; |
193 |
|
|
194 |
|
|
195 |
|
|
196 |
|
|
197 |
|
@param |
198 |
|
|
|
|
| 75% |
Uncovered Elements: 1 (4) |
Complexity: 2 |
Complexity Density: 0.5 |
|
199 |
4 |
/**
 * Constructor given the path of a VCF file. The file is opened and its header
 * parsed straight away; if that fails with an IOException the error is
 * reported on stderr and the loader is left without a reader or header.
 * 
 * @param vcfFile
 *          path of the VCF file to read
 */
public VCFLoader(String vcfFile)
{
  // cache of assembly mappings, initially empty
  assemblyMappings = new HashMap<>();

  try
  {
    initialise(vcfFile);
  } catch (IOException ioe)
  {
    System.err.println("Error opening VCF file: " + ioe.getMessage());
  }
}
212 |
|
|
213 |
|
|
214 |
|
|
215 |
|
|
216 |
|
|
217 |
|
|
218 |
|
|
219 |
|
|
220 |
|
@param |
221 |
|
@param |
222 |
|
|
|
|
| 0% |
Uncovered Elements: 5 (5) |
Complexity: 2 |
Complexity Density: 0.67 |
|
223 |
0 |
public void loadVCF(SequenceI[] seqs, final AlignViewControllerGuiI gui)... |
224 |
|
{ |
225 |
0 |
if (gui != null) |
226 |
|
{ |
227 |
0 |
gui.setStatus(MessageManager.getString("label.searching_vcf")); |
228 |
|
} |
229 |
|
|
230 |
0 |
new Thread() |
231 |
|
{ |
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
232 |
0 |
@Override... |
233 |
|
public void run() |
234 |
|
{ |
235 |
0 |
VCFLoader.this.doLoad(seqs, gui); |
236 |
|
} |
237 |
|
}.start(); |
238 |
|
} |
239 |
|
|
240 |
|
|
241 |
|
|
242 |
|
|
243 |
|
@param |
244 |
|
|
245 |
|
@return |
246 |
|
|
|
|
| 75% |
Uncovered Elements: 4 (16) |
Complexity: 3 |
Complexity Density: 0.25 |
|
247 |
3 |
public SequenceI loadVCFContig(String contig)... |
248 |
|
{ |
249 |
3 |
String ref = header.getOtherHeaderLine(VCFHeader.REFERENCE_KEY) |
250 |
|
.getValue(); |
251 |
3 |
if (ref.startsWith("file://")) |
252 |
|
{ |
253 |
0 |
ref = ref.substring(7); |
254 |
|
} |
255 |
|
|
256 |
3 |
SequenceI seq = null; |
257 |
3 |
File dbFile = new File(ref); |
258 |
|
|
259 |
3 |
if (dbFile.exists()) |
260 |
|
{ |
261 |
3 |
HtsContigDb db = new HtsContigDb("", dbFile); |
262 |
3 |
seq = db.getSequenceProxy(contig); |
263 |
3 |
loadSequenceVCF(seq, ref); |
264 |
3 |
db.close(); |
265 |
|
} |
266 |
|
else |
267 |
|
{ |
268 |
0 |
System.err.println("VCF reference not found: " + ref); |
269 |
|
} |
270 |
|
|
271 |
3 |
return seq; |
272 |
|
} |
273 |
|
|
274 |
|
|
275 |
|
|
276 |
|
|
277 |
|
@param |
278 |
|
@param |
279 |
|
|
280 |
|
|
|
|
| 60% |
Uncovered Elements: 14 (35) |
Complexity: 8 |
Complexity Density: 0.32 |
|
281 |
3 |
protected void doLoad(SequenceI[] seqs, AlignViewControllerGuiI gui)... |
282 |
|
{ |
283 |
3 |
try |
284 |
|
{ |
285 |
3 |
VCFHeaderLine ref = header |
286 |
|
.getOtherHeaderLine(VCFHeader.REFERENCE_KEY); |
287 |
3 |
String vcfAssembly = ref.getValue(); |
288 |
|
|
289 |
3 |
int varCount = 0; |
290 |
3 |
int seqCount = 0; |
291 |
|
|
292 |
|
|
293 |
|
|
294 |
|
|
295 |
3 |
for (SequenceI seq : seqs) |
296 |
|
{ |
297 |
21 |
int added = loadSequenceVCF(seq, vcfAssembly); |
298 |
21 |
if (added > 0) |
299 |
|
{ |
300 |
11 |
seqCount++; |
301 |
11 |
varCount += added; |
302 |
11 |
transferAddedFeatures(seq); |
303 |
|
} |
304 |
|
} |
305 |
3 |
if (gui != null) |
306 |
|
{ |
307 |
0 |
String msg = MessageManager.formatMessage("label.added_vcf", |
308 |
|
varCount, seqCount); |
309 |
0 |
gui.setStatus(msg); |
310 |
0 |
if (gui.getFeatureSettingsUI() != null) |
311 |
|
{ |
312 |
0 |
gui.getFeatureSettingsUI().discoverAllFeatureData(); |
313 |
|
} |
314 |
|
} |
315 |
|
} catch (Throwable e) |
316 |
|
{ |
317 |
0 |
System.err.println("Error processing VCF: " + e.getMessage()); |
318 |
0 |
e.printStackTrace(); |
319 |
0 |
if (gui != null) |
320 |
|
{ |
321 |
0 |
gui.setStatus("Error occurred - see console for details"); |
322 |
|
} |
323 |
|
} finally |
324 |
|
{ |
325 |
3 |
if (reader != null) |
326 |
|
{ |
327 |
3 |
try |
328 |
|
{ |
329 |
3 |
reader.close(); |
330 |
|
} catch (IOException e) |
331 |
|
{ |
332 |
|
|
333 |
|
} |
334 |
|
} |
335 |
3 |
header = null; |
336 |
3 |
dictionary = null; |
337 |
|
} |
338 |
|
} |
339 |
|
|
340 |
|
|
341 |
|
|
342 |
|
|
343 |
|
@param |
344 |
|
@throws |
345 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (8) |
Complexity: 2 |
Complexity Density: 0.25 |
|
346 |
4 |
private void initialise(String filePath) throws IOException... |
347 |
|
{ |
348 |
4 |
vcfFilePath = filePath; |
349 |
|
|
350 |
4 |
reader = new VCFReader(filePath); |
351 |
|
|
352 |
4 |
header = reader.getFileHeader(); |
353 |
|
|
354 |
4 |
try |
355 |
|
{ |
356 |
4 |
dictionary = header.getSequenceDictionary(); |
357 |
|
} catch (SAMException e) |
358 |
|
{ |
359 |
|
|
360 |
|
} |
361 |
|
|
362 |
4 |
sourceId = filePath; |
363 |
|
|
364 |
4 |
saveMetadata(sourceId); |
365 |
|
|
366 |
|
|
367 |
|
|
368 |
|
|
369 |
4 |
parseCsqHeader(); |
370 |
|
} |
371 |
|
|
372 |
|
|
373 |
|
|
374 |
|
|
375 |
|
|
376 |
|
@param |
377 |
|
|
|
|
| 77.4% |
Uncovered Elements: 7 (31) |
Complexity: 7 |
Complexity Density: 0.24 |
|
378 |
4 |
void saveMetadata(String theSourceId)... |
379 |
|
{ |
380 |
4 |
List<Pattern> vcfFieldPatterns = getFieldMatchers(VCF_FIELDS_PREF, |
381 |
|
DEFAULT_VCF_FIELDS); |
382 |
4 |
vcfFieldsOfInterest = new ArrayList<>(); |
383 |
|
|
384 |
4 |
FeatureSource metadata = new FeatureSource(theSourceId); |
385 |
|
|
386 |
4 |
for (VCFInfoHeaderLine info : header.getInfoHeaderLines()) |
387 |
|
{ |
388 |
9 |
String attributeId = info.getID(); |
389 |
9 |
String desc = info.getDescription(); |
390 |
9 |
VCFHeaderLineType type = info.getType(); |
391 |
9 |
FeatureAttributeType attType = null; |
392 |
9 |
switch (type) |
393 |
|
{ |
394 |
0 |
case Character: |
395 |
0 |
attType = FeatureAttributeType.Character; |
396 |
0 |
break; |
397 |
0 |
case Flag: |
398 |
0 |
attType = FeatureAttributeType.Flag; |
399 |
0 |
break; |
400 |
5 |
case Float: |
401 |
5 |
attType = FeatureAttributeType.Float; |
402 |
5 |
break; |
403 |
3 |
case Integer: |
404 |
3 |
attType = FeatureAttributeType.Integer; |
405 |
3 |
break; |
406 |
1 |
case String: |
407 |
1 |
attType = FeatureAttributeType.String; |
408 |
1 |
break; |
409 |
|
} |
410 |
9 |
metadata.setAttributeName(attributeId, desc); |
411 |
9 |
metadata.setAttributeType(attributeId, attType); |
412 |
|
|
413 |
9 |
if (isFieldWanted(attributeId, vcfFieldPatterns)) |
414 |
|
{ |
415 |
9 |
vcfFieldsOfInterest.add(attributeId); |
416 |
|
} |
417 |
|
} |
418 |
|
|
419 |
4 |
FeatureSources.getInstance().addSource(theSourceId, metadata); |
420 |
|
} |
421 |
|
|
422 |
|
|
423 |
|
|
424 |
|
|
425 |
|
|
426 |
|
|
427 |
|
@param |
428 |
|
@param |
429 |
|
@return |
430 |
|
|
|
|
| 66.7% |
Uncovered Elements: 2 (6) |
Complexity: 2 |
Complexity Density: 0.5 |
|
431 |
18 |
private boolean isFieldWanted(String id, List<Pattern> filters)... |
432 |
|
{ |
433 |
18 |
for (Pattern p : filters) |
434 |
|
{ |
435 |
18 |
if (p.matcher(id.toUpperCase()).matches()) |
436 |
|
{ |
437 |
18 |
return true; |
438 |
|
} |
439 |
|
} |
440 |
0 |
return false; |
441 |
|
} |
442 |
|
|
443 |
|
|
444 |
|
|
445 |
|
|
446 |
|
|
447 |
|
|
448 |
|
|
449 |
|
|
450 |
|
|
451 |
|
|
|
|
| 89.2% |
Uncovered Elements: 4 (37) |
Complexity: 8 |
Complexity Density: 0.35 |
|
452 |
4 |
protected void parseCsqHeader()... |
453 |
|
{ |
454 |
4 |
List<Pattern> vepFieldFilters = getFieldMatchers(VEP_FIELDS_PREF, |
455 |
|
DEFAULT_VEP_FIELDS); |
456 |
4 |
vepFieldsOfInterest = new HashMap<>(); |
457 |
|
|
458 |
4 |
VCFInfoHeaderLine csqInfo = header.getInfoHeaderLine(CSQ_FIELD); |
459 |
4 |
if (csqInfo == null) |
460 |
|
{ |
461 |
3 |
return; |
462 |
|
} |
463 |
|
|
464 |
|
|
465 |
|
|
466 |
|
|
467 |
|
|
468 |
1 |
String desc = csqInfo.getDescription(); |
469 |
1 |
int spacePos = desc.lastIndexOf(" "); |
470 |
1 |
desc = desc.substring(spacePos + 1); |
471 |
|
|
472 |
1 |
if (desc != null) |
473 |
|
{ |
474 |
1 |
String[] format = desc.split(PIPE_REGEX); |
475 |
1 |
int index = 0; |
476 |
1 |
for (String field : format) |
477 |
|
{ |
478 |
9 |
if (CSQ_CONSEQUENCE_KEY.equals(field)) |
479 |
|
{ |
480 |
1 |
csqConsequenceFieldIndex = index; |
481 |
|
} |
482 |
9 |
if (CSQ_ALLELE_NUM_KEY.equals(field)) |
483 |
|
{ |
484 |
0 |
csqAlleleNumberFieldIndex = index; |
485 |
|
} |
486 |
9 |
if (CSQ_ALLELE_KEY.equals(field)) |
487 |
|
{ |
488 |
1 |
csqAlleleFieldIndex = index; |
489 |
|
} |
490 |
9 |
if (CSQ_FEATURE_KEY.equals(field)) |
491 |
|
{ |
492 |
1 |
csqFeatureFieldIndex = index; |
493 |
|
} |
494 |
|
|
495 |
9 |
if (isFieldWanted(field, vepFieldFilters)) |
496 |
|
{ |
497 |
9 |
vepFieldsOfInterest.put(index, field); |
498 |
|
} |
499 |
|
|
500 |
9 |
index++; |
501 |
|
} |
502 |
|
} |
503 |
|
} |
504 |
|
|
505 |
|
|
506 |
|
|
507 |
|
|
508 |
|
|
509 |
|
|
510 |
|
|
511 |
|
|
512 |
|
|
513 |
|
|
514 |
|
|
515 |
|
|
516 |
|
@param |
517 |
|
@param |
518 |
|
@return |
519 |
|
|
|
|
| 87.5% |
Uncovered Elements: 1 (8) |
Complexity: 2 |
Complexity Density: 0.25 |
|
520 |
8 |
private List<Pattern> getFieldMatchers(String key, String def)... |
521 |
|
{ |
522 |
8 |
String pref = Cache.getDefault(key, def); |
523 |
8 |
List<Pattern> patterns = new ArrayList<>(); |
524 |
8 |
String[] tokens = pref.split(","); |
525 |
8 |
for (String token : tokens) |
526 |
|
{ |
527 |
8 |
try |
528 |
|
{ |
529 |
8 |
patterns.add(Pattern.compile(token.toUpperCase())); |
530 |
|
} catch (PatternSyntaxException e) |
531 |
|
{ |
532 |
0 |
System.err.println("Invalid pattern ignored: " + token); |
533 |
|
} |
534 |
|
} |
535 |
8 |
return patterns; |
536 |
|
} |
537 |
|
|
538 |
|
|
539 |
|
|
540 |
|
|
541 |
|
|
542 |
|
@param |
543 |
|
|
|
|
| 60.9% |
Uncovered Elements: 9 (23) |
Complexity: 6 |
Complexity Density: 0.4 |
|
544 |
11 |
protected void transferAddedFeatures(SequenceI seq)... |
545 |
|
{ |
546 |
11 |
DBRefEntry[] dbrefs = seq.getDBRefs(); |
547 |
11 |
if (dbrefs == null) |
548 |
|
{ |
549 |
0 |
return; |
550 |
|
} |
551 |
11 |
for (DBRefEntry dbref : dbrefs) |
552 |
|
{ |
553 |
16 |
Mapping mapping = dbref.getMap(); |
554 |
16 |
if (mapping == null || mapping.getTo() == null) |
555 |
|
{ |
556 |
11 |
continue; |
557 |
|
} |
558 |
|
|
559 |
5 |
SequenceI mapTo = mapping.getTo(); |
560 |
5 |
MapList map = mapping.getMap(); |
561 |
5 |
if (map.getFromRatio() == 3) |
562 |
|
{ |
563 |
|
|
564 |
|
|
565 |
|
|
566 |
5 |
AlignmentUtils.computeProteinFeatures(seq, mapTo, map); |
567 |
|
} |
568 |
|
else |
569 |
|
{ |
570 |
|
|
571 |
|
|
572 |
|
|
573 |
0 |
List<SequenceFeature> features = seq.getFeatures() |
574 |
|
.getPositionalFeatures(SequenceOntologyI.SEQUENCE_VARIANT); |
575 |
0 |
for (SequenceFeature sf : features) |
576 |
|
{ |
577 |
0 |
if (FEATURE_GROUP_VCF.equals(sf.getFeatureGroup())) |
578 |
|
{ |
579 |
0 |
transferFeature(sf, mapTo, map); |
580 |
|
} |
581 |
|
} |
582 |
|
} |
583 |
|
} |
584 |
|
} |
585 |
|
|
586 |
|
|
587 |
|
|
588 |
|
|
589 |
|
|
590 |
|
@param |
591 |
|
@param |
592 |
|
@return |
593 |
|
|
|
|
| 81.8% |
Uncovered Elements: 2 (11) |
Complexity: 3 |
Complexity Density: 0.43 |
|
594 |
24 |
protected int loadSequenceVCF(SequenceI seq, String vcfAssembly)... |
595 |
|
{ |
596 |
24 |
VCFMap vcfMap = getVcfMap(seq, vcfAssembly); |
597 |
24 |
if (vcfMap == null) |
598 |
|
{ |
599 |
0 |
return 0; |
600 |
|
} |
601 |
|
|
602 |
|
|
603 |
|
|
604 |
|
|
605 |
24 |
SequenceI dss = seq.getDatasetSequence(); |
606 |
24 |
if (dss == null) |
607 |
|
{ |
608 |
3 |
dss = seq; |
609 |
|
} |
610 |
24 |
return addVcfVariants(dss, vcfMap); |
611 |
|
} |
612 |
|
|
613 |
|
|
614 |
|
|
615 |
|
|
616 |
|
@param |
617 |
|
@param |
618 |
|
@return |
619 |
|
|
|
|
| 42.9% |
Uncovered Elements: 28 (49) |
Complexity: 10 |
Complexity Density: 0.3 |
|
620 |
24 |
private VCFMap getVcfMap(SequenceI seq, String vcfAssembly)... |
621 |
|
{ |
622 |
|
|
623 |
|
|
624 |
|
|
625 |
24 |
VCFMap vcfMap = null; |
626 |
24 |
if (dictionary != null) |
627 |
|
{ |
628 |
3 |
vcfMap = getContigMap(seq); |
629 |
|
} |
630 |
24 |
if (vcfMap != null) |
631 |
|
{ |
632 |
3 |
return vcfMap; |
633 |
|
} |
634 |
|
|
635 |
|
|
636 |
|
|
637 |
|
|
638 |
|
|
639 |
21 |
GeneLociI seqCoords = seq.getGeneLoci(); |
640 |
21 |
if (seqCoords == null) |
641 |
|
{ |
642 |
0 |
Cache.log.warn(String.format( |
643 |
|
"Can't query VCF for %s as chromosome coordinates not known", |
644 |
|
seq.getName())); |
645 |
0 |
return null; |
646 |
|
} |
647 |
|
|
648 |
21 |
String species = seqCoords.getSpeciesId(); |
649 |
21 |
String chromosome = seqCoords.getChromosomeId(); |
650 |
21 |
String seqRef = seqCoords.getAssemblyId(); |
651 |
21 |
MapList map = seqCoords.getMap(); |
652 |
|
|
653 |
21 |
if (!vcfSpeciesMatchesSequence(vcfAssembly, species)) |
654 |
|
{ |
655 |
0 |
return null; |
656 |
|
} |
657 |
|
|
658 |
21 |
if (vcfAssemblyMatchesSequence(vcfAssembly, seqRef)) |
659 |
|
{ |
660 |
21 |
return new VCFMap(chromosome, map); |
661 |
|
} |
662 |
|
|
663 |
0 |
if (!"GRCh38".equalsIgnoreCase(seqRef) |
664 |
|
|| !vcfAssembly.contains("Homo_sapiens_assembly19")) |
665 |
|
{ |
666 |
0 |
return null; |
667 |
|
} |
668 |
|
|
669 |
|
|
670 |
|
|
671 |
|
|
672 |
|
|
673 |
|
|
674 |
|
|
675 |
|
|
676 |
0 |
List<int[]> toVcfRanges = new ArrayList<>(); |
677 |
0 |
List<int[]> fromSequenceRanges = new ArrayList<>(); |
678 |
0 |
String toRef = "GRCh37"; |
679 |
|
|
680 |
0 |
for (int[] range : map.getToRanges()) |
681 |
|
{ |
682 |
0 |
int[] fromRange = map.locateInFrom(range[0], range[1]); |
683 |
0 |
if (fromRange == null) |
684 |
|
{ |
685 |
|
|
686 |
0 |
continue; |
687 |
|
} |
688 |
|
|
689 |
0 |
int[] newRange = mapReferenceRange(range, chromosome, "human", seqRef, |
690 |
|
toRef); |
691 |
0 |
if (newRange == null) |
692 |
|
{ |
693 |
0 |
Cache.log.error( |
694 |
|
String.format("Failed to map %s:%s:%s:%d:%d to %s", species, |
695 |
|
chromosome, seqRef, range[0], range[1], toRef)); |
696 |
0 |
continue; |
697 |
|
} |
698 |
|
else |
699 |
|
{ |
700 |
0 |
toVcfRanges.add(newRange); |
701 |
0 |
fromSequenceRanges.add(fromRange); |
702 |
|
} |
703 |
|
} |
704 |
|
|
705 |
0 |
return new VCFMap(chromosome, |
706 |
|
new MapList(fromSequenceRanges, toVcfRanges, 1, 1)); |
707 |
|
} |
708 |
|
|
709 |
|
|
710 |
|
|
711 |
|
|
712 |
|
|
713 |
|
|
714 |
|
@param |
715 |
|
@return |
716 |
|
|
|
|
| 75% |
Uncovered Elements: 3 (12) |
Complexity: 3 |
Complexity Density: 0.38 |
|
717 |
3 |
private VCFMap getContigMap(SequenceI seq)... |
718 |
|
{ |
719 |
3 |
String id = seq.getName(); |
720 |
3 |
SAMSequenceRecord contig = dictionary.getSequence(id); |
721 |
3 |
if (contig != null) |
722 |
|
{ |
723 |
3 |
int len = seq.getLength(); |
724 |
3 |
if (len == contig.getSequenceLength()) |
725 |
|
{ |
726 |
3 |
MapList map = new MapList(new int[] { 1, len }, |
727 |
|
new int[] |
728 |
|
{ 1, len }, 1, 1); |
729 |
3 |
return new VCFMap(id, map); |
730 |
|
} |
731 |
|
} |
732 |
0 |
return null; |
733 |
|
} |
734 |
|
|
735 |
|
|
736 |
|
|
737 |
|
|
738 |
|
|
739 |
|
@param |
740 |
|
@param |
741 |
|
@return |
742 |
|
|
|
|
| 60% |
Uncovered Elements: 2 (5) |
Complexity: 3 |
Complexity Density: 1 |
|
743 |
21 |
private boolean vcfAssemblyMatchesSequence(String vcfAssembly,... |
744 |
|
String seqRef) |
745 |
|
{ |
746 |
|
|
747 |
|
|
748 |
|
|
749 |
21 |
if ("GRCh38".equalsIgnoreCase(seqRef) |
750 |
|
&& vcfAssembly.contains("Homo_sapiens_assembly19")) |
751 |
|
{ |
752 |
0 |
return false; |
753 |
|
} |
754 |
21 |
return true; |
755 |
|
} |
756 |
|
|
757 |
|
|
758 |
|
|
759 |
|
|
760 |
|
|
761 |
|
@param |
762 |
|
@param |
763 |
|
@return |
764 |
|
|
|
|
| 33.3% |
Uncovered Elements: 6 (9) |
Complexity: 5 |
Complexity Density: 1 |
|
765 |
21 |
boolean vcfSpeciesMatchesSequence(String vcfAssembly, String speciesId)... |
766 |
|
{ |
767 |
|
|
768 |
|
|
769 |
|
|
770 |
|
|
771 |
|
|
772 |
|
|
773 |
|
|
774 |
21 |
if (vcfAssembly.contains("Homo_sapiens") |
775 |
|
&& "HOMO_SAPIENS".equals(speciesId)) |
776 |
|
{ |
777 |
21 |
return true; |
778 |
|
} |
779 |
|
|
780 |
0 |
if (vcfAssembly.contains("c_elegans") |
781 |
|
&& "CAENORHABDITIS_ELEGANS".equals(speciesId)) |
782 |
|
{ |
783 |
0 |
return true; |
784 |
|
} |
785 |
|
|
786 |
|
|
787 |
|
|
788 |
0 |
return false; |
789 |
|
} |
790 |
|
|
791 |
|
|
792 |
|
|
793 |
|
|
794 |
|
|
795 |
|
|
796 |
|
@param |
797 |
|
@param |
798 |
|
|
799 |
|
@return |
800 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (19) |
Complexity: 3 |
Complexity Density: 0.2 |
|
801 |
24 |
protected int addVcfVariants(SequenceI seq, VCFMap map)... |
802 |
|
{ |
803 |
24 |
boolean forwardStrand = map.map.isToForwardStrand(); |
804 |
|
|
805 |
|
|
806 |
|
|
807 |
|
|
808 |
24 |
int count = 0; |
809 |
|
|
810 |
24 |
for (int[] range : map.map.getToRanges()) |
811 |
|
{ |
812 |
39 |
int vcfStart = Math.min(range[0], range[1]); |
813 |
39 |
int vcfEnd = Math.max(range[0], range[1]); |
814 |
39 |
CloseableIterator<VariantContext> variants = reader |
815 |
|
.query(map.chromosome, vcfStart, vcfEnd); |
816 |
67 |
while (variants.hasNext()) |
817 |
|
{ |
818 |
28 |
VariantContext variant = variants.next(); |
819 |
|
|
820 |
28 |
int[] featureRange = map.map.locateInFrom(variant.getStart(), |
821 |
|
variant.getEnd()); |
822 |
|
|
823 |
28 |
if (featureRange != null) |
824 |
|
{ |
825 |
26 |
int featureStart = Math.min(featureRange[0], featureRange[1]); |
826 |
26 |
int featureEnd = Math.max(featureRange[0], featureRange[1]); |
827 |
26 |
count += addAlleleFeatures(seq, variant, featureStart, featureEnd, |
828 |
|
forwardStrand); |
829 |
|
} |
830 |
|
} |
831 |
39 |
variants.close(); |
832 |
|
} |
833 |
|
|
834 |
24 |
return count; |
835 |
|
} |
836 |
|
|
837 |
|
|
838 |
|
|
839 |
|
|
840 |
|
|
841 |
|
@param |
842 |
|
@param |
843 |
|
@return |
844 |
|
|
|
|
| 87.5% |
Uncovered Elements: 1 (8) |
Complexity: 3 |
Complexity Density: 0.5 |
|
845 |
43 |
protected float getAlleleFrequency(VariantContext variant, int alleleIndex)... |
846 |
|
{ |
847 |
43 |
float score = 0f; |
848 |
43 |
String attributeValue = getAttributeValue(variant, |
849 |
|
ALLELE_FREQUENCY_KEY, alleleIndex); |
850 |
43 |
if (attributeValue != null) |
851 |
|
{ |
852 |
43 |
try |
853 |
|
{ |
854 |
43 |
score = Float.parseFloat(attributeValue); |
855 |
|
} catch (NumberFormatException e) |
856 |
|
{ |
857 |
|
|
858 |
|
} |
859 |
|
} |
860 |
|
|
861 |
43 |
return score; |
862 |
|
} |
863 |
|
|
864 |
|
|
865 |
|
|
866 |
|
|
867 |
|
@param |
868 |
|
@param |
869 |
|
@param |
870 |
|
@return |
871 |
|
|
|
|
| 80% |
Uncovered Elements: 2 (10) |
Complexity: 3 |
Complexity Density: 0.5 |
|
872 |
119 |
protected String getAttributeValue(VariantContext variant,... |
873 |
|
String attributeName, int alleleIndex) |
874 |
|
{ |
875 |
119 |
Object att = variant.getAttribute(attributeName); |
876 |
|
|
877 |
119 |
if (att instanceof String) |
878 |
|
{ |
879 |
40 |
return (String) att; |
880 |
|
} |
881 |
79 |
else if (att instanceof ArrayList) |
882 |
|
{ |
883 |
79 |
return ((List<String>) att).get(alleleIndex); |
884 |
|
} |
885 |
|
|
886 |
0 |
return null; |
887 |
|
} |
888 |
|
|
889 |
|
|
890 |
|
|
891 |
|
|
892 |
|
|
893 |
|
@param |
894 |
|
@param |
895 |
|
@param |
896 |
|
@param |
897 |
|
@param |
898 |
|
@return |
899 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (7) |
Complexity: 2 |
Complexity Density: 0.4 |
|
900 |
26 |
protected int addAlleleFeatures(SequenceI seq, VariantContext variant,... |
901 |
|
int featureStart, int featureEnd, boolean forwardStrand) |
902 |
|
{ |
903 |
26 |
int added = 0; |
904 |
|
|
905 |
|
|
906 |
|
|
907 |
|
|
908 |
|
|
909 |
26 |
int altAlleleCount = variant.getAlternateAlleles().size(); |
910 |
69 |
for (int i = 0; i < altAlleleCount; i++) |
911 |
|
{ |
912 |
43 |
added += addAlleleFeature(seq, variant, i, featureStart, featureEnd, |
913 |
|
forwardStrand); |
914 |
|
} |
915 |
26 |
return added; |
916 |
|
} |
917 |
|
|
918 |
|
|
919 |
|
|
920 |
|
|
921 |
|
|
922 |
|
|
923 |
|
|
924 |
|
@param |
925 |
|
@param |
926 |
|
@param |
927 |
|
|
928 |
|
@param |
929 |
|
@param |
930 |
|
@param |
931 |
|
@return |
932 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (34) |
Complexity: 7 |
Complexity Density: 0.27 |
|
933 |
43 |
protected int addAlleleFeature(SequenceI seq, VariantContext variant,... |
934 |
|
int altAlleleIndex, int featureStart, int featureEnd, |
935 |
|
boolean forwardStrand) |
936 |
|
{ |
937 |
43 |
String reference = variant.getReference().getBaseString(); |
938 |
43 |
Allele alt = variant.getAlternateAllele(altAlleleIndex); |
939 |
43 |
String allele = alt.getBaseString(); |
940 |
|
|
941 |
|
|
942 |
|
|
943 |
|
|
944 |
|
|
945 |
43 |
int referenceLength = reference.length(); |
946 |
43 |
if (!forwardStrand && allele.length() > referenceLength |
947 |
|
&& allele.startsWith(reference)) |
948 |
|
{ |
949 |
4 |
featureStart -= referenceLength; |
950 |
4 |
featureEnd = featureStart; |
951 |
4 |
char insertAfter = seq.getCharAt(featureStart - seq.getStart()); |
952 |
4 |
reference = Dna.reverseComplement(String.valueOf(insertAfter)); |
953 |
4 |
allele = allele.substring(referenceLength) + reference; |
954 |
|
} |
955 |
|
|
956 |
|
|
957 |
|
|
958 |
|
|
959 |
|
|
960 |
43 |
StringBuilder sb = new StringBuilder(); |
961 |
43 |
sb.append(forwardStrand ? reference : Dna.reverseComplement(reference)); |
962 |
43 |
sb.append(COMMA); |
963 |
43 |
sb.append(forwardStrand ? allele : Dna.reverseComplement(allele)); |
964 |
43 |
String alleles = sb.toString(); |
965 |
|
|
966 |
|
|
967 |
|
|
968 |
|
|
969 |
|
|
970 |
43 |
String consequence = getConsequenceForAlleleAndFeature(variant, CSQ_FIELD, |
971 |
|
altAlleleIndex, csqAlleleFieldIndex, |
972 |
|
csqAlleleNumberFieldIndex, seq.getName().toLowerCase(), |
973 |
|
csqFeatureFieldIndex); |
974 |
|
|
975 |
|
|
976 |
|
|
977 |
|
|
978 |
43 |
String type = SequenceOntologyI.SEQUENCE_VARIANT; |
979 |
43 |
if (consequence != null) |
980 |
|
{ |
981 |
7 |
type = getOntologyTerm(consequence); |
982 |
|
} |
983 |
|
|
984 |
43 |
float score = getAlleleFrequency(variant, altAlleleIndex); |
985 |
|
|
986 |
43 |
SequenceFeature sf = new SequenceFeature(type, alleles, featureStart, |
987 |
|
featureEnd, score, FEATURE_GROUP_VCF); |
988 |
43 |
sf.setSource(sourceId); |
989 |
|
|
990 |
43 |
sf.setValue(Gff3Helper.ALLELES, alleles); |
991 |
|
|
992 |
43 |
addAlleleProperties(variant, sf, altAlleleIndex, consequence); |
993 |
|
|
994 |
43 |
seq.addSequenceFeature(sf); |
995 |
|
|
996 |
43 |
return 1; |
997 |
|
} |
998 |
|
|
999 |
|
|
1000 |
|
|
1001 |
|
|
1002 |
|
|
1003 |
|
|
1004 |
|
|
1005 |
|
|
1006 |
|
|
1007 |
|
|
1008 |
|
@param |
1009 |
|
@return |
1010 |
|
@see |
1011 |
|
|
|
|
| 68.2% |
Uncovered Elements: 7 (22) |
Complexity: 6 |
Complexity Density: 0.5 |
|
1012 |
7 |
String getOntologyTerm(String consequence)... |
1013 |
|
{ |
1014 |
7 |
String type = SequenceOntologyI.SEQUENCE_VARIANT; |
1015 |
|
|
1016 |
|
|
1017 |
|
|
1018 |
|
|
1019 |
|
|
1020 |
7 |
if (csqAlleleFieldIndex == -1) |
1021 |
|
{ |
1022 |
|
|
1023 |
|
|
1024 |
|
|
1025 |
0 |
return type; |
1026 |
|
} |
1027 |
|
|
1028 |
7 |
if (consequence != null) |
1029 |
|
{ |
1030 |
7 |
String[] csqFields = consequence.split(PIPE_REGEX); |
1031 |
7 |
if (csqFields.length > csqConsequenceFieldIndex) |
1032 |
|
{ |
1033 |
7 |
type = csqFields[csqConsequenceFieldIndex]; |
1034 |
|
} |
1035 |
|
} |
1036 |
|
else |
1037 |
|
{ |
1038 |
|
|
1039 |
|
} |
1040 |
|
|
1041 |
|
|
1042 |
|
|
1043 |
|
|
1044 |
|
|
1045 |
7 |
if (type != null) |
1046 |
|
{ |
1047 |
7 |
int pos = type.indexOf('&'); |
1048 |
7 |
if (pos > 0) |
1049 |
|
{ |
1050 |
0 |
type = type.substring(0, pos); |
1051 |
|
} |
1052 |
|
} |
1053 |
7 |
return type; |
1054 |
|
} |
1055 |
|
|
1056 |
|
|
1057 |
|
|
1058 |
|
|
1059 |
|
|
1060 |
|
|
1061 |
|
|
1062 |
|
|
1063 |
|
|
1064 |
|
|
1065 |
|
|
1066 |
|
|
1067 |
|
|
1068 |
|
|
1069 |
|
@param |
1070 |
|
@param |
1071 |
|
@param |
1072 |
|
@param |
1073 |
|
@param |
1074 |
|
@param |
1075 |
|
@param |
1076 |
|
@return |
1077 |
|
|
|
|
| 87.5% |
Uncovered Elements: 3 (24) |
Complexity: 9 |
Complexity Density: 0.64 |
|
1078 |
43 |
private String getConsequenceForAlleleAndFeature(VariantContext variant,... |
1079 |
|
String vcfInfoId, int altAlleleIndex, int alleleFieldIndex, |
1080 |
|
int alleleNumberFieldIndex, |
1081 |
|
String seqName, int featureFieldIndex) |
1082 |
|
{ |
1083 |
43 |
if (alleleFieldIndex == -1 || featureFieldIndex == -1) |
1084 |
|
{ |
1085 |
29 |
return null; |
1086 |
|
} |
1087 |
14 |
Object value = variant.getAttribute(vcfInfoId); |
1088 |
|
|
1089 |
14 |
if (value == null || !(value instanceof List<?>)) |
1090 |
|
{ |
1091 |
0 |
return null; |
1092 |
|
} |
1093 |
|
|
1094 |
|
|
1095 |
|
|
1096 |
|
|
1097 |
|
|
1098 |
14 |
List<String> consequences = (List<String>) value; |
1099 |
|
|
1100 |
14 |
for (String consequence : consequences) |
1101 |
|
{ |
1102 |
41 |
String[] csqFields = consequence.split(PIPE_REGEX); |
1103 |
41 |
if (csqFields.length > featureFieldIndex) |
1104 |
|
{ |
1105 |
41 |
String featureIdentifier = csqFields[featureFieldIndex]; |
1106 |
41 |
if (featureIdentifier.length() > 4 |
1107 |
|
&& seqName.indexOf(featureIdentifier.toLowerCase()) > -1) |
1108 |
|
{ |
1109 |
|
|
1110 |
|
|
1111 |
|
|
1112 |
10 |
if (matchAllele(variant, altAlleleIndex, csqFields, |
1113 |
|
alleleFieldIndex, alleleNumberFieldIndex)) |
1114 |
|
{ |
1115 |
7 |
return consequence; |
1116 |
|
} |
1117 |
|
} |
1118 |
|
} |
1119 |
|
} |
1120 |
7 |
return null; |
1121 |
|
} |
1122 |
|
|
|
|
| 43.8% |
Uncovered Elements: 9 (16) |
Complexity: 5 |
Complexity Density: 0.5 |
|
1123 |
10 |
private boolean matchAllele(VariantContext variant, int altAlleleIndex,... |
1124 |
|
String[] csqFields, int alleleFieldIndex, |
1125 |
|
int alleleNumberFieldIndex) |
1126 |
|
{ |
1127 |
|
|
1128 |
|
|
1129 |
|
|
1130 |
|
|
1131 |
10 |
if (alleleNumberFieldIndex > -1) |
1132 |
|
{ |
1133 |
0 |
if (csqFields.length <= alleleNumberFieldIndex) |
1134 |
|
{ |
1135 |
0 |
return false; |
1136 |
|
} |
1137 |
0 |
String alleleNum = csqFields[alleleNumberFieldIndex]; |
1138 |
0 |
return String.valueOf(altAlleleIndex + 1).equals(alleleNum); |
1139 |
|
} |
1140 |
|
|
1141 |
|
|
1142 |
|
|
1143 |
|
|
1144 |
10 |
if (alleleFieldIndex > -1 && csqFields.length > alleleFieldIndex) |
1145 |
|
{ |
1146 |
10 |
String csqAllele = csqFields[alleleFieldIndex]; |
1147 |
10 |
String vcfAllele = variant.getAlternateAllele(altAlleleIndex) |
1148 |
|
.getBaseString(); |
1149 |
10 |
return csqAllele.equals(vcfAllele); |
1150 |
|
} |
1151 |
0 |
return false; |
1152 |
|
} |
1153 |
|
|
1154 |
|
|
1155 |
|
|
1156 |
|
|
1157 |
|
@param |
1158 |
|
@param |
1159 |
|
@param |
1160 |
|
|
1161 |
|
@param |
1162 |
|
|
1163 |
|
|
1164 |
|
|
|
|
| 86.1% |
Uncovered Elements: 5 (36) |
Complexity: 8 |
Complexity Density: 0.36 |
|
1165 |
43 |
protected void addAlleleProperties(VariantContext variant,... |
1166 |
|
SequenceFeature sf, final int altAlelleIndex, String consequence) |
1167 |
|
{ |
1168 |
43 |
Map<String, Object> atts = variant.getAttributes(); |
1169 |
|
|
1170 |
43 |
for (Entry<String, Object> att : atts.entrySet()) |
1171 |
|
{ |
1172 |
142 |
String key = att.getKey(); |
1173 |
|
|
1174 |
|
|
1175 |
|
|
1176 |
|
|
1177 |
|
|
1178 |
142 |
if (CSQ_FIELD.equals(key)) |
1179 |
|
{ |
1180 |
14 |
addConsequences(variant, sf, consequence); |
1181 |
14 |
continue; |
1182 |
|
} |
1183 |
|
|
1184 |
|
|
1185 |
|
|
1186 |
|
|
1187 |
128 |
if (!vcfFieldsOfInterest.contains(key)) |
1188 |
|
{ |
1189 |
38 |
continue; |
1190 |
|
} |
1191 |
|
|
1192 |
|
|
1193 |
|
|
1194 |
|
|
1195 |
90 |
if (!vcfFieldsOfInterest.contains(key)) |
1196 |
|
{ |
1197 |
0 |
continue; |
1198 |
|
} |
1199 |
|
|
1200 |
|
|
1201 |
|
|
1202 |
|
|
1203 |
|
|
1204 |
|
|
1205 |
90 |
VCFInfoHeaderLine infoHeader = header.getInfoHeaderLine(key); |
1206 |
90 |
if (infoHeader == null) |
1207 |
|
{ |
1208 |
|
|
1209 |
|
|
1210 |
|
|
1211 |
|
|
1212 |
0 |
continue; |
1213 |
|
} |
1214 |
|
|
1215 |
90 |
VCFHeaderLineCount number = infoHeader.getCountType(); |
1216 |
90 |
int index = altAlelleIndex; |
1217 |
90 |
if (number == VCFHeaderLineCount.R) |
1218 |
|
{ |
1219 |
|
|
1220 |
|
|
1221 |
|
|
1222 |
|
|
1223 |
14 |
index++; |
1224 |
|
} |
1225 |
76 |
else if (number != VCFHeaderLineCount.A) |
1226 |
|
{ |
1227 |
|
|
1228 |
|
|
1229 |
|
|
1230 |
14 |
continue; |
1231 |
|
} |
1232 |
|
|
1233 |
|
|
1234 |
|
|
1235 |
|
|
1236 |
76 |
String value = getAttributeValue(variant, key, index); |
1237 |
76 |
if (value != null) |
1238 |
|
{ |
1239 |
76 |
sf.setValue(key, value); |
1240 |
|
} |
1241 |
|
} |
1242 |
|
} |
1243 |
|
|
1244 |
|
|
1245 |
|
|
1246 |
|
|
1247 |
|
|
1248 |
|
|
1249 |
|
|
1250 |
|
|
1251 |
|
|
1252 |
|
@param |
1253 |
|
@param |
1254 |
|
@param |
1255 |
|
|
|
|
| 81.5% |
Uncovered Elements: 5 (27) |
Complexity: 9 |
Complexity Density: 0.53 |
|
1256 |
14 |
protected void addConsequences(VariantContext variant, SequenceFeature sf,... |
1257 |
|
String myConsequence) |
1258 |
|
{ |
1259 |
14 |
Object value = variant.getAttribute(CSQ_FIELD); |
1260 |
|
|
1261 |
14 |
if (value == null || !(value instanceof List<?>)) |
1262 |
|
{ |
1263 |
0 |
return; |
1264 |
|
} |
1265 |
|
|
1266 |
14 |
List<String> consequences = (List<String>) value; |
1267 |
|
|
1268 |
|
|
1269 |
|
|
1270 |
|
|
1271 |
|
|
1272 |
14 |
Map<String, String> csqValues = new HashMap<>(); |
1273 |
|
|
1274 |
14 |
for (String consequence : consequences) |
1275 |
|
{ |
1276 |
50 |
if (myConsequence == null || myConsequence.equals(consequence)) |
1277 |
|
{ |
1278 |
31 |
String[] csqFields = consequence.split(PIPE_REGEX); |
1279 |
|
|
1280 |
|
|
1281 |
|
|
1282 |
|
|
1283 |
|
|
1284 |
31 |
int i = 0; |
1285 |
31 |
for (String field : csqFields) |
1286 |
|
{ |
1287 |
279 |
if (field != null && field.length() > 0) |
1288 |
|
{ |
1289 |
279 |
String id = vepFieldsOfInterest.get(i); |
1290 |
279 |
if (id != null) |
1291 |
|
{ |
1292 |
279 |
csqValues.put(id, field); |
1293 |
|
} |
1294 |
|
} |
1295 |
279 |
i++; |
1296 |
|
} |
1297 |
|
} |
1298 |
|
} |
1299 |
|
|
1300 |
14 |
if (!csqValues.isEmpty()) |
1301 |
|
{ |
1302 |
14 |
sf.setValue(CSQ_FIELD, csqValues); |
1303 |
|
} |
1304 |
|
} |
1305 |
|
|
1306 |
|
|
1307 |
|
|
1308 |
|
|
1309 |
|
|
1310 |
|
@param |
1311 |
|
@return |
1312 |
|
|
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
1313 |
0 |
protected String complement(byte[] reference)... |
1314 |
|
{ |
1315 |
0 |
return String.valueOf(Dna.getComplement((char) reference[0])); |
1316 |
|
} |
1317 |
|
|
1318 |
|
|
1319 |
|
|
1320 |
|
|
1321 |
|
|
1322 |
|
|
1323 |
|
|
1324 |
|
|
1325 |
|
|
1326 |
|
|
1327 |
|
|
1328 |
|
|
1329 |
|
@param |
1330 |
|
|
1331 |
|
@param |
1332 |
|
@param |
1333 |
|
@param |
1334 |
|
|
1335 |
|
@param |
1336 |
|
|
1337 |
|
@return |
1338 |
|
|
|
|
| 0% |
Uncovered Elements: 18 (18) |
Complexity: 4 |
Complexity Density: 0.33 |
|
1339 |
0 |
protected int[] mapReferenceRange(int[] queryRange, String chromosome,... |
1340 |
|
String species, String fromRef, String toRef) |
1341 |
|
{ |
1342 |
|
|
1343 |
|
|
1344 |
|
|
1345 |
|
|
1346 |
0 |
int[] mappedRange = findSubsumedRangeMapping(queryRange, chromosome, |
1347 |
|
species, fromRef, toRef); |
1348 |
0 |
if (mappedRange != null) |
1349 |
|
{ |
1350 |
0 |
return mappedRange; |
1351 |
|
} |
1352 |
|
|
1353 |
|
|
1354 |
|
|
1355 |
|
|
1356 |
0 |
EnsemblMap mapper = new EnsemblMap(); |
1357 |
0 |
int[] mapping = mapper.getAssemblyMapping(species, chromosome, fromRef, |
1358 |
|
toRef, queryRange); |
1359 |
|
|
1360 |
0 |
if (mapping == null) |
1361 |
|
{ |
1362 |
|
|
1363 |
0 |
return null; |
1364 |
|
} |
1365 |
|
|
1366 |
|
|
1367 |
|
|
1368 |
|
|
1369 |
0 |
String key = makeRangesKey(chromosome, species, fromRef, toRef); |
1370 |
0 |
if (!assemblyMappings.containsKey(key)) |
1371 |
|
{ |
1372 |
0 |
assemblyMappings.put(key, new HashMap<int[], int[]>()); |
1373 |
|
} |
1374 |
|
|
1375 |
0 |
assemblyMappings.get(key).put(queryRange, mapping); |
1376 |
|
|
1377 |
0 |
return mapping; |
1378 |
|
} |
1379 |
|
|
1380 |
|
|
1381 |
|
|
1382 |
|
|
1383 |
|
|
1384 |
|
|
1385 |
|
|
1386 |
|
|
1387 |
|
|
1388 |
|
|
1389 |
|
|
1390 |
|
|
1391 |
|
|
1392 |
|
|
1393 |
|
|
1394 |
|
@param |
1395 |
|
@param |
1396 |
|
@param |
1397 |
|
@param |
1398 |
|
@param |
1399 |
|
@return |
1400 |
|
|
|
|
| 0% |
Uncovered Elements: 19 (19) |
Complexity: 4 |
Complexity Density: 0.31 |
|
1401 |
0 |
protected int[] findSubsumedRangeMapping(int[] queryRange, String chromosome,... |
1402 |
|
String species, String fromRef, String toRef) |
1403 |
|
{ |
1404 |
0 |
String key = makeRangesKey(chromosome, species, fromRef, toRef); |
1405 |
0 |
if (assemblyMappings.containsKey(key)) |
1406 |
|
{ |
1407 |
0 |
Map<int[], int[]> mappedRanges = assemblyMappings.get(key); |
1408 |
0 |
for (Entry<int[], int[]> mappedRange : mappedRanges.entrySet()) |
1409 |
|
{ |
1410 |
0 |
int[] fromRange = mappedRange.getKey(); |
1411 |
0 |
int[] toRange = mappedRange.getValue(); |
1412 |
0 |
if (fromRange[1] - fromRange[0] == toRange[1] - toRange[0]) |
1413 |
|
{ |
1414 |
|
|
1415 |
|
|
1416 |
|
|
1417 |
0 |
if (MappingUtils.rangeContains(fromRange, queryRange)) |
1418 |
|
{ |
1419 |
|
|
1420 |
|
|
1421 |
|
|
1422 |
0 |
int offset = queryRange[0] - fromRange[0]; |
1423 |
0 |
int mappedRangeFrom = toRange[0] + offset; |
1424 |
0 |
int mappedRangeTo = mappedRangeFrom + (queryRange[1] - queryRange[0]); |
1425 |
0 |
return new int[] { mappedRangeFrom, mappedRangeTo }; |
1426 |
|
} |
1427 |
|
} |
1428 |
|
} |
1429 |
|
} |
1430 |
0 |
return null; |
1431 |
|
} |
1432 |
|
|
1433 |
|
|
1434 |
|
|
1435 |
|
|
1436 |
|
|
1437 |
|
|
1438 |
|
@param |
1439 |
|
@param |
1440 |
|
@param |
1441 |
|
|
1442 |
|
|
|
|
| 0% |
Uncovered Elements: 9 (9) |
Complexity: 2 |
Complexity Density: 0.29 |
|
1443 |
0 |
protected void transferFeature(SequenceFeature sf,... |
1444 |
|
SequenceI targetSequence, MapList mapping) |
1445 |
|
{ |
1446 |
0 |
int[] mappedRange = mapping.locateInTo(sf.getBegin(), sf.getEnd()); |
1447 |
|
|
1448 |
0 |
if (mappedRange != null) |
1449 |
|
{ |
1450 |
0 |
String group = sf.getFeatureGroup(); |
1451 |
0 |
int newBegin = Math.min(mappedRange[0], mappedRange[1]); |
1452 |
0 |
int newEnd = Math.max(mappedRange[0], mappedRange[1]); |
1453 |
0 |
SequenceFeature copy = new SequenceFeature(sf, newBegin, newEnd, |
1454 |
|
group, sf.getScore()); |
1455 |
0 |
targetSequence.addSequenceFeature(copy); |
1456 |
|
} |
1457 |
|
} |
1458 |
|
|
1459 |
|
|
1460 |
|
|
1461 |
|
|
1462 |
|
@param |
1463 |
|
@param |
1464 |
|
@param |
1465 |
|
@param |
1466 |
|
@return |
1467 |
|
|
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
1468 |
0 |
protected static String makeRangesKey(String chromosome, String species,... |
1469 |
|
String fromRef, String toRef) |
1470 |
|
{ |
1471 |
0 |
return species + EXCL + chromosome + EXCL + fromRef + EXCL |
1472 |
|
+ toRef; |
1473 |
|
} |
1474 |
|
} |