1 |
|
|
2 |
|
|
3 |
|
|
4 |
|
|
5 |
|
|
6 |
|
|
7 |
|
|
8 |
|
|
9 |
|
|
10 |
|
|
11 |
|
|
12 |
|
|
13 |
|
|
14 |
|
|
15 |
|
|
16 |
|
|
17 |
|
|
18 |
|
|
19 |
|
|
20 |
|
|
21 |
|
package jalview.analysis; |
22 |
|
|
23 |
|
import jalview.datamodel.AlignedCodonFrame; |
24 |
|
import jalview.datamodel.Alignment; |
25 |
|
import jalview.datamodel.AlignmentI; |
26 |
|
import jalview.datamodel.DBRefEntry; |
27 |
|
import jalview.datamodel.DBRefSource; |
28 |
|
import jalview.datamodel.Mapping; |
29 |
|
import jalview.datamodel.Sequence; |
30 |
|
import jalview.datamodel.SequenceFeature; |
31 |
|
import jalview.datamodel.SequenceI; |
32 |
|
import jalview.util.DBRefUtils; |
33 |
|
import jalview.util.MapList; |
34 |
|
import jalview.ws.SequenceFetcherFactory; |
35 |
|
import jalview.ws.seqfetcher.ASequenceFetcher; |
36 |
|
|
37 |
|
import java.util.ArrayList; |
38 |
|
import java.util.Iterator; |
39 |
|
import java.util.List; |
40 |
|
|
41 |
|
|
42 |
|
|
43 |
|
|
44 |
|
@author |
45 |
|
|
46 |
|
|
|
|
| 46.3% |
Uncovered Elements: 262 (488) |
Complexity: 137 |
Complexity Density: 0.48 |
|
47 |
|
public class CrossRef |
48 |
|
{ |
49 |
|
|
50 |
|
|
51 |
|
|
52 |
|
|
53 |
|
|
54 |
|
/** Alignment searched for cross-referenced sequences; assigned once in the constructor. */
private AlignmentI dataset; |
55 |
|
|
56 |
|
|
57 |
|
|
58 |
|
|
59 |
|
/** The sequences passed to the constructor, for which cross-references are looked up. */
private SequenceI[] fromSeqs; |
60 |
|
|
61 |
|
|
62 |
|
|
63 |
|
|
64 |
|
/** Id matcher over the dataset's sequences; re-created at the start of findXrefSequences. */
SequenceIdMatcher matcher; |
65 |
|
|
66 |
|
|
67 |
|
|
68 |
|
|
69 |
|
/** Accumulates the cross-referenced sequences found; reset at the start of findXrefSequences. */
List<SequenceI> rseqs; |
70 |
|
|
71 |
|
|
72 |
|
|
73 |
|
|
74 |
|
@param |
75 |
|
|
76 |
|
@param |
77 |
|
|
78 |
|
|
79 |
|
|
|
|
| 75% |
Uncovered Elements: 1 (4) |
Complexity: 2 |
Complexity Density: 1 |
|
80 |
682 |
public CrossRef(SequenceI[] seqs, AlignmentI ds)... |
81 |
|
{ |
82 |
682 |
fromSeqs = seqs; |
83 |
682 |
dataset = ds.getDataset() == null ? ds : ds.getDataset(); |
84 |
|
} |
85 |
|
|
86 |
|
|
87 |
|
|
88 |
|
|
89 |
|
|
90 |
|
|
91 |
|
|
92 |
|
|
93 |
|
|
94 |
|
|
95 |
|
@param |
96 |
|
|
97 |
|
|
98 |
|
@return |
99 |
|
|
|
|
| 92.3% |
Uncovered Elements: 1 (13) |
Complexity: 3 |
Complexity Density: 0.33 |
|
100 |
675 |
public List<String> findXrefSourcesForSequences(boolean dna)... |
101 |
|
{ |
102 |
675 |
List<String> sources = new ArrayList<>(); |
103 |
675 |
for (SequenceI seq : fromSeqs) |
104 |
|
{ |
105 |
6684 |
if (seq != null) |
106 |
|
{ |
107 |
6684 |
findXrefSourcesForSequence(seq, dna, sources); |
108 |
|
} |
109 |
|
} |
110 |
675 |
sources.remove(DBRefSource.EMBL); |
111 |
|
|
112 |
675 |
if (dna) |
113 |
|
{ |
114 |
91 |
sources.remove(DBRefSource.ENSEMBL); |
115 |
|
|
116 |
|
|
117 |
91 |
sources.remove(DBRefSource.ENSEMBLGENOMES); |
118 |
|
} |
119 |
|
|
120 |
675 |
return sources; |
121 |
|
} |
122 |
|
|
123 |
|
|
124 |
|
|
125 |
|
|
126 |
|
|
127 |
|
|
128 |
|
|
129 |
|
|
130 |
|
|
131 |
|
|
132 |
|
@param |
133 |
|
|
134 |
|
@param |
135 |
|
|
136 |
|
|
137 |
|
@param |
138 |
|
|
139 |
|
|
|
|
| 90.9% |
Uncovered Elements: 1 (11) |
Complexity: 2 |
Complexity Density: 0.22 |
|
140 |
6684 |
void findXrefSourcesForSequence(SequenceI seq, boolean fromDna,... |
141 |
|
List<String> sources) |
142 |
|
{ |
143 |
|
|
144 |
|
|
145 |
|
|
146 |
6684 |
List<DBRefEntry> rfs = DBRefUtils.selectDbRefs(!fromDna, |
147 |
|
seq.getDBRefs()); |
148 |
6684 |
addXrefsToSources(rfs, sources); |
149 |
6684 |
if (dataset != null) |
150 |
|
{ |
151 |
|
|
152 |
|
|
153 |
|
|
154 |
6684 |
List<DBRefEntry> lrfs = DBRefUtils.selectDbRefs(fromDna, |
155 |
|
seq.getDBRefs()); |
156 |
6684 |
List<SequenceI> foundSeqs = new ArrayList<>(); |
157 |
|
|
158 |
|
|
159 |
|
|
160 |
|
|
161 |
|
|
162 |
6684 |
searchDatasetXrefs(fromDna, seq, lrfs, foundSeqs, null); |
163 |
|
|
164 |
|
|
165 |
|
|
166 |
|
|
167 |
6684 |
for (SequenceI rs : foundSeqs) |
168 |
|
{ |
169 |
272 |
List<DBRefEntry> xrs = DBRefUtils.selectDbRefs(!fromDna, |
170 |
|
rs.getDBRefs()); |
171 |
272 |
addXrefsToSources(xrs, sources); |
172 |
|
} |
173 |
|
} |
174 |
|
} |
175 |
|
|
176 |
|
|
177 |
|
|
178 |
|
|
179 |
|
|
180 |
|
@param |
181 |
|
@param |
182 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (9) |
Complexity: 3 |
Complexity Density: 0.6 |
|
183 |
6956 |
void addXrefsToSources(List<DBRefEntry> xrefs, List<String> sources)... |
184 |
|
{ |
185 |
6956 |
if (xrefs != null) |
186 |
|
{ |
187 |
364 |
for (DBRefEntry ref : xrefs) |
188 |
|
{ |
189 |
|
|
190 |
|
|
191 |
|
|
192 |
1358 |
String source = DBRefUtils.getCanonicalName(ref.getSource()); |
193 |
1358 |
if (!sources.contains(source)) |
194 |
|
{ |
195 |
21 |
sources.add(source); |
196 |
|
} |
197 |
|
} |
198 |
|
} |
199 |
|
} |
200 |
|
|
201 |
|
|
202 |
|
|
203 |
|
|
204 |
|
|
205 |
|
|
206 |
|
|
207 |
|
|
208 |
|
|
209 |
|
|
210 |
|
|
211 |
|
|
212 |
|
|
213 |
|
|
214 |
|
|
215 |
|
|
216 |
|
|
217 |
|
@param |
218 |
|
@return |
219 |
|
|
|
|
| 82% |
Uncovered Elements: 18 (100) |
Complexity: 31 |
Complexity Density: 0.53 |
|
220 |
6 |
public Alignment findXrefSequences(String source, boolean fromDna)... |
221 |
|
{ |
222 |
|
|
223 |
6 |
rseqs = new ArrayList<>(); |
224 |
6 |
AlignedCodonFrame cf = new AlignedCodonFrame(); |
225 |
6 |
matcher = new SequenceIdMatcher(dataset.getSequences()); |
226 |
|
|
227 |
6 |
for (SequenceI seq : fromSeqs) |
228 |
|
{ |
229 |
48 |
SequenceI dss = seq; |
230 |
93 |
while (dss.getDatasetSequence() != null) |
231 |
|
{ |
232 |
45 |
dss = dss.getDatasetSequence(); |
233 |
|
} |
234 |
48 |
boolean found = false; |
235 |
48 |
List<DBRefEntry> xrfs = DBRefUtils.selectDbRefs(!fromDna, |
236 |
|
dss.getDBRefs()); |
237 |
|
|
238 |
|
|
239 |
|
|
240 |
48 |
if ((xrfs == null || xrfs.size() == 0) && dataset != null) |
241 |
|
{ |
242 |
|
|
243 |
|
|
244 |
|
|
245 |
|
|
246 |
3 |
List<DBRefEntry> lrfs = DBRefUtils.selectDbRefs(fromDna, |
247 |
|
seq.getDBRefs()); |
248 |
|
|
249 |
|
|
250 |
|
|
251 |
|
|
252 |
|
|
253 |
|
|
254 |
3 |
found = searchDatasetXrefs(fromDna, dss, lrfs, rseqs, cf); |
255 |
|
} |
256 |
48 |
if (xrfs == null && !found) |
257 |
|
{ |
258 |
|
|
259 |
|
|
260 |
|
|
261 |
|
|
262 |
1 |
continue; |
263 |
|
} |
264 |
47 |
List<DBRefEntry> sourceRefs = DBRefUtils.searchRefsForSource(xrfs, |
265 |
|
source); |
266 |
47 |
Iterator<DBRefEntry> refIterator = sourceRefs.iterator(); |
267 |
|
|
268 |
|
|
269 |
93 |
while (refIterator.hasNext()) |
270 |
|
{ |
271 |
46 |
DBRefEntry xref = refIterator.next(); |
272 |
46 |
found = false; |
273 |
|
|
274 |
|
|
275 |
46 |
if (xref.hasMap() && xref.getMap().getMap().isTripletMap()) |
276 |
|
{ |
277 |
24 |
SequenceI mappedTo = xref.getMap().getTo(); |
278 |
24 |
if (mappedTo != null) |
279 |
|
{ |
280 |
|
|
281 |
|
|
282 |
|
|
283 |
|
|
284 |
|
|
285 |
|
|
286 |
24 |
found = true; |
287 |
|
|
288 |
|
|
289 |
|
|
290 |
|
|
291 |
|
|
292 |
24 |
SequenceI matchInDataset = findInDataset(xref); |
293 |
24 |
if (matchInDataset != null && xref.getMap().getTo() != null |
294 |
|
&& matchInDataset != xref.getMap().getTo()) |
295 |
|
{ |
296 |
0 |
jalview.bin.Console.errPrintln( |
297 |
|
"Implementation problem (reopen JAL-2154): CrossRef.findInDataset seems to have recovered a different sequence than the one explicitly mapped for xref." |
298 |
|
+ "Found:" + matchInDataset + "\nExpected:" |
299 |
|
+ xref.getMap().getTo() + "\nFor xref:" |
300 |
|
+ xref); |
301 |
|
} |
302 |
|
|
303 |
24 |
if (matchInDataset != null) |
304 |
|
{ |
305 |
22 |
if (!rseqs.contains(matchInDataset)) |
306 |
|
{ |
307 |
0 |
rseqs.add(matchInDataset); |
308 |
|
} |
309 |
|
|
310 |
|
|
311 |
|
|
312 |
22 |
if (xref.getMap().getMap().isTripletMap() |
313 |
|
&& dataset.getMapping(seq, matchInDataset) == null |
314 |
|
&& cf.getMappingBetween(seq, matchInDataset) == null) |
315 |
|
{ |
316 |
|
|
317 |
|
|
318 |
11 |
if (fromDna) |
319 |
|
{ |
320 |
11 |
cf.addMap(dss, matchInDataset, xref.getMap().getMap(), |
321 |
|
xref.getMap().getMappedFromId()); |
322 |
|
} |
323 |
|
else |
324 |
|
{ |
325 |
0 |
cf.addMap(matchInDataset, dss, |
326 |
|
xref.getMap().getMap().getInverse(), |
327 |
|
xref.getMap().getMappedFromId()); |
328 |
|
} |
329 |
|
} |
330 |
|
|
331 |
22 |
refIterator.remove(); |
332 |
22 |
continue; |
333 |
|
} |
334 |
|
|
335 |
2 |
SequenceI rsq = new Sequence(mappedTo); |
336 |
2 |
rseqs.add(rsq); |
337 |
2 |
if (xref.getMap().getMap().isTripletMap()) |
338 |
|
{ |
339 |
|
|
340 |
2 |
if (fromDna) |
341 |
|
{ |
342 |
|
|
343 |
2 |
cf.addMap(dss, rsq, xref.getMap().getMap(), |
344 |
|
xref.getMap().getMappedFromId()); |
345 |
|
} |
346 |
|
else |
347 |
|
{ |
348 |
|
|
349 |
0 |
cf.addMap(rsq, dss, xref.getMap().getMap().getInverse(), |
350 |
|
xref.getMap().getMappedFromId()); |
351 |
|
} |
352 |
|
} |
353 |
|
} |
354 |
|
} |
355 |
|
|
356 |
24 |
if (!found) |
357 |
|
{ |
358 |
22 |
SequenceI matchedSeq = matcher.findIdMatch( |
359 |
|
xref.getSource() + "|" + xref.getAccessionId()); |
360 |
|
|
361 |
|
|
362 |
22 |
if (matchedSeq != null && matchedSeq.isProtein() == fromDna) |
363 |
|
{ |
364 |
0 |
if (constructMapping(seq, matchedSeq, xref, cf, fromDna)) |
365 |
|
{ |
366 |
0 |
found = true; |
367 |
|
} |
368 |
|
} |
369 |
|
} |
370 |
|
|
371 |
24 |
if (!found) |
372 |
|
{ |
373 |
|
|
374 |
|
|
375 |
22 |
found = searchDataset(fromDna, dss, xref, rseqs, cf, false, |
376 |
|
DBRefUtils.SEARCH_MODE_FULL); |
377 |
|
} |
378 |
24 |
if (found) |
379 |
|
{ |
380 |
24 |
refIterator.remove(); |
381 |
|
} |
382 |
|
} |
383 |
|
|
384 |
|
|
385 |
|
|
386 |
|
|
387 |
47 |
if (!sourceRefs.isEmpty()) |
388 |
|
{ |
389 |
0 |
retrieveCrossRef(sourceRefs, seq, xrfs, fromDna, cf); |
390 |
|
} |
391 |
|
} |
392 |
|
|
393 |
6 |
Alignment ral = null; |
394 |
6 |
if (rseqs.size() > 0) |
395 |
|
{ |
396 |
5 |
ral = new Alignment(rseqs.toArray(new SequenceI[rseqs.size()])); |
397 |
5 |
if (!cf.isEmpty()) |
398 |
|
{ |
399 |
2 |
dataset.addCodonFrame(cf); |
400 |
|
} |
401 |
|
} |
402 |
6 |
return ral; |
403 |
|
} |
404 |
|
|
|
|
| 0% |
Uncovered Elements: 39 (39) |
Complexity: 10 |
Complexity Density: 0.4 |
|
405 |
0 |
private void retrieveCrossRef(List<DBRefEntry> sourceRefs, SequenceI seq,... |
406 |
|
List<DBRefEntry> xrfs, boolean fromDna, AlignedCodonFrame cf) |
407 |
|
{ |
408 |
0 |
ASequenceFetcher sftch = SequenceFetcherFactory.getSequenceFetcher(); |
409 |
0 |
SequenceI[] retrieved = null; |
410 |
0 |
SequenceI dss = seq.getDatasetSequence() == null ? seq |
411 |
|
: seq.getDatasetSequence(); |
412 |
|
|
413 |
|
|
414 |
|
|
415 |
0 |
removeAlreadyRetrievedSeqs(sourceRefs, fromDna); |
416 |
0 |
if (sourceRefs.size() == 0) |
417 |
|
{ |
418 |
|
|
419 |
|
|
420 |
0 |
return; |
421 |
|
} |
422 |
0 |
try |
423 |
|
{ |
424 |
0 |
retrieved = sftch.getSequences(sourceRefs, !fromDna); |
425 |
|
} catch (Exception e) |
426 |
|
{ |
427 |
0 |
jalview.bin.Console.errPrintln( |
428 |
|
"Problem whilst retrieving cross references for Sequence : " |
429 |
|
+ seq.getName()); |
430 |
0 |
e.printStackTrace(); |
431 |
|
} |
432 |
|
|
433 |
0 |
if (retrieved != null) |
434 |
|
{ |
435 |
0 |
boolean addedXref = false; |
436 |
0 |
List<SequenceI> newDsSeqs = new ArrayList<>(), |
437 |
|
doNotAdd = new ArrayList<>(); |
438 |
|
|
439 |
0 |
for (SequenceI retrievedSequence : retrieved) |
440 |
|
{ |
441 |
|
|
442 |
|
|
443 |
0 |
SequenceI retrievedDss = retrievedSequence |
444 |
|
.getDatasetSequence() == null ? retrievedSequence |
445 |
|
: retrievedSequence.getDatasetSequence(); |
446 |
0 |
addedXref |= importCrossRefSeq(cf, newDsSeqs, doNotAdd, dss, |
447 |
|
retrievedDss); |
448 |
|
} |
449 |
|
|
450 |
|
|
451 |
|
|
452 |
|
|
453 |
|
|
454 |
0 |
if (!addedXref) |
455 |
|
{ |
456 |
|
|
457 |
|
|
458 |
0 |
updateDbrefMappings(seq, xrfs, retrieved, cf, fromDna); |
459 |
0 |
for (SequenceI retrievedSequence : retrieved) |
460 |
|
{ |
461 |
|
|
462 |
|
|
463 |
0 |
SequenceI retrievedDss = retrievedSequence |
464 |
|
.getDatasetSequence() == null ? retrievedSequence |
465 |
|
: retrievedSequence.getDatasetSequence(); |
466 |
0 |
addedXref |= importCrossRefSeq(cf, newDsSeqs, doNotAdd, dss, |
467 |
|
retrievedDss); |
468 |
|
} |
469 |
|
} |
470 |
0 |
for (SequenceI newToSeq : newDsSeqs) |
471 |
|
{ |
472 |
0 |
if (!doNotAdd.contains(newToSeq) |
473 |
|
&& dataset.findIndex(newToSeq) == -1) |
474 |
|
{ |
475 |
0 |
dataset.addSequence(newToSeq); |
476 |
0 |
matcher.add(newToSeq); |
477 |
|
} |
478 |
|
} |
479 |
|
} |
480 |
|
} |
481 |
|
|
482 |
|
|
483 |
|
|
484 |
|
|
485 |
|
|
486 |
|
@param |
487 |
|
|
488 |
|
@param |
489 |
|
|
490 |
|
|
|
|
| 0% |
Uncovered Elements: 26 (26) |
Complexity: 6 |
Complexity Density: 0.38 |
|
491 |
0 |
private void removeAlreadyRetrievedSeqs(List<DBRefEntry> sourceRefs,... |
492 |
|
boolean fromDna) |
493 |
|
{ |
494 |
0 |
List<DBRefEntry> dbrSourceSet = new ArrayList<>(sourceRefs); |
495 |
0 |
List<SequenceI> dsSeqs = dataset.getSequences(); |
496 |
0 |
for (int ids = 0, nds = dsSeqs.size(); ids < nds; ids++) |
497 |
|
{ |
498 |
0 |
SequenceI sq = dsSeqs.get(ids); |
499 |
0 |
boolean dupeFound = false; |
500 |
|
|
501 |
|
|
502 |
0 |
if (sq.isProtein() == fromDna) |
503 |
|
{ |
504 |
0 |
List<DBRefEntry> sqdbrefs = sq.getPrimaryDBRefs(); |
505 |
0 |
for (int idb = 0, ndb = sqdbrefs.size(); idb < ndb; idb++) |
506 |
|
{ |
507 |
0 |
DBRefEntry dbr = sqdbrefs.get(idb); |
508 |
0 |
List<DBRefEntry> searchrefs = DBRefUtils.searchRefs(dbrSourceSet, |
509 |
|
dbr, DBRefUtils.SEARCH_MODE_FULL); |
510 |
0 |
for (int isr = 0, nsr = searchrefs.size(); isr < nsr; isr++) |
511 |
|
{ |
512 |
0 |
sourceRefs.remove(searchrefs.get(isr)); |
513 |
0 |
dupeFound = true; |
514 |
|
} |
515 |
|
} |
516 |
|
} |
517 |
0 |
if (dupeFound) |
518 |
|
{ |
519 |
|
|
520 |
0 |
dbrSourceSet.clear(); |
521 |
0 |
dbrSourceSet.addAll(sourceRefs); |
522 |
|
} |
523 |
|
} |
524 |
|
} |
525 |
|
|
526 |
|
|
527 |
|
|
528 |
|
|
529 |
|
|
530 |
|
|
531 |
|
|
532 |
|
@param |
533 |
|
@param |
534 |
|
@param |
535 |
|
@return |
536 |
|
|
|
|
| 0% |
Uncovered Elements: 71 (71) |
Complexity: 16 |
Complexity Density: 0.34 |
|
537 |
0 |
private boolean importCrossRefSeq(AlignedCodonFrame cf,... |
538 |
|
List<SequenceI> newDsSeqs, List<SequenceI> doNotAdd, |
539 |
|
SequenceI sourceSequence, SequenceI retrievedSequence) |
540 |
|
{ |
541 |
|
|
542 |
|
|
543 |
|
|
544 |
|
|
545 |
0 |
boolean imported = false; |
546 |
0 |
List<DBRefEntry> dbr = retrievedSequence.getDBRefs(); |
547 |
0 |
if (dbr != null) |
548 |
|
{ |
549 |
0 |
for (int ib = 0, nb = dbr.size(); ib < nb; ib++) |
550 |
|
{ |
551 |
|
|
552 |
0 |
DBRefEntry dbref = dbr.get(ib); |
553 |
|
|
554 |
0 |
SequenceI matched = findInDataset(dbref); |
555 |
0 |
if (matched == sourceSequence) |
556 |
|
{ |
557 |
|
|
558 |
0 |
imported = true; |
559 |
|
} |
560 |
|
|
561 |
|
|
562 |
0 |
Mapping map = dbref.getMap(); |
563 |
0 |
if (map != null) |
564 |
|
{ |
565 |
0 |
SequenceI ms = map.getTo(); |
566 |
0 |
if (ms != null && map.getMap() != null) |
567 |
|
{ |
568 |
0 |
if (ms == sourceSequence) |
569 |
|
{ |
570 |
|
|
571 |
|
|
572 |
0 |
continue; |
573 |
|
} |
574 |
0 |
if (matched == null) |
575 |
|
{ |
576 |
|
|
577 |
|
|
578 |
|
|
579 |
0 |
newDsSeqs.add(ms); |
580 |
0 |
continue; |
581 |
|
} |
582 |
|
|
583 |
|
|
584 |
|
|
585 |
|
|
586 |
|
|
587 |
0 |
try |
588 |
|
{ |
589 |
|
|
590 |
|
|
591 |
|
|
592 |
|
|
593 |
|
|
594 |
|
|
595 |
|
|
596 |
|
|
597 |
|
|
598 |
0 |
int sf = map.getMap().getToLowest(); |
599 |
0 |
int st = map.getMap().getToHighest(); |
600 |
0 |
SequenceI mappedrg = ms.getSubSequence(sf, st); |
601 |
0 |
if (mappedrg.getLength() > 0 && ms.getSequenceAsString() |
602 |
|
.equals(matched.getSequenceAsString())) |
603 |
|
{ |
604 |
|
|
605 |
|
|
606 |
|
|
607 |
0 |
String msg = "Mapping updated from " + ms.getName() |
608 |
|
+ " to retrieved crossreference " |
609 |
|
+ matched.getName(); |
610 |
0 |
jalview.bin.Console.outPrintln(msg); |
611 |
|
|
612 |
0 |
List<DBRefEntry> toRefs = map.getTo().getDBRefs(); |
613 |
0 |
if (toRefs != null) |
614 |
|
{ |
615 |
|
|
616 |
|
|
617 |
|
|
618 |
0 |
for (DBRefEntry ref : toRefs) |
619 |
|
{ |
620 |
0 |
if (dbref.getSrcAccString() |
621 |
|
.equals(ref.getSrcAccString())) |
622 |
|
{ |
623 |
0 |
continue; |
624 |
|
} |
625 |
0 |
matched.addDBRef(ref); |
626 |
|
} |
627 |
|
} |
628 |
0 |
doNotAdd.add(map.getTo()); |
629 |
0 |
map.setTo(matched); |
630 |
|
|
631 |
|
|
632 |
|
|
633 |
|
|
634 |
|
|
635 |
0 |
setReverseMapping(matched, dbref, cf); |
636 |
|
|
637 |
|
|
638 |
|
|
639 |
|
|
640 |
|
|
641 |
|
|
642 |
0 |
List<SequenceFeature> sfs = ms.getFeatures() |
643 |
|
.getAllFeatures(); |
644 |
0 |
for (SequenceFeature feat : sfs) |
645 |
|
{ |
646 |
|
|
647 |
|
|
648 |
|
|
649 |
|
|
650 |
|
|
651 |
0 |
SequenceFeature newFeature = new SequenceFeature(feat) |
652 |
|
{ |
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
653 |
0 |
@Override... |
654 |
|
public boolean equals(Object o) |
655 |
|
{ |
656 |
0 |
return super.equals(o, true); |
657 |
|
} |
658 |
|
}; |
659 |
0 |
matched.addSequenceFeature(newFeature); |
660 |
|
} |
661 |
|
} |
662 |
0 |
cf.addMap(retrievedSequence, map.getTo(), map.getMap()); |
663 |
|
} catch (Exception e) |
664 |
|
{ |
665 |
0 |
jalview.bin.Console.errPrintln( |
666 |
|
"Exception when consolidating Mapped sequence set..."); |
667 |
0 |
e.printStackTrace(System.err); |
668 |
|
} |
669 |
|
} |
670 |
|
} |
671 |
|
} |
672 |
|
} |
673 |
0 |
if (imported) |
674 |
|
{ |
675 |
0 |
retrievedSequence.updatePDBIds(); |
676 |
0 |
rseqs.add(retrievedSequence); |
677 |
0 |
if (dataset.findIndex(retrievedSequence) == -1) |
678 |
|
{ |
679 |
0 |
dataset.addSequence(retrievedSequence); |
680 |
0 |
matcher.add(retrievedSequence); |
681 |
|
} |
682 |
|
} |
683 |
0 |
return imported; |
684 |
|
} |
685 |
|
|
686 |
|
|
687 |
|
|
688 |
|
|
689 |
|
|
690 |
|
|
691 |
|
|
692 |
|
@param |
693 |
|
|
694 |
|
@param |
695 |
|
@param |
696 |
|
|
|
|
| 0% |
Uncovered Elements: 20 (20) |
Complexity: 6 |
Complexity Density: 0.5 |
|
697 |
0 |
void setReverseMapping(SequenceI mapFrom, DBRefEntry dbref,... |
698 |
|
AlignedCodonFrame mappings) |
699 |
|
{ |
700 |
0 |
SequenceI mapTo = dbref.getMap().getTo(); |
701 |
0 |
if (mapTo == null) |
702 |
|
{ |
703 |
0 |
return; |
704 |
|
} |
705 |
0 |
List<DBRefEntry> dbrefs = mapTo.getDBRefs(); |
706 |
0 |
if (dbrefs == null) |
707 |
|
{ |
708 |
0 |
return; |
709 |
|
} |
710 |
0 |
for (DBRefEntry toRef : dbrefs) |
711 |
|
{ |
712 |
0 |
if (toRef.hasMap() && mapFrom == toRef.getMap().getTo()) |
713 |
|
{ |
714 |
|
|
715 |
|
|
716 |
|
|
717 |
0 |
if (toRef.getMap().getMap() == null) |
718 |
|
{ |
719 |
0 |
MapList inverse = dbref.getMap().getMap().getInverse(); |
720 |
0 |
toRef.getMap().setMap(inverse); |
721 |
0 |
mappings.addMap(mapTo, mapFrom, inverse); |
722 |
|
} |
723 |
|
} |
724 |
|
} |
725 |
|
} |
726 |
|
|
727 |
|
|
728 |
|
|
729 |
|
|
730 |
|
|
731 |
|
|
732 |
|
|
733 |
|
@param |
734 |
|
|
735 |
|
@return |
736 |
|
|
|
|
| 70% |
Uncovered Elements: 9 (30) |
Complexity: 13 |
Complexity Density: 0.72 |
|
737 |
24 |
SequenceI findInDataset(DBRefEntry xref)... |
738 |
|
{ |
739 |
24 |
if (xref == null || !xref.hasMap() || xref.getMap().getTo() == null) |
740 |
|
{ |
741 |
0 |
return null; |
742 |
|
} |
743 |
24 |
SequenceI mapsTo = xref.getMap().getTo(); |
744 |
24 |
String name = xref.getAccessionId(); |
745 |
24 |
String name2 = xref.getSource() + "|" + name; |
746 |
24 |
SequenceI dss = mapsTo.getDatasetSequence() == null ? mapsTo |
747 |
|
: mapsTo.getDatasetSequence(); |
748 |
|
|
749 |
24 |
if (dataset.findIndex(dss) > -1) |
750 |
|
{ |
751 |
22 |
return dss; |
752 |
|
} |
753 |
2 |
DBRefEntry template = new DBRefEntry(xref.getSource(), null, |
754 |
|
xref.getAccessionId()); |
755 |
|
|
756 |
|
|
757 |
|
|
758 |
2 |
SequenceI firstIdMatch = null; |
759 |
2 |
for (SequenceI seq : dataset.getSequences()) |
760 |
|
{ |
761 |
|
|
762 |
2 |
List<DBRefEntry> match = DBRefUtils.searchRefs(seq.getPrimaryDBRefs(), |
763 |
|
template, DBRefUtils.SEARCH_MODE_FULL); |
764 |
2 |
if (match != null && match.size() == 1 && sameSequence(seq, dss)) |
765 |
|
{ |
766 |
0 |
return seq; |
767 |
|
} |
768 |
|
|
769 |
|
|
770 |
|
|
771 |
|
|
772 |
|
|
773 |
2 |
if (firstIdMatch == null && (name.equals(seq.getName()) |
774 |
|
|| seq.getName().startsWith(name2))) |
775 |
|
{ |
776 |
0 |
if (sameSequence(seq, dss)) |
777 |
|
{ |
778 |
0 |
firstIdMatch = seq; |
779 |
|
} |
780 |
|
} |
781 |
|
} |
782 |
2 |
return firstIdMatch; |
783 |
|
} |
784 |
|
|
785 |
|
|
786 |
|
|
787 |
|
|
788 |
|
|
789 |
|
|
790 |
|
|
791 |
|
@param |
792 |
|
@param |
793 |
|
@return |
794 |
|
|
795 |
|
|
|
|
| 90.9% |
Uncovered Elements: 2 (22) |
Complexity: 9 |
Complexity Density: 0.75 |
|
796 |
7 |
static boolean sameSequence(SequenceI seq1, SequenceI seq2)... |
797 |
|
{ |
798 |
7 |
if (seq1 == seq2) |
799 |
|
{ |
800 |
1 |
return true; |
801 |
|
} |
802 |
6 |
if (seq1 == null || seq2 == null) |
803 |
|
{ |
804 |
2 |
return false; |
805 |
|
} |
806 |
|
|
807 |
4 |
if (seq1.getLength() != seq2.getLength()) |
808 |
|
{ |
809 |
2 |
return false; |
810 |
|
} |
811 |
2 |
int length = seq1.getLength(); |
812 |
14 |
for (int i = 0; i < length; i++) |
813 |
|
{ |
814 |
12 |
int diff = seq1.getCharAt(i) - seq2.getCharAt(i); |
815 |
|
|
816 |
|
|
817 |
|
|
818 |
12 |
if (diff != 0 && diff != 32 && diff != -32) |
819 |
|
{ |
820 |
0 |
return false; |
821 |
|
} |
822 |
|
} |
823 |
2 |
return true; |
824 |
|
} |
825 |
|
|
826 |
|
|
827 |
|
|
828 |
|
|
829 |
|
|
830 |
|
|
831 |
|
|
832 |
|
@param |
833 |
|
@param |
834 |
|
@param |
835 |
|
@param |
836 |
|
|
|
|
| 0% |
Uncovered Elements: 13 (13) |
Complexity: 3 |
Complexity Density: 0.33 |
|
837 |
0 |
void updateDbrefMappings(SequenceI mapFrom, List<DBRefEntry> xrefs,... |
838 |
|
SequenceI[] retrieved, AlignedCodonFrame acf, boolean fromDna) |
839 |
|
{ |
840 |
0 |
SequenceIdMatcher idMatcher = new SequenceIdMatcher(retrieved); |
841 |
0 |
for (DBRefEntry xref : xrefs) |
842 |
|
{ |
843 |
0 |
if (!xref.hasMap()) |
844 |
|
{ |
845 |
0 |
String targetSeqName = xref.getSource() + "|" |
846 |
|
+ xref.getAccessionId(); |
847 |
0 |
SequenceI[] matches = idMatcher.findAllIdMatches(targetSeqName); |
848 |
0 |
if (matches == null) |
849 |
|
{ |
850 |
0 |
return; |
851 |
|
} |
852 |
0 |
for (SequenceI seq : matches) |
853 |
|
{ |
854 |
0 |
constructMapping(mapFrom, seq, xref, acf, fromDna); |
855 |
|
} |
856 |
|
} |
857 |
|
} |
858 |
|
} |
859 |
|
|
860 |
|
|
861 |
|
|
862 |
|
|
863 |
|
|
864 |
|
|
865 |
|
|
866 |
|
|
867 |
|
|
868 |
|
|
869 |
|
|
870 |
|
|
871 |
|
|
872 |
|
|
873 |
|
|
874 |
|
@param |
875 |
|
@param |
876 |
|
@param |
877 |
|
@param |
878 |
|
@return |
879 |
|
|
|
|
| 0% |
Uncovered Elements: 42 (42) |
Complexity: 12 |
Complexity Density: 0.5 |
|
880 |
0 |
boolean constructMapping(SequenceI mapFrom, SequenceI mapTo,... |
881 |
|
DBRefEntry xref, AlignedCodonFrame mappings, boolean fromDna) |
882 |
|
{ |
883 |
0 |
MapList mapping = null; |
884 |
0 |
SequenceI dsmapFrom = mapFrom.getDatasetSequence() == null ? mapFrom |
885 |
|
: mapFrom.getDatasetSequence(); |
886 |
0 |
SequenceI dsmapTo = mapTo.getDatasetSequence() == null ? mapTo |
887 |
|
: mapTo.getDatasetSequence(); |
888 |
|
|
889 |
|
|
890 |
|
|
891 |
|
|
892 |
0 |
if (dsmapTo.getDBRefs() != null) |
893 |
|
{ |
894 |
0 |
for (DBRefEntry dbref : dsmapTo.getDBRefs()) |
895 |
|
{ |
896 |
0 |
String name = dbref.getSource() + "|" + dbref.getAccessionId(); |
897 |
0 |
if (dbref.hasMap() && dsmapFrom.getName().startsWith(name)) |
898 |
|
{ |
899 |
|
|
900 |
|
|
901 |
|
|
902 |
|
|
903 |
0 |
MapList reverse = dbref.getMap().getMap().getInverse(); |
904 |
0 |
xref.setMap(new Mapping(dsmapTo, reverse)); |
905 |
0 |
mappings.addMap(mapFrom, dsmapTo, reverse); |
906 |
0 |
return true; |
907 |
|
} |
908 |
|
} |
909 |
|
} |
910 |
|
|
911 |
0 |
if (fromDna) |
912 |
|
{ |
913 |
0 |
mapping = AlignmentUtils.mapCdnaToProtein(mapTo, mapFrom); |
914 |
|
} |
915 |
|
else |
916 |
|
{ |
917 |
0 |
mapping = AlignmentUtils.mapCdnaToProtein(mapFrom, mapTo); |
918 |
0 |
if (mapping != null) |
919 |
|
{ |
920 |
0 |
mapping = mapping.getInverse(); |
921 |
|
} |
922 |
|
} |
923 |
0 |
if (mapping == null) |
924 |
|
{ |
925 |
0 |
return false; |
926 |
|
} |
927 |
0 |
xref.setMap(new Mapping(mapTo, mapping)); |
928 |
|
|
929 |
|
|
930 |
|
|
931 |
|
|
932 |
0 |
if (mapFrom.getDatasetSequence() != null && false) |
933 |
|
|
934 |
|
{ |
935 |
|
|
936 |
|
|
937 |
|
|
938 |
|
|
939 |
|
|
940 |
|
|
941 |
|
|
942 |
|
} |
943 |
|
|
944 |
0 |
if (fromDna) |
945 |
|
{ |
946 |
|
|
947 |
0 |
mappings.addMap(mapFrom, mapTo, mapping); |
948 |
|
} |
949 |
|
else |
950 |
|
{ |
951 |
0 |
mappings.addMap(mapTo, mapFrom, mapping.getInverse()); |
952 |
|
} |
953 |
|
|
954 |
0 |
return true; |
955 |
|
} |
956 |
|
|
957 |
|
|
958 |
|
|
959 |
|
|
960 |
|
|
961 |
|
|
962 |
|
@param |
963 |
|
|
964 |
|
|
965 |
|
@param |
966 |
|
@param |
967 |
|
@param |
968 |
|
@return |
969 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (10) |
Complexity: 3 |
Complexity Density: 0.5 |
|
970 |
6687 |
private boolean searchDatasetXrefs(boolean fromDna, SequenceI sequenceI,... |
971 |
|
List<DBRefEntry> lrfs, List<SequenceI> foundSeqs, |
972 |
|
AlignedCodonFrame cf) |
973 |
|
{ |
974 |
6687 |
boolean found = false; |
975 |
6687 |
if (lrfs == null) |
976 |
|
{ |
977 |
4996 |
return false; |
978 |
|
} |
979 |
3599 |
for (int i = 0, n = lrfs.size(); i < n; i++) |
980 |
|
{ |
981 |
|
|
982 |
|
|
983 |
|
|
984 |
|
|
985 |
1908 |
found |= searchDataset(fromDna, sequenceI, lrfs.get(i), foundSeqs, cf, |
986 |
|
false, DBRefUtils.SEARCH_MODE_NO_MAP_NO_VERSION); |
987 |
|
} |
988 |
1691 |
return found; |
989 |
|
} |
990 |
|
|
991 |
|
|
992 |
|
|
993 |
|
|
994 |
|
|
995 |
|
@param |
996 |
|
|
997 |
|
|
998 |
|
@param |
999 |
|
|
1000 |
|
@param |
1001 |
|
|
1002 |
|
@param |
1003 |
|
|
1004 |
|
@param |
1005 |
|
|
1006 |
|
@param |
1007 |
|
|
1008 |
|
|
1009 |
|
|
1010 |
|
|
1011 |
|
|
1012 |
|
|
1013 |
|
|
1014 |
|
|
1015 |
|
|
1016 |
|
|
1017 |
|
@param |
1018 |
|
|
1019 |
|
@return |
1020 |
|
|
|
|
| 82% |
Uncovered Elements: 11 (61) |
Complexity: 17 |
Complexity Density: 0.49 |
|
1021 |
1933 |
boolean searchDataset(boolean fromDna, SequenceI fromSeq, DBRefEntry xrf,... |
1022 |
|
List<SequenceI> foundSeqs, AlignedCodonFrame mappings, |
1023 |
|
boolean direct, int mode) |
1024 |
|
{ |
1025 |
1933 |
boolean found = false; |
1026 |
1933 |
if (dataset == null) |
1027 |
|
{ |
1028 |
0 |
return false; |
1029 |
|
} |
1030 |
1933 |
if (dataset.getSequences() == null) |
1031 |
|
{ |
1032 |
0 |
jalview.bin.Console |
1033 |
|
.errPrintln("Empty dataset sequence set - NO VECTOR"); |
1034 |
0 |
return false; |
1035 |
|
} |
1036 |
1933 |
List<SequenceI> ds = dataset.getSequences(); |
1037 |
1933 |
synchronized (ds) |
1038 |
|
{ |
1039 |
1933 |
for (SequenceI nxt : ds) |
1040 |
|
{ |
1041 |
43145 |
if (nxt != null) |
1042 |
|
{ |
1043 |
43145 |
if (nxt.getDatasetSequence() != null) |
1044 |
|
{ |
1045 |
0 |
jalview.bin.Console.errPrintln( |
1046 |
|
"Implementation warning: CrossRef initialised with a dataset alignment with non-dataset sequences in it! (" |
1047 |
|
+ nxt.getDisplayId(true) + " has ds reference " |
1048 |
|
+ nxt.getDatasetSequence().getDisplayId(true) |
1049 |
|
+ ")"); |
1050 |
|
} |
1051 |
43145 |
if (nxt == fromSeq || nxt == fromSeq.getDatasetSequence()) |
1052 |
|
{ |
1053 |
1930 |
continue; |
1054 |
|
} |
1055 |
|
|
1056 |
|
|
1057 |
|
|
1058 |
|
|
1059 |
|
{ |
1060 |
41215 |
boolean isDna = !nxt.isProtein(); |
1061 |
41215 |
if (direct ? (isDna != fromDna) : (isDna == fromDna)) |
1062 |
|
{ |
1063 |
|
|
1064 |
36172 |
continue; |
1065 |
|
} |
1066 |
|
} |
1067 |
|
|
1068 |
|
|
1069 |
5043 |
List<DBRefEntry> poss = nxt.getDBRefs(); |
1070 |
5043 |
List<DBRefEntry> cands = null; |
1071 |
|
|
1072 |
|
|
1073 |
|
|
1074 |
|
|
1075 |
5043 |
cands = DBRefUtils.searchRefs(poss, xrf, mode); |
1076 |
|
|
1077 |
|
|
1078 |
|
|
1079 |
|
|
1080 |
|
|
1081 |
5043 |
if (!cands.isEmpty()) |
1082 |
|
{ |
1083 |
880 |
if (foundSeqs.contains(nxt)) |
1084 |
|
{ |
1085 |
582 |
continue; |
1086 |
|
} |
1087 |
298 |
found = true; |
1088 |
298 |
foundSeqs.add(nxt); |
1089 |
298 |
if (mappings != null && !direct) |
1090 |
|
{ |
1091 |
|
|
1092 |
|
|
1093 |
|
|
1094 |
|
|
1095 |
|
|
1096 |
26 |
for (DBRefEntry candidate : cands) |
1097 |
|
{ |
1098 |
26 |
Mapping mapping = candidate.getMap(); |
1099 |
26 |
if (mapping != null) |
1100 |
|
{ |
1101 |
1 |
MapList map = mapping.getMap(); |
1102 |
1 |
if (mapping.getTo() != null |
1103 |
|
&& map.getFromRatio() != map.getToRatio()) |
1104 |
|
{ |
1105 |
|
|
1106 |
|
|
1107 |
|
|
1108 |
1 |
if (map.getFromRatio() == 3) |
1109 |
|
{ |
1110 |
1 |
mappings.addMap(nxt, fromSeq, map); |
1111 |
|
} |
1112 |
|
else |
1113 |
|
{ |
1114 |
0 |
mappings.addMap(nxt, fromSeq, map.getInverse()); |
1115 |
|
} |
1116 |
|
} |
1117 |
|
} |
1118 |
|
} |
1119 |
|
} |
1120 |
|
} |
1121 |
|
} |
1122 |
|
} |
1123 |
|
} |
1124 |
1933 |
return found; |
1125 |
|
} |
1126 |
|
} |