1 |
|
|
2 |
|
|
3 |
|
|
4 |
|
|
5 |
|
|
6 |
|
|
7 |
|
|
8 |
|
|
9 |
|
|
10 |
|
|
11 |
|
|
12 |
|
|
13 |
|
|
14 |
|
|
15 |
|
|
16 |
|
|
17 |
|
|
18 |
|
|
19 |
|
|
20 |
|
|
21 |
|
package jalview.analysis; |
22 |
|
|
23 |
|
import jalview.datamodel.AlignedCodonFrame; |
24 |
|
import jalview.datamodel.Alignment; |
25 |
|
import jalview.datamodel.AlignmentI; |
26 |
|
import jalview.datamodel.DBRefEntry; |
27 |
|
import jalview.datamodel.DBRefSource; |
28 |
|
import jalview.datamodel.Mapping; |
29 |
|
import jalview.datamodel.Sequence; |
30 |
|
import jalview.datamodel.SequenceFeature; |
31 |
|
import jalview.datamodel.SequenceI; |
32 |
|
import jalview.util.DBRefUtils; |
33 |
|
import jalview.util.MapList; |
34 |
|
import jalview.ws.SequenceFetcherFactory; |
35 |
|
import jalview.ws.seqfetcher.ASequenceFetcher; |
36 |
|
|
37 |
|
import java.util.ArrayList; |
38 |
|
import java.util.Iterator; |
39 |
|
import java.util.List; |
40 |
|
|
41 |
|
|
42 |
|
|
43 |
|
|
44 |
|
@author |
45 |
|
|
46 |
|
|
|
|
| 42.3% |
Uncovered Elements: 275 (477) |
Complexity: 133 |
Complexity Density: 0.47 |
|
47 |
|
public class CrossRef |
48 |
|
{ |
49 |
|
|
50 |
|
|
51 |
|
|
52 |
|
|
53 |
|
|
54 |
|
private AlignmentI dataset; |
55 |
|
|
56 |
|
|
57 |
|
|
58 |
|
|
59 |
|
private SequenceI[] fromSeqs; |
60 |
|
|
61 |
|
|
62 |
|
|
63 |
|
|
64 |
|
SequenceIdMatcher matcher; |
65 |
|
|
66 |
|
|
67 |
|
|
68 |
|
|
69 |
|
List<SequenceI> rseqs; |
70 |
|
|
71 |
|
|
72 |
|
|
73 |
|
|
74 |
|
@param |
75 |
|
|
76 |
|
@param |
77 |
|
|
78 |
|
|
79 |
|
|
|
|
| 75% |
Uncovered Elements: 1 (4) |
Complexity: 2 |
Complexity Density: 1 |
|
80 |
387 |
public CrossRef(SequenceI[] seqs, AlignmentI ds)... |
81 |
|
{ |
82 |
387 |
fromSeqs = seqs; |
83 |
387 |
dataset = ds.getDataset() == null ? ds : ds.getDataset(); |
84 |
|
} |
85 |
|
|
86 |
|
|
87 |
|
|
88 |
|
|
89 |
|
|
90 |
|
|
91 |
|
|
92 |
|
|
93 |
|
|
94 |
|
|
95 |
|
@param |
96 |
|
|
97 |
|
|
98 |
|
@return |
99 |
|
|
|
|
| 92.3% |
Uncovered Elements: 1 (13) |
Complexity: 3 |
Complexity Density: 0.33 |
|
100 |
379 |
public List<String> findXrefSourcesForSequences(boolean dna)... |
101 |
|
{ |
102 |
379 |
List<String> sources = new ArrayList<>(); |
103 |
379 |
for (SequenceI seq : fromSeqs) |
104 |
|
{ |
105 |
4232 |
if (seq != null) |
106 |
|
{ |
107 |
4232 |
findXrefSourcesForSequence(seq, dna, sources); |
108 |
|
} |
109 |
|
} |
110 |
379 |
sources.remove(DBRefSource.EMBL); |
111 |
|
|
112 |
379 |
if (dna) |
113 |
|
{ |
114 |
16 |
sources.remove(DBRefSource.ENSEMBL); |
115 |
|
|
116 |
|
|
117 |
16 |
sources.remove(DBRefSource.ENSEMBLGENOMES); |
118 |
|
} |
119 |
|
|
120 |
379 |
return sources; |
121 |
|
} |
122 |
|
|
123 |
|
|
124 |
|
|
125 |
|
|
126 |
|
|
127 |
|
|
128 |
|
|
129 |
|
|
130 |
|
|
131 |
|
|
132 |
|
@param |
133 |
|
|
134 |
|
@param |
135 |
|
|
136 |
|
|
137 |
|
@param |
138 |
|
|
139 |
|
|
|
|
| 72.7% |
Uncovered Elements: 3 (11) |
Complexity: 2 |
Complexity Density: 0.22 |
|
140 |
4232 |
void findXrefSourcesForSequence(SequenceI seq, boolean fromDna,... |
141 |
|
List<String> sources) |
142 |
|
{ |
143 |
|
|
144 |
|
|
145 |
|
|
146 |
4232 |
DBRefEntry[] rfs = DBRefUtils.selectDbRefs(!fromDna, seq.getDBRefs()); |
147 |
4232 |
addXrefsToSources(rfs, sources); |
148 |
4232 |
if (dataset != null) |
149 |
|
{ |
150 |
|
|
151 |
|
|
152 |
|
|
153 |
4232 |
DBRefEntry[] lrfs = DBRefUtils.selectDbRefs(fromDna, seq.getDBRefs()); |
154 |
4232 |
List<SequenceI> foundSeqs = new ArrayList<>(); |
155 |
|
|
156 |
|
|
157 |
|
|
158 |
|
|
159 |
|
|
160 |
4232 |
searchDatasetXrefs(fromDna, seq, lrfs, foundSeqs, null); |
161 |
|
|
162 |
|
|
163 |
|
|
164 |
|
|
165 |
4232 |
for (SequenceI rs : foundSeqs) |
166 |
|
{ |
167 |
0 |
DBRefEntry[] xrs = DBRefUtils.selectDbRefs(!fromDna, |
168 |
|
rs.getDBRefs()); |
169 |
0 |
addXrefsToSources(xrs, sources); |
170 |
|
} |
171 |
|
} |
172 |
|
} |
173 |
|
|
174 |
|
|
175 |
|
|
176 |
|
|
177 |
|
|
178 |
|
@param |
179 |
|
@param |
180 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (9) |
Complexity: 3 |
Complexity Density: 0.6 |
|
181 |
4232 |
void addXrefsToSources(DBRefEntry[] xrefs, List<String> sources)... |
182 |
|
{ |
183 |
4232 |
if (xrefs != null) |
184 |
|
{ |
185 |
3 |
for (DBRefEntry ref : xrefs) |
186 |
|
{ |
187 |
|
|
188 |
|
|
189 |
|
|
190 |
9 |
String source = DBRefUtils.getCanonicalName(ref.getSource()); |
191 |
9 |
if (!sources.contains(source)) |
192 |
|
{ |
193 |
8 |
sources.add(source); |
194 |
|
} |
195 |
|
} |
196 |
|
} |
197 |
|
} |
198 |
|
|
199 |
|
|
200 |
|
|
201 |
|
|
202 |
|
|
203 |
|
|
204 |
|
|
205 |
|
|
206 |
|
|
207 |
|
|
208 |
|
|
209 |
|
|
210 |
|
|
211 |
|
|
212 |
|
|
213 |
|
|
214 |
|
|
215 |
|
@param |
216 |
|
@return |
217 |
|
|
|
|
| 64% |
Uncovered Elements: 36 (100) |
Complexity: 31 |
Complexity Density: 0.53 |
|
218 |
7 |
public Alignment findXrefSequences(String source, boolean fromDna)... |
219 |
|
{ |
220 |
|
|
221 |
7 |
rseqs = new ArrayList<>(); |
222 |
7 |
AlignedCodonFrame cf = new AlignedCodonFrame(); |
223 |
7 |
matcher = new SequenceIdMatcher(dataset.getSequences()); |
224 |
|
|
225 |
7 |
for (SequenceI seq : fromSeqs) |
226 |
|
{ |
227 |
10 |
SequenceI dss = seq; |
228 |
11 |
while (dss.getDatasetSequence() != null) |
229 |
|
{ |
230 |
1 |
dss = dss.getDatasetSequence(); |
231 |
|
} |
232 |
10 |
boolean found = false; |
233 |
10 |
DBRefEntry[] xrfs = DBRefUtils.selectDbRefs(!fromDna, |
234 |
|
dss.getDBRefs()); |
235 |
|
|
236 |
|
|
237 |
|
|
238 |
10 |
if ((xrfs == null || xrfs.length == 0) && dataset != null) |
239 |
|
{ |
240 |
|
|
241 |
|
|
242 |
|
|
243 |
|
|
244 |
3 |
DBRefEntry[] lrfs = DBRefUtils.selectDbRefs(fromDna, |
245 |
|
seq.getDBRefs()); |
246 |
|
|
247 |
|
|
248 |
|
|
249 |
|
|
250 |
|
|
251 |
|
|
252 |
3 |
found = searchDatasetXrefs(fromDna, dss, lrfs, rseqs, cf); |
253 |
|
} |
254 |
10 |
if (xrfs == null && !found) |
255 |
|
{ |
256 |
|
|
257 |
|
|
258 |
|
|
259 |
|
|
260 |
1 |
continue; |
261 |
|
} |
262 |
9 |
List<DBRefEntry> sourceRefs = DBRefUtils.searchRefsForSource(xrfs, |
263 |
|
source); |
264 |
9 |
Iterator<DBRefEntry> refIterator = sourceRefs.iterator(); |
265 |
|
|
266 |
|
|
267 |
24 |
while (refIterator.hasNext()) |
268 |
|
{ |
269 |
15 |
DBRefEntry xref = refIterator.next(); |
270 |
15 |
found = false; |
271 |
|
|
272 |
|
|
273 |
2 |
if (xref.hasMap() && xref.getMap().getMap().isTripletMap()) |
274 |
|
{ |
275 |
2 |
SequenceI mappedTo = xref.getMap().getTo(); |
276 |
2 |
if (mappedTo != null) |
277 |
|
{ |
278 |
|
|
279 |
|
|
280 |
|
|
281 |
|
|
282 |
|
|
283 |
|
|
284 |
2 |
found = true; |
285 |
|
|
286 |
|
|
287 |
|
|
288 |
|
|
289 |
|
|
290 |
2 |
SequenceI matchInDataset = findInDataset(xref); |
291 |
2 |
if (matchInDataset != null && xref.getMap().getTo() != null |
292 |
|
&& matchInDataset != xref.getMap().getTo()) |
293 |
|
{ |
294 |
0 |
System.err.println( |
295 |
|
"Implementation problem (reopen JAL-2154): CrossRef.findInDataset seems to have recovered a different sequence than the one explicitly mapped for xref." |
296 |
|
+ "Found:" + matchInDataset + "\nExpected:" |
297 |
|
+ xref.getMap().getTo() + "\nFor xref:" |
298 |
|
+ xref); |
299 |
|
} |
300 |
|
|
301 |
2 |
if (matchInDataset != null) |
302 |
|
{ |
303 |
0 |
if (!rseqs.contains(matchInDataset)) |
304 |
|
{ |
305 |
0 |
rseqs.add(matchInDataset); |
306 |
|
} |
307 |
|
|
308 |
|
|
309 |
|
|
310 |
0 |
if (xref.getMap().getMap().isTripletMap() |
311 |
|
&& dataset.getMapping(seq, matchInDataset) == null |
312 |
|
&& cf.getMappingBetween(seq, matchInDataset) == null) |
313 |
|
{ |
314 |
|
|
315 |
|
|
316 |
0 |
if (fromDna) |
317 |
|
{ |
318 |
0 |
cf.addMap(dss, matchInDataset, xref.getMap().getMap(), |
319 |
|
xref.getMap().getMappedFromId()); |
320 |
|
} |
321 |
|
else |
322 |
|
{ |
323 |
0 |
cf.addMap(matchInDataset, dss, |
324 |
|
xref.getMap().getMap().getInverse(), |
325 |
|
xref.getMap().getMappedFromId()); |
326 |
|
} |
327 |
|
} |
328 |
|
|
329 |
0 |
refIterator.remove(); |
330 |
0 |
continue; |
331 |
|
} |
332 |
|
|
333 |
2 |
SequenceI rsq = new Sequence(mappedTo); |
334 |
2 |
rseqs.add(rsq); |
335 |
2 |
if (xref.getMap().getMap().isTripletMap()) |
336 |
|
{ |
337 |
|
|
338 |
2 |
if (fromDna) |
339 |
|
{ |
340 |
|
|
341 |
2 |
cf.addMap(dss, rsq, xref.getMap().getMap(), |
342 |
|
xref.getMap().getMappedFromId()); |
343 |
|
} |
344 |
|
else |
345 |
|
{ |
346 |
|
|
347 |
0 |
cf.addMap(rsq, dss, xref.getMap().getMap().getInverse(), |
348 |
|
xref.getMap().getMappedFromId()); |
349 |
|
} |
350 |
|
} |
351 |
|
} |
352 |
|
} |
353 |
|
|
354 |
2 |
if (!found) |
355 |
|
{ |
356 |
0 |
SequenceI matchedSeq = matcher.findIdMatch( |
357 |
|
xref.getSource() + "|" + xref.getAccessionId()); |
358 |
|
|
359 |
|
|
360 |
0 |
if (matchedSeq != null && matchedSeq.isProtein() == fromDna) |
361 |
|
{ |
362 |
0 |
if (constructMapping(seq, matchedSeq, xref, cf, fromDna)) |
363 |
|
{ |
364 |
0 |
found = true; |
365 |
|
} |
366 |
|
} |
367 |
|
} |
368 |
|
|
369 |
4 |
if (!found) |
370 |
|
{ |
371 |
|
|
372 |
|
|
373 |
0 |
found = searchDataset(fromDna, dss, xref, rseqs, cf, false); |
374 |
|
} |
375 |
4 |
if (found) |
376 |
|
{ |
377 |
4 |
refIterator.remove(); |
378 |
|
} |
379 |
|
} |
380 |
|
|
381 |
|
|
382 |
|
|
383 |
|
|
384 |
3 |
if (!sourceRefs.isEmpty()) |
385 |
|
{ |
386 |
0 |
retrieveCrossRef(sourceRefs, seq, xrfs, fromDna, cf); |
387 |
|
} |
388 |
|
} |
389 |
|
|
390 |
7 |
Alignment ral = null; |
391 |
7 |
if (rseqs.size() > 0) |
392 |
|
{ |
393 |
4 |
ral = new Alignment(rseqs.toArray(new SequenceI[rseqs.size()])); |
394 |
4 |
if (!cf.isEmpty()) |
395 |
|
{ |
396 |
2 |
dataset.addCodonFrame(cf); |
397 |
|
} |
398 |
|
} |
399 |
7 |
return ral; |
400 |
|
} |
401 |
|
|
|
|
| 0% |
Uncovered Elements: 39 (39) |
Complexity: 10 |
Complexity Density: 0.4 |
|
402 |
0 |
private void retrieveCrossRef(List<DBRefEntry> sourceRefs, SequenceI seq,... |
403 |
|
DBRefEntry[] xrfs, boolean fromDna, AlignedCodonFrame cf) |
404 |
|
{ |
405 |
0 |
ASequenceFetcher sftch = SequenceFetcherFactory.getSequenceFetcher(); |
406 |
0 |
SequenceI[] retrieved = null; |
407 |
0 |
SequenceI dss = seq.getDatasetSequence() == null ? seq |
408 |
|
: seq.getDatasetSequence(); |
409 |
|
|
410 |
|
|
411 |
|
|
412 |
0 |
removeAlreadyRetrievedSeqs(sourceRefs, fromDna); |
413 |
0 |
if (sourceRefs.size() == 0) |
414 |
|
{ |
415 |
|
|
416 |
|
|
417 |
0 |
return; |
418 |
|
} |
419 |
0 |
try |
420 |
|
{ |
421 |
0 |
retrieved = sftch.getSequences(sourceRefs, !fromDna); |
422 |
|
} catch (Exception e) |
423 |
|
{ |
424 |
0 |
System.err.println( |
425 |
|
"Problem whilst retrieving cross references for Sequence : " |
426 |
|
+ seq.getName()); |
427 |
0 |
e.printStackTrace(); |
428 |
|
} |
429 |
|
|
430 |
0 |
if (retrieved != null) |
431 |
|
{ |
432 |
0 |
boolean addedXref = false; |
433 |
0 |
List<SequenceI> newDsSeqs = new ArrayList<>(), |
434 |
|
doNotAdd = new ArrayList<>(); |
435 |
|
|
436 |
0 |
for (SequenceI retrievedSequence : retrieved) |
437 |
|
{ |
438 |
|
|
439 |
|
|
440 |
0 |
SequenceI retrievedDss = retrievedSequence |
441 |
|
.getDatasetSequence() == null ? retrievedSequence |
442 |
|
: retrievedSequence.getDatasetSequence(); |
443 |
0 |
addedXref |= importCrossRefSeq(cf, newDsSeqs, doNotAdd, dss, |
444 |
|
retrievedDss); |
445 |
|
} |
446 |
0 |
if (!addedXref) |
447 |
|
{ |
448 |
|
|
449 |
|
|
450 |
0 |
updateDbrefMappings(seq, xrfs, retrieved, cf, fromDna); |
451 |
0 |
for (SequenceI retrievedSequence : retrieved) |
452 |
|
{ |
453 |
|
|
454 |
|
|
455 |
0 |
SequenceI retrievedDss = retrievedSequence |
456 |
|
.getDatasetSequence() == null ? retrievedSequence |
457 |
|
: retrievedSequence.getDatasetSequence(); |
458 |
0 |
addedXref |= importCrossRefSeq(cf, newDsSeqs, doNotAdd, dss, |
459 |
|
retrievedDss); |
460 |
|
} |
461 |
|
} |
462 |
0 |
for (SequenceI newToSeq : newDsSeqs) |
463 |
|
{ |
464 |
0 |
if (!doNotAdd.contains(newToSeq) |
465 |
|
&& dataset.findIndex(newToSeq) == -1) |
466 |
|
{ |
467 |
0 |
dataset.addSequence(newToSeq); |
468 |
0 |
matcher.add(newToSeq); |
469 |
|
} |
470 |
|
} |
471 |
|
} |
472 |
|
} |
473 |
|
|
474 |
|
|
475 |
|
|
476 |
|
|
477 |
|
|
478 |
|
@param |
479 |
|
|
480 |
|
@param |
481 |
|
|
482 |
|
|
|
|
| 0% |
Uncovered Elements: 14 (14) |
Complexity: 3 |
Complexity Density: 0.3 |
|
483 |
0 |
private void removeAlreadyRetrievedSeqs(List<DBRefEntry> sourceRefs,... |
484 |
|
boolean fromDna) |
485 |
|
{ |
486 |
0 |
DBRefEntry[] dbrSourceSet = sourceRefs.toArray(new DBRefEntry[0]); |
487 |
0 |
for (SequenceI sq : dataset.getSequences()) |
488 |
|
{ |
489 |
0 |
boolean dupeFound = false; |
490 |
|
|
491 |
|
|
492 |
0 |
if (sq.isProtein() == fromDna) |
493 |
|
{ |
494 |
0 |
for (DBRefEntry dbr : sq.getPrimaryDBRefs()) |
495 |
|
{ |
496 |
0 |
for (DBRefEntry found : DBRefUtils.searchRefs(dbrSourceSet, dbr)) |
497 |
|
{ |
498 |
0 |
sourceRefs.remove(found); |
499 |
0 |
dupeFound = true; |
500 |
|
} |
501 |
|
} |
502 |
|
} |
503 |
0 |
if (dupeFound) |
504 |
|
{ |
505 |
|
|
506 |
0 |
dbrSourceSet = sourceRefs.toArray(new DBRefEntry[0]); |
507 |
|
} |
508 |
|
} |
509 |
|
} |
510 |
|
|
511 |
|
|
512 |
|
|
513 |
|
|
514 |
|
|
515 |
|
@param |
516 |
|
@param |
517 |
|
@param |
518 |
|
@return |
519 |
|
|
|
|
| 0% |
Uncovered Elements: 68 (68) |
Complexity: 15 |
Complexity Density: 0.33 |
|
520 |
0 |
private boolean importCrossRefSeq(AlignedCodonFrame cf,... |
521 |
|
List<SequenceI> newDsSeqs, List<SequenceI> doNotAdd, |
522 |
|
SequenceI sourceSequence, SequenceI retrievedSequence) |
523 |
|
{ |
524 |
|
|
525 |
|
|
526 |
|
|
527 |
|
|
528 |
0 |
boolean imported = false; |
529 |
0 |
DBRefEntry[] dbr = retrievedSequence.getDBRefs(); |
530 |
0 |
if (dbr != null) |
531 |
|
{ |
532 |
0 |
for (DBRefEntry dbref : dbr) |
533 |
|
{ |
534 |
0 |
SequenceI matched = findInDataset(dbref); |
535 |
0 |
if (matched == sourceSequence) |
536 |
|
{ |
537 |
|
|
538 |
0 |
imported = true; |
539 |
|
} |
540 |
|
|
541 |
|
|
542 |
0 |
Mapping map = dbref.getMap(); |
543 |
0 |
if (map != null) |
544 |
|
{ |
545 |
0 |
if (map.getTo() != null && map.getMap() != null) |
546 |
|
{ |
547 |
0 |
if (map.getTo() == sourceSequence) |
548 |
|
{ |
549 |
|
|
550 |
|
|
551 |
0 |
continue; |
552 |
|
} |
553 |
0 |
if (matched == null) |
554 |
|
{ |
555 |
|
|
556 |
|
|
557 |
|
|
558 |
0 |
newDsSeqs.add(map.getTo()); |
559 |
0 |
continue; |
560 |
|
} |
561 |
|
|
562 |
|
|
563 |
|
|
564 |
|
|
565 |
|
|
566 |
0 |
try |
567 |
|
{ |
568 |
|
|
569 |
|
|
570 |
0 |
SequenceI ms = map.getTo(); |
571 |
|
|
572 |
|
|
573 |
|
|
574 |
|
|
575 |
|
|
576 |
|
|
577 |
|
|
578 |
0 |
int sf = map.getMap().getToLowest(); |
579 |
0 |
int st = map.getMap().getToHighest(); |
580 |
0 |
SequenceI mappedrg = ms.getSubSequence(sf, st); |
581 |
0 |
if (mappedrg.getLength() > 0 && ms.getSequenceAsString() |
582 |
|
.equals(matched.getSequenceAsString())) |
583 |
|
{ |
584 |
|
|
585 |
|
|
586 |
|
|
587 |
0 |
String msg = "Mapping updated from " + ms.getName() |
588 |
|
+ " to retrieved crossreference " |
589 |
|
+ matched.getName(); |
590 |
0 |
System.out.println(msg); |
591 |
|
|
592 |
0 |
DBRefEntry[] toRefs = map.getTo().getDBRefs(); |
593 |
0 |
if (toRefs != null) |
594 |
|
{ |
595 |
|
|
596 |
|
|
597 |
|
|
598 |
0 |
for (DBRefEntry ref : toRefs) |
599 |
|
{ |
600 |
0 |
if (dbref.getSrcAccString() |
601 |
|
.equals(ref.getSrcAccString())) |
602 |
|
{ |
603 |
0 |
continue; |
604 |
|
} |
605 |
0 |
matched.addDBRef(ref); |
606 |
|
} |
607 |
|
} |
608 |
0 |
doNotAdd.add(map.getTo()); |
609 |
0 |
map.setTo(matched); |
610 |
|
|
611 |
|
|
612 |
|
|
613 |
|
|
614 |
|
|
615 |
0 |
setReverseMapping(matched, dbref, cf); |
616 |
|
|
617 |
|
|
618 |
|
|
619 |
|
|
620 |
|
|
621 |
|
|
622 |
0 |
List<SequenceFeature> sfs = ms.getFeatures() |
623 |
|
.getAllFeatures(); |
624 |
0 |
for (SequenceFeature feat : sfs) |
625 |
|
{ |
626 |
|
|
627 |
|
|
628 |
|
|
629 |
|
|
630 |
|
|
631 |
0 |
SequenceFeature newFeature = new SequenceFeature(feat) |
632 |
|
{ |
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
633 |
0 |
@Override... |
634 |
|
public boolean equals(Object o) |
635 |
|
{ |
636 |
0 |
return super.equals(o, true); |
637 |
|
} |
638 |
|
}; |
639 |
0 |
matched.addSequenceFeature(newFeature); |
640 |
|
} |
641 |
|
} |
642 |
0 |
cf.addMap(retrievedSequence, map.getTo(), map.getMap()); |
643 |
|
} catch (Exception e) |
644 |
|
{ |
645 |
0 |
System.err.println( |
646 |
|
"Exception when consolidating Mapped sequence set..."); |
647 |
0 |
e.printStackTrace(System.err); |
648 |
|
} |
649 |
|
} |
650 |
|
} |
651 |
|
} |
652 |
|
} |
653 |
0 |
if (imported) |
654 |
|
{ |
655 |
0 |
retrievedSequence.updatePDBIds(); |
656 |
0 |
rseqs.add(retrievedSequence); |
657 |
0 |
if (dataset.findIndex(retrievedSequence) == -1) |
658 |
|
{ |
659 |
0 |
dataset.addSequence(retrievedSequence); |
660 |
0 |
matcher.add(retrievedSequence); |
661 |
|
} |
662 |
|
} |
663 |
0 |
return imported; |
664 |
|
} |
665 |
|
|
666 |
|
|
667 |
|
|
668 |
|
|
669 |
|
|
670 |
|
|
671 |
|
|
672 |
|
@param |
673 |
|
|
674 |
|
@param |
675 |
|
@param |
676 |
|
|
|
|
| 0% |
Uncovered Elements: 20 (20) |
Complexity: 6 |
Complexity Density: 0.5 |
|
677 |
0 |
void setReverseMapping(SequenceI mapFrom, DBRefEntry dbref,... |
678 |
|
AlignedCodonFrame mappings) |
679 |
|
{ |
680 |
0 |
SequenceI mapTo = dbref.getMap().getTo(); |
681 |
0 |
if (mapTo == null) |
682 |
|
{ |
683 |
0 |
return; |
684 |
|
} |
685 |
0 |
DBRefEntry[] dbrefs = mapTo.getDBRefs(); |
686 |
0 |
if (dbrefs == null) |
687 |
|
{ |
688 |
0 |
return; |
689 |
|
} |
690 |
0 |
for (DBRefEntry toRef : dbrefs) |
691 |
|
{ |
692 |
0 |
if (toRef.hasMap() && mapFrom == toRef.getMap().getTo()) |
693 |
|
{ |
694 |
|
|
695 |
|
|
696 |
|
|
697 |
0 |
if (toRef.getMap().getMap() == null) |
698 |
|
{ |
699 |
0 |
MapList inverse = dbref.getMap().getMap().getInverse(); |
700 |
0 |
toRef.getMap().setMap(inverse); |
701 |
0 |
mappings.addMap(mapTo, mapFrom, inverse); |
702 |
|
} |
703 |
|
} |
704 |
|
} |
705 |
|
} |
706 |
|
|
707 |
|
|
708 |
|
|
709 |
|
|
710 |
|
|
711 |
|
|
712 |
|
@param |
713 |
|
|
714 |
|
@return |
715 |
|
|
|
|
| 60% |
Uncovered Elements: 12 (30) |
Complexity: 13 |
Complexity Density: 0.72 |
|
716 |
26 |
SequenceI findInDataset(DBRefEntry xref)... |
717 |
|
{ |
718 |
10 |
if (xref == null || !xref.hasMap() || xref.getMap().getTo() == null) |
719 |
|
{ |
720 |
0 |
return null; |
721 |
|
} |
722 |
10 |
SequenceI mapsTo = xref.getMap().getTo(); |
723 |
10 |
String name = xref.getAccessionId(); |
724 |
10 |
String name2 = xref.getSource() + "|" + name; |
725 |
2 |
SequenceI dss = mapsTo.getDatasetSequence() == null ? mapsTo |
726 |
|
: mapsTo.getDatasetSequence(); |
727 |
|
|
728 |
10 |
if (dataset.findIndex(dss) > -1) |
729 |
|
{ |
730 |
0 |
return dss; |
731 |
|
} |
732 |
10 |
DBRefEntry template = new DBRefEntry(xref.getSource(), null, |
733 |
|
xref.getAccessionId()); |
734 |
|
|
735 |
|
|
736 |
|
|
737 |
10 |
SequenceI firstIdMatch = null; |
738 |
10 |
for (SequenceI seq : dataset.getSequences()) |
739 |
|
{ |
740 |
|
|
741 |
32 |
List<DBRefEntry> match = DBRefUtils.searchRefs( |
742 |
|
seq.getPrimaryDBRefs().toArray(new DBRefEntry[0]), template); |
743 |
32 |
if (match != null && match.size() == 1 && sameSequence(seq, dss)) |
744 |
|
{ |
745 |
0 |
return seq; |
746 |
|
} |
747 |
|
|
748 |
|
|
749 |
|
|
750 |
|
|
751 |
|
|
752 |
26 |
if (firstIdMatch == null && (name.equals(seq.getName()) |
753 |
|
|| seq.getName().startsWith(name2))) |
754 |
|
{ |
755 |
0 |
if (sameSequence(seq, dss)) |
756 |
|
{ |
757 |
0 |
firstIdMatch = seq; |
758 |
|
} |
759 |
|
} |
760 |
|
} |
761 |
10 |
return firstIdMatch; |
762 |
|
} |
763 |
|
|
764 |
|
|
765 |
|
|
766 |
|
|
767 |
|
|
768 |
|
|
769 |
|
|
770 |
|
@param |
771 |
|
@param |
772 |
|
@return |
773 |
|
|
774 |
|
|
|
|
| 90.9% |
Uncovered Elements: 2 (22) |
Complexity: 9 |
Complexity Density: 0.75 |
|
775 |
13 |
static boolean sameSequence(SequenceI seq1, SequenceI seq2)... |
776 |
|
{ |
777 |
13 |
if (seq1 == seq2) |
778 |
|
{ |
779 |
1 |
return true; |
780 |
|
} |
781 |
12 |
if (seq1 == null || seq2 == null) |
782 |
|
{ |
783 |
2 |
return false; |
784 |
|
} |
785 |
|
|
786 |
10 |
if (seq1.getLength() != seq2.getLength()) |
787 |
|
{ |
788 |
2 |
return false; |
789 |
|
} |
790 |
8 |
int length = seq1.getLength(); |
791 |
44 |
for (int i = 0; i < length; i++) |
792 |
|
{ |
793 |
36 |
int diff = seq1.getCharAt(i) - seq2.getCharAt(i); |
794 |
|
|
795 |
|
|
796 |
|
|
797 |
36 |
if (diff != 0 && diff != 32 && diff != -32) |
798 |
|
{ |
799 |
0 |
return false; |
800 |
|
} |
801 |
|
} |
802 |
8 |
return true; |
803 |
|
} |
804 |
|
|
805 |
|
|
806 |
|
|
807 |
|
|
808 |
|
|
809 |
|
|
810 |
|
@param |
811 |
|
@param |
812 |
|
@param |
813 |
|
@param |
814 |
|
|
|
|
| 0% |
Uncovered Elements: 13 (13) |
Complexity: 3 |
Complexity Density: 0.33 |
|
815 |
0 |
void updateDbrefMappings(SequenceI mapFrom, DBRefEntry[] xrefs,... |
816 |
|
SequenceI[] retrieved, AlignedCodonFrame acf, boolean fromDna) |
817 |
|
{ |
818 |
0 |
SequenceIdMatcher idMatcher = new SequenceIdMatcher(retrieved); |
819 |
0 |
for (DBRefEntry xref : xrefs) |
820 |
|
{ |
821 |
0 |
if (!xref.hasMap()) |
822 |
|
{ |
823 |
0 |
String targetSeqName = xref.getSource() + "|" |
824 |
|
+ xref.getAccessionId(); |
825 |
0 |
SequenceI[] matches = idMatcher.findAllIdMatches(targetSeqName); |
826 |
0 |
if (matches == null) |
827 |
|
{ |
828 |
0 |
return; |
829 |
|
} |
830 |
0 |
for (SequenceI seq : matches) |
831 |
|
{ |
832 |
0 |
constructMapping(mapFrom, seq, xref, acf, fromDna); |
833 |
|
} |
834 |
|
} |
835 |
|
} |
836 |
|
} |
837 |
|
|
838 |
|
|
839 |
|
|
840 |
|
|
841 |
|
|
842 |
|
|
843 |
|
|
844 |
|
|
845 |
|
|
846 |
|
|
847 |
|
|
848 |
|
|
849 |
|
|
850 |
|
|
851 |
|
|
852 |
|
@param |
853 |
|
@param |
854 |
|
@param |
855 |
|
@param |
856 |
|
@return |
857 |
|
|
|
|
| 0% |
Uncovered Elements: 43 (43) |
Complexity: 12 |
Complexity Density: 0.48 |
|
858 |
0 |
boolean constructMapping(SequenceI mapFrom, SequenceI mapTo,... |
859 |
|
DBRefEntry xref, AlignedCodonFrame mappings, boolean fromDna) |
860 |
|
{ |
861 |
0 |
MapList mapping = null; |
862 |
0 |
SequenceI dsmapFrom = mapFrom.getDatasetSequence() == null ? mapFrom |
863 |
|
: mapFrom.getDatasetSequence(); |
864 |
0 |
SequenceI dsmapTo = mapTo.getDatasetSequence() == null ? mapTo |
865 |
|
: mapTo.getDatasetSequence(); |
866 |
|
|
867 |
|
|
868 |
|
|
869 |
|
|
870 |
0 |
if (dsmapTo.getDBRefs() != null) |
871 |
|
{ |
872 |
0 |
for (DBRefEntry dbref : dsmapTo.getDBRefs()) |
873 |
|
{ |
874 |
0 |
String name = dbref.getSource() + "|" + dbref.getAccessionId(); |
875 |
0 |
if (dbref.hasMap() && dsmapFrom.getName().startsWith(name)) |
876 |
|
{ |
877 |
|
|
878 |
|
|
879 |
|
|
880 |
|
|
881 |
0 |
MapList reverse = dbref.getMap().getMap().getInverse(); |
882 |
0 |
xref.setMap(new Mapping(dsmapTo, reverse)); |
883 |
0 |
mappings.addMap(mapFrom, dsmapTo, reverse); |
884 |
0 |
return true; |
885 |
|
} |
886 |
|
} |
887 |
|
} |
888 |
|
|
889 |
0 |
if (fromDna) |
890 |
|
{ |
891 |
0 |
mapping = AlignmentUtils.mapCdnaToProtein(mapTo, mapFrom); |
892 |
|
} |
893 |
|
else |
894 |
|
{ |
895 |
0 |
mapping = AlignmentUtils.mapCdnaToProtein(mapFrom, mapTo); |
896 |
0 |
if (mapping != null) |
897 |
|
{ |
898 |
0 |
mapping = mapping.getInverse(); |
899 |
|
} |
900 |
|
} |
901 |
0 |
if (mapping == null) |
902 |
|
{ |
903 |
0 |
return false; |
904 |
|
} |
905 |
0 |
xref.setMap(new Mapping(mapTo, mapping)); |
906 |
|
|
907 |
|
|
908 |
|
|
909 |
|
|
910 |
0 |
if (mapFrom.getDatasetSequence() != null && false) |
911 |
|
|
912 |
|
{ |
913 |
|
|
914 |
|
|
915 |
|
|
916 |
|
|
917 |
|
|
918 |
|
|
919 |
|
|
920 |
|
} |
921 |
|
|
922 |
0 |
if (fromDna) |
923 |
|
{ |
924 |
0 |
AlignmentUtils.computeProteinFeatures(mapFrom, mapTo, mapping); |
925 |
0 |
mappings.addMap(mapFrom, mapTo, mapping); |
926 |
|
} |
927 |
|
else |
928 |
|
{ |
929 |
0 |
mappings.addMap(mapTo, mapFrom, mapping.getInverse()); |
930 |
|
} |
931 |
|
|
932 |
0 |
return true; |
933 |
|
} |
934 |
|
|
935 |
|
|
936 |
|
|
937 |
|
|
938 |
|
|
939 |
|
|
940 |
|
@param |
941 |
|
|
942 |
|
|
943 |
|
@param |
944 |
|
@param |
945 |
|
@param |
946 |
|
@return |
947 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (13) |
Complexity: 3 |
Complexity Density: 0.33 |
|
948 |
4235 |
private boolean searchDatasetXrefs(boolean fromDna, SequenceI sequenceI,... |
949 |
|
DBRefEntry[] lrfs, List<SequenceI> foundSeqs, |
950 |
|
AlignedCodonFrame cf) |
951 |
|
{ |
952 |
4235 |
boolean found = false; |
953 |
4235 |
if (lrfs == null) |
954 |
|
{ |
955 |
1735 |
return false; |
956 |
|
} |
957 |
5001 |
for (int i = 0; i < lrfs.length; i++) |
958 |
|
{ |
959 |
2501 |
DBRefEntry xref = new DBRefEntry(lrfs[i]); |
960 |
|
|
961 |
2501 |
xref.setVersion(null); |
962 |
2501 |
xref.setMap(null); |
963 |
2501 |
found |= searchDataset(fromDna, sequenceI, xref, foundSeqs, cf, |
964 |
|
false); |
965 |
|
} |
966 |
2500 |
return found; |
967 |
|
} |
968 |
|
|
969 |
|
|
970 |
|
|
971 |
|
|
972 |
|
|
973 |
|
@param |
974 |
|
|
975 |
|
|
976 |
|
@param |
977 |
|
|
978 |
|
@param |
979 |
|
|
980 |
|
@param |
981 |
|
|
982 |
|
@param |
983 |
|
|
984 |
|
@param |
985 |
|
|
986 |
|
|
987 |
|
|
988 |
|
|
989 |
|
|
990 |
|
|
991 |
|
|
992 |
|
|
993 |
|
|
994 |
|
|
995 |
|
@return |
996 |
|
|
|
|
| 75.4% |
Uncovered Elements: 15 (61) |
Complexity: 17 |
Complexity Density: 0.49 |
|
997 |
2515 |
boolean searchDataset(boolean fromDna, SequenceI fromSeq, DBRefEntry xrf,... |
998 |
|
List<SequenceI> foundSeqs, AlignedCodonFrame mappings, |
999 |
|
boolean direct) |
1000 |
|
{ |
1001 |
2515 |
boolean found = false; |
1002 |
2515 |
if (dataset == null) |
1003 |
|
{ |
1004 |
0 |
return false; |
1005 |
|
} |
1006 |
2515 |
if (dataset.getSequences() == null) |
1007 |
|
{ |
1008 |
0 |
System.err.println("Empty dataset sequence set - NO VECTOR"); |
1009 |
0 |
return false; |
1010 |
|
} |
1011 |
2515 |
List<SequenceI> ds = dataset.getSequences(); |
1012 |
2515 |
synchronized (ds) |
1013 |
|
{ |
1014 |
2515 |
for (SequenceI nxt : ds) |
1015 |
|
{ |
1016 |
45449 |
if (nxt != null) |
1017 |
|
{ |
1018 |
45449 |
if (nxt.getDatasetSequence() != null) |
1019 |
|
{ |
1020 |
0 |
System.err.println( |
1021 |
|
"Implementation warning: CrossRef initialised with a dataset alignment with non-dataset sequences in it! (" |
1022 |
|
+ nxt.getDisplayId(true) + " has ds reference " |
1023 |
|
+ nxt.getDatasetSequence().getDisplayId(true) |
1024 |
|
+ ")"); |
1025 |
|
} |
1026 |
45449 |
if (nxt == fromSeq || nxt == fromSeq.getDatasetSequence()) |
1027 |
|
{ |
1028 |
2512 |
continue; |
1029 |
|
} |
1030 |
|
|
1031 |
|
|
1032 |
|
|
1033 |
|
|
1034 |
|
{ |
1035 |
42937 |
boolean isDna = !nxt.isProtein(); |
1036 |
42937 |
if (direct ? (isDna != fromDna) : (isDna == fromDna)) |
1037 |
|
{ |
1038 |
|
|
1039 |
42930 |
continue; |
1040 |
|
} |
1041 |
|
} |
1042 |
|
|
1043 |
|
|
1044 |
7 |
DBRefEntry[] poss = nxt.getDBRefs(); |
1045 |
7 |
List<DBRefEntry> cands = null; |
1046 |
|
|
1047 |
|
|
1048 |
|
|
1049 |
|
|
1050 |
7 |
cands = DBRefUtils.searchRefs(poss, xrf); |
1051 |
|
|
1052 |
|
|
1053 |
|
|
1054 |
|
|
1055 |
|
|
1056 |
4 |
if (!cands.isEmpty()) |
1057 |
|
{ |
1058 |
4 |
if (foundSeqs.contains(nxt)) |
1059 |
|
{ |
1060 |
0 |
continue; |
1061 |
|
} |
1062 |
4 |
found = true; |
1063 |
4 |
foundSeqs.add(nxt); |
1064 |
4 |
if (mappings != null && !direct) |
1065 |
|
{ |
1066 |
|
|
1067 |
|
|
1068 |
|
|
1069 |
|
|
1070 |
|
|
1071 |
4 |
for (DBRefEntry candidate : cands) |
1072 |
|
{ |
1073 |
4 |
Mapping mapping = candidate.getMap(); |
1074 |
4 |
if (mapping != null) |
1075 |
|
{ |
1076 |
1 |
MapList map = mapping.getMap(); |
1077 |
1 |
if (mapping.getTo() != null |
1078 |
|
&& map.getFromRatio() != map.getToRatio()) |
1079 |
|
{ |
1080 |
|
|
1081 |
|
|
1082 |
|
|
1083 |
1 |
if (map.getFromRatio() == 3) |
1084 |
|
{ |
1085 |
1 |
mappings.addMap(nxt, fromSeq, map); |
1086 |
|
} |
1087 |
|
else |
1088 |
|
{ |
1089 |
0 |
mappings.addMap(nxt, fromSeq, map.getInverse()); |
1090 |
|
} |
1091 |
|
} |
1092 |
|
} |
1093 |
|
} |
1094 |
|
} |
1095 |
|
} |
1096 |
|
} |
1097 |
|
} |
1098 |
|
} |
1099 |
2515 |
return found; |
1100 |
|
} |
1101 |
|
} |