1 |
|
|
2 |
|
|
3 |
|
|
4 |
|
|
5 |
|
|
6 |
|
|
7 |
|
|
8 |
|
|
9 |
|
|
10 |
|
|
11 |
|
|
12 |
|
|
13 |
|
|
14 |
|
|
15 |
|
|
16 |
|
|
17 |
|
|
18 |
|
|
19 |
|
|
20 |
|
|
21 |
|
package jalview.analysis; |
22 |
|
|
23 |
|
import jalview.datamodel.AlignedCodonFrame; |
24 |
|
import jalview.datamodel.Alignment; |
25 |
|
import jalview.datamodel.AlignmentI; |
26 |
|
import jalview.datamodel.DBRefEntry; |
27 |
|
import jalview.datamodel.DBRefSource; |
28 |
|
import jalview.datamodel.Mapping; |
29 |
|
import jalview.datamodel.Sequence; |
30 |
|
import jalview.datamodel.SequenceFeature; |
31 |
|
import jalview.datamodel.SequenceI; |
32 |
|
import jalview.util.DBRefUtils; |
33 |
|
import jalview.util.MapList; |
34 |
|
import jalview.ws.SequenceFetcherFactory; |
35 |
|
import jalview.ws.seqfetcher.ASequenceFetcher; |
36 |
|
|
37 |
|
import java.util.ArrayList; |
38 |
|
import java.util.Iterator; |
39 |
|
import java.util.List; |
40 |
|
|
41 |
|
|
42 |
|
|
43 |
|
|
44 |
|
@author |
45 |
|
|
46 |
|
|
|
|
| 46.3% |
Uncovered Elements: 262 (488) |
Complexity: 137 |
Complexity Density: 0.48 |
|
47 |
|
public class CrossRef |
48 |
|
{ |
49 |
|
|
50 |
|
|
51 |
|
|
52 |
|
|
53 |
|
|
54 |
|
private AlignmentI dataset; |
55 |
|
|
56 |
|
|
57 |
|
|
58 |
|
|
59 |
|
private SequenceI[] fromSeqs; |
60 |
|
|
61 |
|
|
62 |
|
|
63 |
|
|
64 |
|
SequenceIdMatcher matcher; |
65 |
|
|
66 |
|
|
67 |
|
|
68 |
|
|
69 |
|
List<SequenceI> rseqs; |
70 |
|
|
71 |
|
|
72 |
|
|
73 |
|
|
74 |
|
@param |
75 |
|
|
76 |
|
@param |
77 |
|
|
78 |
|
|
79 |
|
|
|
|
| 75% |
Uncovered Elements: 1 (4) |
Complexity: 2 |
Complexity Density: 1 |
|
80 |
432 |
public CrossRef(SequenceI[] seqs, AlignmentI ds)... |
81 |
|
{ |
82 |
432 |
fromSeqs = seqs; |
83 |
432 |
dataset = ds.getDataset() == null ? ds : ds.getDataset(); |
84 |
|
} |
85 |
|
|
86 |
|
|
87 |
|
|
88 |
|
|
89 |
|
|
90 |
|
|
91 |
|
|
92 |
|
|
93 |
|
|
94 |
|
|
95 |
|
@param |
96 |
|
|
97 |
|
|
98 |
|
@return |
99 |
|
|
|
|
| 92.3% |
Uncovered Elements: 1 (13) |
Complexity: 3 |
Complexity Density: 0.33 |
|
100 |
425 |
public List<String> findXrefSourcesForSequences(boolean dna)... |
101 |
|
{ |
102 |
425 |
List<String> sources = new ArrayList<>(); |
103 |
425 |
for (SequenceI seq : fromSeqs) |
104 |
|
{ |
105 |
4686 |
if (seq != null) |
106 |
|
{ |
107 |
4686 |
findXrefSourcesForSequence(seq, dna, sources); |
108 |
|
} |
109 |
|
} |
110 |
425 |
sources.remove(DBRefSource.EMBL); |
111 |
|
|
112 |
425 |
if (dna) |
113 |
|
{ |
114 |
29 |
sources.remove(DBRefSource.ENSEMBL); |
115 |
|
|
116 |
|
|
117 |
29 |
sources.remove(DBRefSource.ENSEMBLGENOMES); |
118 |
|
} |
119 |
|
|
120 |
425 |
return sources; |
121 |
|
} |
122 |
|
|
123 |
|
|
124 |
|
|
125 |
|
|
126 |
|
|
127 |
|
|
128 |
|
|
129 |
|
|
130 |
|
|
131 |
|
|
132 |
|
@param |
133 |
|
|
134 |
|
@param |
135 |
|
|
136 |
|
|
137 |
|
@param |
138 |
|
|
139 |
|
|
|
|
| 90.9% |
Uncovered Elements: 1 (11) |
Complexity: 2 |
Complexity Density: 0.22 |
|
140 |
4686 |
void findXrefSourcesForSequence(SequenceI seq, boolean fromDna,... |
141 |
|
List<String> sources) |
142 |
|
{ |
143 |
|
|
144 |
|
|
145 |
|
|
146 |
4686 |
List<DBRefEntry> rfs = DBRefUtils.selectDbRefs(!fromDna, seq.getDBRefs()); |
147 |
4686 |
addXrefsToSources(rfs, sources); |
148 |
4686 |
if (dataset != null) |
149 |
|
{ |
150 |
|
|
151 |
|
|
152 |
|
|
153 |
4686 |
List<DBRefEntry> lrfs = DBRefUtils.selectDbRefs(fromDna, seq.getDBRefs()); |
154 |
4686 |
List<SequenceI> foundSeqs = new ArrayList<>(); |
155 |
|
|
156 |
|
|
157 |
|
|
158 |
|
|
159 |
|
|
160 |
4686 |
searchDatasetXrefs(fromDna, seq, lrfs, foundSeqs, null); |
161 |
|
|
162 |
|
|
163 |
|
|
164 |
|
|
165 |
4686 |
for (SequenceI rs : foundSeqs) |
166 |
|
{ |
167 |
394 |
List<DBRefEntry> xrs = DBRefUtils.selectDbRefs(!fromDna, |
168 |
|
rs.getDBRefs()); |
169 |
394 |
addXrefsToSources(xrs, sources); |
170 |
|
} |
171 |
|
} |
172 |
|
} |
173 |
|
|
174 |
|
|
175 |
|
|
176 |
|
|
177 |
|
|
178 |
|
@param |
179 |
|
@param |
180 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (9) |
Complexity: 3 |
Complexity Density: 0.6 |
|
181 |
5080 |
void addXrefsToSources(List<DBRefEntry> xrefs, List<String> sources)... |
182 |
|
{ |
183 |
5080 |
if (xrefs != null) |
184 |
|
{ |
185 |
486 |
for (DBRefEntry ref : xrefs) |
186 |
|
{ |
187 |
|
|
188 |
|
|
189 |
|
|
190 |
1812 |
String source = DBRefUtils.getCanonicalName(ref.getSource()); |
191 |
1812 |
if (!sources.contains(source)) |
192 |
|
{ |
193 |
21 |
sources.add(source); |
194 |
|
} |
195 |
|
} |
196 |
|
} |
197 |
|
} |
198 |
|
|
199 |
|
|
200 |
|
|
201 |
|
|
202 |
|
|
203 |
|
|
204 |
|
|
205 |
|
|
206 |
|
|
207 |
|
|
208 |
|
|
209 |
|
|
210 |
|
|
211 |
|
|
212 |
|
|
213 |
|
|
214 |
|
|
215 |
|
@param |
216 |
|
@return |
217 |
|
|
|
|
| 82% |
Uncovered Elements: 18 (100) |
Complexity: 31 |
Complexity Density: 0.53 |
|
218 |
6 |
public Alignment findXrefSequences(String source, boolean fromDna)... |
219 |
|
{ |
220 |
|
|
221 |
6 |
rseqs = new ArrayList<>(); |
222 |
6 |
AlignedCodonFrame cf = new AlignedCodonFrame(); |
223 |
6 |
matcher = new SequenceIdMatcher(dataset.getSequences()); |
224 |
|
|
225 |
6 |
for (SequenceI seq : fromSeqs) |
226 |
|
{ |
227 |
48 |
SequenceI dss = seq; |
228 |
93 |
while (dss.getDatasetSequence() != null) |
229 |
|
{ |
230 |
45 |
dss = dss.getDatasetSequence(); |
231 |
|
} |
232 |
48 |
boolean found = false; |
233 |
48 |
List<DBRefEntry> xrfs = DBRefUtils.selectDbRefs(!fromDna, |
234 |
|
dss.getDBRefs()); |
235 |
|
|
236 |
|
|
237 |
|
|
238 |
48 |
if ((xrfs == null || xrfs.size() == 0) && dataset != null) |
239 |
|
{ |
240 |
|
|
241 |
|
|
242 |
|
|
243 |
|
|
244 |
3 |
List<DBRefEntry> lrfs = DBRefUtils.selectDbRefs(fromDna, |
245 |
|
seq.getDBRefs()); |
246 |
|
|
247 |
|
|
248 |
|
|
249 |
|
|
250 |
|
|
251 |
|
|
252 |
3 |
found = searchDatasetXrefs(fromDna, dss, lrfs, rseqs, cf); |
253 |
|
} |
254 |
48 |
if (xrfs == null && !found) |
255 |
|
{ |
256 |
|
|
257 |
|
|
258 |
|
|
259 |
|
|
260 |
1 |
continue; |
261 |
|
} |
262 |
47 |
List<DBRefEntry> sourceRefs = DBRefUtils.searchRefsForSource(xrfs, |
263 |
|
source); |
264 |
47 |
Iterator<DBRefEntry> refIterator = sourceRefs.iterator(); |
265 |
|
|
266 |
|
|
267 |
93 |
while (refIterator.hasNext()) |
268 |
|
{ |
269 |
46 |
DBRefEntry xref = refIterator.next(); |
270 |
46 |
found = false; |
271 |
|
|
272 |
|
|
273 |
46 |
if (xref.hasMap() && xref.getMap().getMap().isTripletMap()) |
274 |
|
{ |
275 |
24 |
SequenceI mappedTo = xref.getMap().getTo(); |
276 |
24 |
if (mappedTo != null) |
277 |
|
{ |
278 |
|
|
279 |
|
|
280 |
|
|
281 |
|
|
282 |
|
|
283 |
|
|
284 |
24 |
found = true; |
285 |
|
|
286 |
|
|
287 |
|
|
288 |
|
|
289 |
|
|
290 |
24 |
SequenceI matchInDataset = findInDataset(xref); |
291 |
24 |
if (matchInDataset != null && xref.getMap().getTo() != null |
292 |
|
&& matchInDataset != xref.getMap().getTo()) |
293 |
|
{ |
294 |
0 |
System.err.println( |
295 |
|
"Implementation problem (reopen JAL-2154): CrossRef.findInDataset seems to have recovered a different sequence than the one explicitly mapped for xref." |
296 |
|
+ "Found:" + matchInDataset + "\nExpected:" |
297 |
|
+ xref.getMap().getTo() + "\nFor xref:" |
298 |
|
+ xref); |
299 |
|
} |
300 |
|
|
301 |
24 |
if (matchInDataset != null) |
302 |
|
{ |
303 |
22 |
if (!rseqs.contains(matchInDataset)) |
304 |
|
{ |
305 |
0 |
rseqs.add(matchInDataset); |
306 |
|
} |
307 |
|
|
308 |
|
|
309 |
|
|
310 |
22 |
if (xref.getMap().getMap().isTripletMap() |
311 |
|
&& dataset.getMapping(seq, matchInDataset) == null |
312 |
|
&& cf.getMappingBetween(seq, matchInDataset) == null) |
313 |
|
{ |
314 |
|
|
315 |
|
|
316 |
11 |
if (fromDna) |
317 |
|
{ |
318 |
11 |
cf.addMap(dss, matchInDataset, xref.getMap().getMap(), |
319 |
|
xref.getMap().getMappedFromId()); |
320 |
|
} |
321 |
|
else |
322 |
|
{ |
323 |
0 |
cf.addMap(matchInDataset, dss, |
324 |
|
xref.getMap().getMap().getInverse(), |
325 |
|
xref.getMap().getMappedFromId()); |
326 |
|
} |
327 |
|
} |
328 |
|
|
329 |
22 |
refIterator.remove(); |
330 |
22 |
continue; |
331 |
|
} |
332 |
|
|
333 |
2 |
SequenceI rsq = new Sequence(mappedTo); |
334 |
2 |
rseqs.add(rsq); |
335 |
2 |
if (xref.getMap().getMap().isTripletMap()) |
336 |
|
{ |
337 |
|
|
338 |
2 |
if (fromDna) |
339 |
|
{ |
340 |
|
|
341 |
2 |
cf.addMap(dss, rsq, xref.getMap().getMap(), |
342 |
|
xref.getMap().getMappedFromId()); |
343 |
|
} |
344 |
|
else |
345 |
|
{ |
346 |
|
|
347 |
0 |
cf.addMap(rsq, dss, xref.getMap().getMap().getInverse(), |
348 |
|
xref.getMap().getMappedFromId()); |
349 |
|
} |
350 |
|
} |
351 |
|
} |
352 |
|
} |
353 |
|
|
354 |
24 |
if (!found) |
355 |
|
{ |
356 |
22 |
SequenceI matchedSeq = matcher.findIdMatch( |
357 |
|
xref.getSource() + "|" + xref.getAccessionId()); |
358 |
|
|
359 |
|
|
360 |
22 |
if (matchedSeq != null && matchedSeq.isProtein() == fromDna) |
361 |
|
{ |
362 |
0 |
if (constructMapping(seq, matchedSeq, xref, cf, fromDna)) |
363 |
|
{ |
364 |
0 |
found = true; |
365 |
|
} |
366 |
|
} |
367 |
|
} |
368 |
|
|
369 |
24 |
if (!found) |
370 |
|
{ |
371 |
|
|
372 |
|
|
373 |
22 |
found = searchDataset(fromDna, dss, xref, rseqs, cf, false, DBRefUtils.SEARCH_MODE_FULL); |
374 |
|
} |
375 |
24 |
if (found) |
376 |
|
{ |
377 |
24 |
refIterator.remove(); |
378 |
|
} |
379 |
|
} |
380 |
|
|
381 |
|
|
382 |
|
|
383 |
|
|
384 |
47 |
if (!sourceRefs.isEmpty()) |
385 |
|
{ |
386 |
0 |
retrieveCrossRef(sourceRefs, seq, xrfs, fromDna, cf); |
387 |
|
} |
388 |
|
} |
389 |
|
|
390 |
6 |
Alignment ral = null; |
391 |
6 |
if (rseqs.size() > 0) |
392 |
|
{ |
393 |
5 |
ral = new Alignment(rseqs.toArray(new SequenceI[rseqs.size()])); |
394 |
5 |
if (!cf.isEmpty()) |
395 |
|
{ |
396 |
2 |
dataset.addCodonFrame(cf); |
397 |
|
} |
398 |
|
} |
399 |
6 |
return ral; |
400 |
|
} |
401 |
|
|
|
|
| 0% |
Uncovered Elements: 39 (39) |
Complexity: 10 |
Complexity Density: 0.4 |
|
402 |
0 |
private void retrieveCrossRef(List<DBRefEntry> sourceRefs, SequenceI seq,... |
403 |
|
List<DBRefEntry> xrfs, boolean fromDna, AlignedCodonFrame cf) |
404 |
|
{ |
405 |
0 |
ASequenceFetcher sftch = SequenceFetcherFactory.getSequenceFetcher(); |
406 |
0 |
SequenceI[] retrieved = null; |
407 |
0 |
SequenceI dss = seq.getDatasetSequence() == null ? seq |
408 |
|
: seq.getDatasetSequence(); |
409 |
|
|
410 |
|
|
411 |
|
|
412 |
0 |
removeAlreadyRetrievedSeqs(sourceRefs, fromDna); |
413 |
0 |
if (sourceRefs.size() == 0) |
414 |
|
{ |
415 |
|
|
416 |
|
|
417 |
0 |
return; |
418 |
|
} |
419 |
0 |
try |
420 |
|
{ |
421 |
0 |
retrieved = sftch.getSequences(sourceRefs, !fromDna); |
422 |
|
} catch (Exception e) |
423 |
|
{ |
424 |
0 |
System.err.println( |
425 |
|
"Problem whilst retrieving cross references for Sequence : " |
426 |
|
+ seq.getName()); |
427 |
0 |
e.printStackTrace(); |
428 |
|
} |
429 |
|
|
430 |
0 |
if (retrieved != null) |
431 |
|
{ |
432 |
0 |
boolean addedXref = false; |
433 |
0 |
List<SequenceI> newDsSeqs = new ArrayList<>(), |
434 |
|
doNotAdd = new ArrayList<>(); |
435 |
|
|
436 |
0 |
for (SequenceI retrievedSequence : retrieved) |
437 |
|
{ |
438 |
|
|
439 |
|
|
440 |
0 |
SequenceI retrievedDss = retrievedSequence |
441 |
|
.getDatasetSequence() == null ? retrievedSequence |
442 |
|
: retrievedSequence.getDatasetSequence(); |
443 |
0 |
addedXref |= importCrossRefSeq(cf, newDsSeqs, doNotAdd, dss, |
444 |
|
retrievedDss); |
445 |
|
} |
446 |
0 |
if (!addedXref) |
447 |
|
{ |
448 |
|
|
449 |
|
|
450 |
0 |
updateDbrefMappings(seq, xrfs, retrieved, cf, fromDna); |
451 |
0 |
for (SequenceI retrievedSequence : retrieved) |
452 |
|
{ |
453 |
|
|
454 |
|
|
455 |
0 |
SequenceI retrievedDss = retrievedSequence |
456 |
|
.getDatasetSequence() == null ? retrievedSequence |
457 |
|
: retrievedSequence.getDatasetSequence(); |
458 |
0 |
addedXref |= importCrossRefSeq(cf, newDsSeqs, doNotAdd, dss, |
459 |
|
retrievedDss); |
460 |
|
} |
461 |
|
} |
462 |
0 |
for (SequenceI newToSeq : newDsSeqs) |
463 |
|
{ |
464 |
0 |
if (!doNotAdd.contains(newToSeq) |
465 |
|
&& dataset.findIndex(newToSeq) == -1) |
466 |
|
{ |
467 |
0 |
dataset.addSequence(newToSeq); |
468 |
0 |
matcher.add(newToSeq); |
469 |
|
} |
470 |
|
} |
471 |
|
} |
472 |
|
} |
473 |
|
|
474 |
|
|
475 |
|
|
476 |
|
|
477 |
|
|
478 |
|
@param |
479 |
|
|
480 |
|
@param |
481 |
|
|
482 |
|
|
|
|
| 0% |
Uncovered Elements: 26 (26) |
Complexity: 6 |
Complexity Density: 0.38 |
|
483 |
0 |
private void removeAlreadyRetrievedSeqs(List<DBRefEntry> sourceRefs,... |
484 |
|
boolean fromDna) |
485 |
|
{ |
486 |
0 |
List<DBRefEntry> dbrSourceSet = new ArrayList<>(sourceRefs); |
487 |
0 |
List<SequenceI> dsSeqs = dataset.getSequences(); |
488 |
0 |
for (int ids = 0, nds = dsSeqs.size(); ids < nds; ids++) |
489 |
|
{ |
490 |
0 |
SequenceI sq = dsSeqs.get(ids); |
491 |
0 |
boolean dupeFound = false; |
492 |
|
|
493 |
|
|
494 |
0 |
if (sq.isProtein() == fromDna) |
495 |
|
{ |
496 |
0 |
List<DBRefEntry> sqdbrefs = sq.getPrimaryDBRefs(); |
497 |
0 |
for (int idb = 0, ndb = sqdbrefs.size(); idb < ndb; idb++) |
498 |
|
{ |
499 |
0 |
DBRefEntry dbr = sqdbrefs.get(idb); |
500 |
0 |
List<DBRefEntry> searchrefs = DBRefUtils.searchRefs(dbrSourceSet, dbr, DBRefUtils.SEARCH_MODE_FULL); |
501 |
0 |
for (int isr = 0, nsr = searchrefs.size(); isr < nsr; isr++) |
502 |
|
{ |
503 |
0 |
sourceRefs.remove(searchrefs.get(isr)); |
504 |
0 |
dupeFound = true; |
505 |
|
} |
506 |
|
} |
507 |
|
} |
508 |
0 |
if (dupeFound) |
509 |
|
{ |
510 |
|
|
511 |
0 |
dbrSourceSet.clear(); |
512 |
0 |
dbrSourceSet.addAll(sourceRefs); |
513 |
|
} |
514 |
|
} |
515 |
|
} |
516 |
|
|
517 |
|
|
518 |
|
|
519 |
|
|
520 |
|
|
521 |
|
@param |
522 |
|
@param |
523 |
|
@param |
524 |
|
@return |
525 |
|
|
|
|
| 0% |
Uncovered Elements: 71 (71) |
Complexity: 16 |
Complexity Density: 0.34 |
|
526 |
0 |
private boolean importCrossRefSeq(AlignedCodonFrame cf,... |
527 |
|
List<SequenceI> newDsSeqs, List<SequenceI> doNotAdd, |
528 |
|
SequenceI sourceSequence, SequenceI retrievedSequence) |
529 |
|
{ |
530 |
|
|
531 |
|
|
532 |
|
|
533 |
|
|
534 |
0 |
boolean imported = false; |
535 |
0 |
List<DBRefEntry> dbr = retrievedSequence.getDBRefs(); |
536 |
0 |
if (dbr != null) |
537 |
|
{ |
538 |
0 |
for (int ib = 0, nb = dbr.size(); ib < nb; ib++) |
539 |
|
{ |
540 |
|
|
541 |
0 |
DBRefEntry dbref = dbr.get(ib); |
542 |
0 |
SequenceI matched = findInDataset(dbref); |
543 |
0 |
if (matched == sourceSequence) |
544 |
|
{ |
545 |
|
|
546 |
0 |
imported = true; |
547 |
|
} |
548 |
|
|
549 |
|
|
550 |
0 |
Mapping map = dbref.getMap(); |
551 |
0 |
if (map != null) |
552 |
|
{ |
553 |
0 |
SequenceI ms = map.getTo(); |
554 |
0 |
if (ms != null && map.getMap() != null) |
555 |
|
{ |
556 |
0 |
if (ms == sourceSequence) |
557 |
|
{ |
558 |
|
|
559 |
|
|
560 |
0 |
continue; |
561 |
|
} |
562 |
0 |
if (matched == null) |
563 |
|
{ |
564 |
|
|
565 |
|
|
566 |
|
|
567 |
0 |
newDsSeqs.add(ms); |
568 |
0 |
continue; |
569 |
|
} |
570 |
|
|
571 |
|
|
572 |
|
|
573 |
|
|
574 |
|
|
575 |
0 |
try |
576 |
|
{ |
577 |
|
|
578 |
|
|
579 |
|
|
580 |
|
|
581 |
|
|
582 |
|
|
583 |
|
|
584 |
|
|
585 |
|
|
586 |
0 |
int sf = map.getMap().getToLowest(); |
587 |
0 |
int st = map.getMap().getToHighest(); |
588 |
0 |
SequenceI mappedrg = ms.getSubSequence(sf, st); |
589 |
0 |
if (mappedrg.getLength() > 0 && ms.getSequenceAsString() |
590 |
|
.equals(matched.getSequenceAsString())) |
591 |
|
{ |
592 |
|
|
593 |
|
|
594 |
|
|
595 |
0 |
String msg = "Mapping updated from " + ms.getName() |
596 |
|
+ " to retrieved crossreference " |
597 |
|
+ matched.getName(); |
598 |
0 |
System.out.println(msg); |
599 |
|
|
600 |
0 |
List<DBRefEntry> toRefs = map.getTo().getDBRefs(); |
601 |
0 |
if (toRefs != null) |
602 |
|
{ |
603 |
|
|
604 |
|
|
605 |
|
|
606 |
0 |
for (DBRefEntry ref : toRefs) |
607 |
|
{ |
608 |
0 |
if (dbref.getSrcAccString() |
609 |
|
.equals(ref.getSrcAccString())) |
610 |
|
{ |
611 |
0 |
continue; |
612 |
|
} |
613 |
0 |
matched.addDBRef(ref); |
614 |
|
} |
615 |
|
} |
616 |
0 |
doNotAdd.add(map.getTo()); |
617 |
0 |
map.setTo(matched); |
618 |
|
|
619 |
|
|
620 |
|
|
621 |
|
|
622 |
|
|
623 |
0 |
setReverseMapping(matched, dbref, cf); |
624 |
|
|
625 |
|
|
626 |
|
|
627 |
|
|
628 |
|
|
629 |
|
|
630 |
0 |
List<SequenceFeature> sfs = ms.getFeatures() |
631 |
|
.getAllFeatures(); |
632 |
0 |
for (SequenceFeature feat : sfs) |
633 |
|
{ |
634 |
|
|
635 |
|
|
636 |
|
|
637 |
|
|
638 |
|
|
639 |
0 |
SequenceFeature newFeature = new SequenceFeature(feat) |
640 |
|
{ |
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
641 |
0 |
@Override... |
642 |
|
public boolean equals(Object o) |
643 |
|
{ |
644 |
0 |
return super.equals(o, true); |
645 |
|
} |
646 |
|
}; |
647 |
0 |
matched.addSequenceFeature(newFeature); |
648 |
|
} |
649 |
|
} |
650 |
0 |
cf.addMap(retrievedSequence, map.getTo(), map.getMap()); |
651 |
|
} catch (Exception e) |
652 |
|
{ |
653 |
0 |
System.err.println( |
654 |
|
"Exception when consolidating Mapped sequence set..."); |
655 |
0 |
e.printStackTrace(System.err); |
656 |
|
} |
657 |
|
} |
658 |
|
} |
659 |
|
} |
660 |
|
} |
661 |
0 |
if (imported) |
662 |
|
{ |
663 |
0 |
retrievedSequence.updatePDBIds(); |
664 |
0 |
rseqs.add(retrievedSequence); |
665 |
0 |
if (dataset.findIndex(retrievedSequence) == -1) |
666 |
|
{ |
667 |
0 |
dataset.addSequence(retrievedSequence); |
668 |
0 |
matcher.add(retrievedSequence); |
669 |
|
} |
670 |
|
} |
671 |
0 |
return imported; |
672 |
|
} |
673 |
|
|
674 |
|
|
675 |
|
|
676 |
|
|
677 |
|
|
678 |
|
|
679 |
|
|
680 |
|
@param |
681 |
|
|
682 |
|
@param |
683 |
|
@param |
684 |
|
|
|
|
| 0% |
Uncovered Elements: 20 (20) |
Complexity: 6 |
Complexity Density: 0.5 |
|
685 |
0 |
void setReverseMapping(SequenceI mapFrom, DBRefEntry dbref,... |
686 |
|
AlignedCodonFrame mappings) |
687 |
|
{ |
688 |
0 |
SequenceI mapTo = dbref.getMap().getTo(); |
689 |
0 |
if (mapTo == null) |
690 |
|
{ |
691 |
0 |
return; |
692 |
|
} |
693 |
0 |
List<DBRefEntry> dbrefs = mapTo.getDBRefs(); |
694 |
0 |
if (dbrefs == null) |
695 |
|
{ |
696 |
0 |
return; |
697 |
|
} |
698 |
0 |
for (DBRefEntry toRef : dbrefs) |
699 |
|
{ |
700 |
0 |
if (toRef.hasMap() && mapFrom == toRef.getMap().getTo()) |
701 |
|
{ |
702 |
|
|
703 |
|
|
704 |
|
|
705 |
0 |
if (toRef.getMap().getMap() == null) |
706 |
|
{ |
707 |
0 |
MapList inverse = dbref.getMap().getMap().getInverse(); |
708 |
0 |
toRef.getMap().setMap(inverse); |
709 |
0 |
mappings.addMap(mapTo, mapFrom, inverse); |
710 |
|
} |
711 |
|
} |
712 |
|
} |
713 |
|
} |
714 |
|
|
715 |
|
|
716 |
|
|
717 |
|
|
718 |
|
|
719 |
|
|
720 |
|
@param |
721 |
|
|
722 |
|
@return |
723 |
|
|
|
|
| 70% |
Uncovered Elements: 9 (30) |
Complexity: 13 |
Complexity Density: 0.72 |
|
724 |
24 |
SequenceI findInDataset(DBRefEntry xref)... |
725 |
|
{ |
726 |
24 |
if (xref == null || !xref.hasMap() || xref.getMap().getTo() == null) |
727 |
|
{ |
728 |
0 |
return null; |
729 |
|
} |
730 |
24 |
SequenceI mapsTo = xref.getMap().getTo(); |
731 |
24 |
String name = xref.getAccessionId(); |
732 |
24 |
String name2 = xref.getSource() + "|" + name; |
733 |
24 |
SequenceI dss = mapsTo.getDatasetSequence() == null ? mapsTo |
734 |
|
: mapsTo.getDatasetSequence(); |
735 |
|
|
736 |
24 |
if (dataset.findIndex(dss) > -1) |
737 |
|
{ |
738 |
22 |
return dss; |
739 |
|
} |
740 |
2 |
DBRefEntry template = new DBRefEntry(xref.getSource(), null, |
741 |
|
xref.getAccessionId()); |
742 |
|
|
743 |
|
|
744 |
|
|
745 |
2 |
SequenceI firstIdMatch = null; |
746 |
2 |
for (SequenceI seq : dataset.getSequences()) |
747 |
|
{ |
748 |
|
|
749 |
2 |
List<DBRefEntry> match = DBRefUtils.searchRefs( |
750 |
|
seq.getPrimaryDBRefs(), template, DBRefUtils.SEARCH_MODE_FULL); |
751 |
2 |
if (match != null && match.size() == 1 && sameSequence(seq, dss)) |
752 |
|
{ |
753 |
0 |
return seq; |
754 |
|
} |
755 |
|
|
756 |
|
|
757 |
|
|
758 |
|
|
759 |
|
|
760 |
2 |
if (firstIdMatch == null && (name.equals(seq.getName()) |
761 |
|
|| seq.getName().startsWith(name2))) |
762 |
|
{ |
763 |
0 |
if (sameSequence(seq, dss)) |
764 |
|
{ |
765 |
0 |
firstIdMatch = seq; |
766 |
|
} |
767 |
|
} |
768 |
|
} |
769 |
2 |
return firstIdMatch; |
770 |
|
} |
771 |
|
|
772 |
|
|
773 |
|
|
774 |
|
|
775 |
|
|
776 |
|
|
777 |
|
|
778 |
|
@param |
779 |
|
@param |
780 |
|
@return |
781 |
|
|
782 |
|
|
|
|
| 90.9% |
Uncovered Elements: 2 (22) |
Complexity: 9 |
Complexity Density: 0.75 |
|
783 |
7 |
static boolean sameSequence(SequenceI seq1, SequenceI seq2)... |
784 |
|
{ |
785 |
7 |
if (seq1 == seq2) |
786 |
|
{ |
787 |
1 |
return true; |
788 |
|
} |
789 |
6 |
if (seq1 == null || seq2 == null) |
790 |
|
{ |
791 |
2 |
return false; |
792 |
|
} |
793 |
|
|
794 |
4 |
if (seq1.getLength() != seq2.getLength()) |
795 |
|
{ |
796 |
2 |
return false; |
797 |
|
} |
798 |
2 |
int length = seq1.getLength(); |
799 |
14 |
for (int i = 0; i < length; i++) |
800 |
|
{ |
801 |
12 |
int diff = seq1.getCharAt(i) - seq2.getCharAt(i); |
802 |
|
|
803 |
|
|
804 |
|
|
805 |
12 |
if (diff != 0 && diff != 32 && diff != -32) |
806 |
|
{ |
807 |
0 |
return false; |
808 |
|
} |
809 |
|
} |
810 |
2 |
return true; |
811 |
|
} |
812 |
|
|
813 |
|
|
814 |
|
|
815 |
|
|
816 |
|
|
817 |
|
|
818 |
|
@param |
819 |
|
@param |
820 |
|
@param |
821 |
|
@param |
822 |
|
|
|
|
| 0% |
Uncovered Elements: 13 (13) |
Complexity: 3 |
Complexity Density: 0.33 |
|
823 |
0 |
void updateDbrefMappings(SequenceI mapFrom, List<DBRefEntry> xrefs,... |
824 |
|
SequenceI[] retrieved, AlignedCodonFrame acf, boolean fromDna) |
825 |
|
{ |
826 |
0 |
SequenceIdMatcher idMatcher = new SequenceIdMatcher(retrieved); |
827 |
0 |
for (DBRefEntry xref : xrefs) |
828 |
|
{ |
829 |
0 |
if (!xref.hasMap()) |
830 |
|
{ |
831 |
0 |
String targetSeqName = xref.getSource() + "|" |
832 |
|
+ xref.getAccessionId(); |
833 |
0 |
SequenceI[] matches = idMatcher.findAllIdMatches(targetSeqName); |
834 |
0 |
if (matches == null) |
835 |
|
{ |
836 |
0 |
return; |
837 |
|
} |
838 |
0 |
for (SequenceI seq : matches) |
839 |
|
{ |
840 |
0 |
constructMapping(mapFrom, seq, xref, acf, fromDna); |
841 |
|
} |
842 |
|
} |
843 |
|
} |
844 |
|
} |
845 |
|
|
846 |
|
|
847 |
|
|
848 |
|
|
849 |
|
|
850 |
|
|
851 |
|
|
852 |
|
|
853 |
|
|
854 |
|
|
855 |
|
|
856 |
|
|
857 |
|
|
858 |
|
|
859 |
|
|
860 |
|
@param |
861 |
|
@param |
862 |
|
@param |
863 |
|
@param |
864 |
|
@return |
865 |
|
|
|
|
| 0% |
Uncovered Elements: 42 (42) |
Complexity: 12 |
Complexity Density: 0.5 |
|
866 |
0 |
boolean constructMapping(SequenceI mapFrom, SequenceI mapTo,... |
867 |
|
DBRefEntry xref, AlignedCodonFrame mappings, boolean fromDna) |
868 |
|
{ |
869 |
0 |
MapList mapping = null; |
870 |
0 |
SequenceI dsmapFrom = mapFrom.getDatasetSequence() == null ? mapFrom |
871 |
|
: mapFrom.getDatasetSequence(); |
872 |
0 |
SequenceI dsmapTo = mapTo.getDatasetSequence() == null ? mapTo |
873 |
|
: mapTo.getDatasetSequence(); |
874 |
|
|
875 |
|
|
876 |
|
|
877 |
|
|
878 |
0 |
if (dsmapTo.getDBRefs() != null) |
879 |
|
{ |
880 |
0 |
for (DBRefEntry dbref : dsmapTo.getDBRefs()) |
881 |
|
{ |
882 |
0 |
String name = dbref.getSource() + "|" + dbref.getAccessionId(); |
883 |
0 |
if (dbref.hasMap() && dsmapFrom.getName().startsWith(name)) |
884 |
|
{ |
885 |
|
|
886 |
|
|
887 |
|
|
888 |
|
|
889 |
0 |
MapList reverse = dbref.getMap().getMap().getInverse(); |
890 |
0 |
xref.setMap(new Mapping(dsmapTo, reverse)); |
891 |
0 |
mappings.addMap(mapFrom, dsmapTo, reverse); |
892 |
0 |
return true; |
893 |
|
} |
894 |
|
} |
895 |
|
} |
896 |
|
|
897 |
0 |
if (fromDna) |
898 |
|
{ |
899 |
0 |
mapping = AlignmentUtils.mapCdnaToProtein(mapTo, mapFrom); |
900 |
|
} |
901 |
|
else |
902 |
|
{ |
903 |
0 |
mapping = AlignmentUtils.mapCdnaToProtein(mapFrom, mapTo); |
904 |
0 |
if (mapping != null) |
905 |
|
{ |
906 |
0 |
mapping = mapping.getInverse(); |
907 |
|
} |
908 |
|
} |
909 |
0 |
if (mapping == null) |
910 |
|
{ |
911 |
0 |
return false; |
912 |
|
} |
913 |
0 |
xref.setMap(new Mapping(mapTo, mapping)); |
914 |
|
|
915 |
|
|
916 |
|
|
917 |
|
|
918 |
0 |
if (mapFrom.getDatasetSequence() != null && false) |
919 |
|
|
920 |
|
{ |
921 |
|
|
922 |
|
|
923 |
|
|
924 |
|
|
925 |
|
|
926 |
|
|
927 |
|
|
928 |
|
} |
929 |
|
|
930 |
0 |
if (fromDna) |
931 |
|
{ |
932 |
|
|
933 |
0 |
mappings.addMap(mapFrom, mapTo, mapping); |
934 |
|
} |
935 |
|
else |
936 |
|
{ |
937 |
0 |
mappings.addMap(mapTo, mapFrom, mapping.getInverse()); |
938 |
|
} |
939 |
|
|
940 |
0 |
return true; |
941 |
|
} |
942 |
|
|
943 |
|
|
944 |
|
|
945 |
|
|
946 |
|
|
947 |
|
|
948 |
|
@param |
949 |
|
|
950 |
|
|
951 |
|
@param |
952 |
|
@param |
953 |
|
@param |
954 |
|
@return |
955 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (10) |
Complexity: 3 |
Complexity Density: 0.5 |
|
956 |
4689 |
private boolean searchDatasetXrefs(boolean fromDna, SequenceI sequenceI,... |
957 |
|
List<DBRefEntry> lrfs, List<SequenceI> foundSeqs, |
958 |
|
AlignedCodonFrame cf) |
959 |
|
{ |
960 |
4689 |
boolean found = false; |
961 |
4689 |
if (lrfs == null) |
962 |
|
{ |
963 |
2968 |
return false; |
964 |
|
} |
965 |
3797 |
for (int i = 0, n = lrfs.size(); i < n; i++) |
966 |
|
{ |
967 |
|
|
968 |
|
|
969 |
|
|
970 |
|
|
971 |
2076 |
found |= searchDataset(fromDna, sequenceI, lrfs.get(i), foundSeqs, cf, |
972 |
|
false, DBRefUtils.SEARCH_MODE_NO_MAP_NO_VERSION); |
973 |
|
} |
974 |
1721 |
return found; |
975 |
|
} |
976 |
|
|
977 |
|
|
978 |
|
|
979 |
|
|
980 |
|
|
981 |
|
@param |
982 |
|
|
983 |
|
|
984 |
|
@param |
985 |
|
|
986 |
|
@param |
987 |
|
|
988 |
|
@param |
989 |
|
|
990 |
|
@param |
991 |
|
|
992 |
|
@param |
993 |
|
|
994 |
|
|
995 |
|
|
996 |
|
|
997 |
|
|
998 |
|
|
999 |
|
|
1000 |
|
|
1001 |
|
|
1002 |
|
|
1003 |
|
@param |
1004 |
|
@return |
1005 |
|
|
|
|
| 82% |
Uncovered Elements: 11 (61) |
Complexity: 17 |
Complexity Density: 0.49 |
|
1006 |
2101 |
boolean searchDataset(boolean fromDna, SequenceI fromSeq, DBRefEntry xrf,... |
1007 |
|
List<SequenceI> foundSeqs, AlignedCodonFrame mappings, |
1008 |
|
boolean direct, int mode) |
1009 |
|
{ |
1010 |
2101 |
boolean found = false; |
1011 |
2101 |
if (dataset == null) |
1012 |
|
{ |
1013 |
0 |
return false; |
1014 |
|
} |
1015 |
2101 |
if (dataset.getSequences() == null) |
1016 |
|
{ |
1017 |
0 |
System.err.println("Empty dataset sequence set - NO VECTOR"); |
1018 |
0 |
return false; |
1019 |
|
} |
1020 |
2101 |
List<SequenceI> ds = dataset.getSequences(); |
1021 |
2101 |
synchronized (ds) |
1022 |
|
{ |
1023 |
2101 |
for (SequenceI nxt : ds) |
1024 |
|
{ |
1025 |
46848 |
if (nxt != null) |
1026 |
|
{ |
1027 |
46848 |
if (nxt.getDatasetSequence() != null) |
1028 |
|
{ |
1029 |
0 |
System.err.println( |
1030 |
|
"Implementation warning: CrossRef initialised with a dataset alignment with non-dataset sequences in it! (" |
1031 |
|
+ nxt.getDisplayId(true) + " has ds reference " |
1032 |
|
+ nxt.getDatasetSequence().getDisplayId(true) |
1033 |
|
+ ")"); |
1034 |
|
} |
1035 |
46848 |
if (nxt == fromSeq || nxt == fromSeq.getDatasetSequence()) |
1036 |
|
{ |
1037 |
2098 |
continue; |
1038 |
|
} |
1039 |
|
|
1040 |
|
|
1041 |
|
|
1042 |
|
|
1043 |
|
{ |
1044 |
44750 |
boolean isDna = !nxt.isProtein(); |
1045 |
44750 |
if (direct ? (isDna != fromDna) : (isDna == fromDna)) |
1046 |
|
{ |
1047 |
|
|
1048 |
38916 |
continue; |
1049 |
|
} |
1050 |
|
} |
1051 |
|
|
1052 |
|
|
1053 |
5834 |
List<DBRefEntry> poss = nxt.getDBRefs(); |
1054 |
5834 |
List<DBRefEntry> cands = null; |
1055 |
|
|
1056 |
|
|
1057 |
|
|
1058 |
|
|
1059 |
5834 |
cands = DBRefUtils.searchRefs(poss, xrf, mode); |
1060 |
|
|
1061 |
|
|
1062 |
|
|
1063 |
|
|
1064 |
|
|
1065 |
5834 |
if (!cands.isEmpty()) |
1066 |
|
{ |
1067 |
1442 |
if (foundSeqs.contains(nxt)) |
1068 |
|
{ |
1069 |
1022 |
continue; |
1070 |
|
} |
1071 |
420 |
found = true; |
1072 |
420 |
foundSeqs.add(nxt); |
1073 |
420 |
if (mappings != null && !direct) |
1074 |
|
{ |
1075 |
|
|
1076 |
|
|
1077 |
|
|
1078 |
|
|
1079 |
|
|
1080 |
26 |
for (DBRefEntry candidate : cands) |
1081 |
|
{ |
1082 |
26 |
Mapping mapping = candidate.getMap(); |
1083 |
26 |
if (mapping != null) |
1084 |
|
{ |
1085 |
1 |
MapList map = mapping.getMap(); |
1086 |
1 |
if (mapping.getTo() != null |
1087 |
|
&& map.getFromRatio() != map.getToRatio()) |
1088 |
|
{ |
1089 |
|
|
1090 |
|
|
1091 |
|
|
1092 |
1 |
if (map.getFromRatio() == 3) |
1093 |
|
{ |
1094 |
1 |
mappings.addMap(nxt, fromSeq, map); |
1095 |
|
} |
1096 |
|
else |
1097 |
|
{ |
1098 |
0 |
mappings.addMap(nxt, fromSeq, map.getInverse()); |
1099 |
|
} |
1100 |
|
} |
1101 |
|
} |
1102 |
|
} |
1103 |
|
} |
1104 |
|
} |
1105 |
|
} |
1106 |
|
} |
1107 |
|
} |
1108 |
2101 |
return found; |
1109 |
|
} |
1110 |
|
} |