Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
AlignedCodonFrame | 34 | 172 | 97 | ||
AlignedCodonFrame.SequenceToSequenceMapping | 40 | 66 | 35 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | package jalview.datamodel; | |
22 | ||
23 | import java.util.AbstractList; | |
24 | import java.util.ArrayList; | |
25 | import java.util.List; | |
26 | ||
27 | import jalview.util.MapList; | |
28 | import jalview.util.MappingUtils; | |
29 | ||
30 | /** | |
31 | * Stores mapping between the columns of a protein alignment and a DNA alignment | |
32 | * and a list of individual codon to amino acid mappings between sequences. | |
33 | */ | |
34 | public class AlignedCodonFrame | |
35 | { | |
36 | ||
37 | /* | |
38 | * Data bean to hold mappings from one sequence to another | |
39 | */ | |
40 | public class SequenceToSequenceMapping | |
41 | { | |
42 | private SequenceI fromSeq; | |
43 | ||
44 | private Mapping mapping; | |
45 | ||
/**
 * Creates a bean pairing a 'from' sequence with the mapping that leads
 * away from it.
 *
 * @param from
 *          the sequence the mapping starts at
 * @param map
 *          the mapping (which holds the 'to' sequence)
 */
SequenceToSequenceMapping(SequenceI from, Mapping map)
{
  fromSeq = from;
  mapping = map;
}
51 | ||
52 | /** | |
53 | * Readable representation for debugging only, not guaranteed not to change | |
54 | */ | |
55 | 0 | @Override |
56 | public String toString() | |
57 | { | |
58 | 0 | return String.format("From %s %s", fromSeq.getName(), |
59 | mapping.toString()); | |
60 | } | |
61 | ||
62 | /** | |
63 | * Returns a hashCode derived from the hashcodes of the mappings and fromSeq | |
64 | * | |
65 | * @see SequenceToSequenceMapping#hashCode() | |
66 | */ | |
67 | 0 | @Override |
68 | public int hashCode() | |
69 | { | |
70 | 0 | return (fromSeq == null ? 0 : fromSeq.hashCode() * 31) |
71 | + mapping.hashCode(); | |
72 | } | |
73 | ||
74 | /** | |
75 | * Answers true if the objects hold the same mapping between the same two | |
76 | * sequences | |
77 | * | |
78 | * @see Mapping#equals | |
79 | */ | |
80 | 2683 | @Override |
81 | public boolean equals(Object obj) | |
82 | { | |
83 | 2683 | if (!(obj instanceof SequenceToSequenceMapping)) |
84 | { | |
85 | 0 | return false; |
86 | } | |
87 | 2683 | SequenceToSequenceMapping that = (SequenceToSequenceMapping) obj; |
88 | 2683 | if (this.mapping == null) |
89 | { | |
90 | 0 | return that.mapping == null; |
91 | } | |
92 | // TODO: can simplify by asserting fromSeq is a dataset sequence | |
93 | 2683 | return (this.fromSeq == that.fromSeq |
94 | || (this.fromSeq != null && that.fromSeq != null | |
95 | && this.fromSeq.getDatasetSequence() != null | |
96 | && this.fromSeq.getDatasetSequence() == that.fromSeq | |
97 | .getDatasetSequence())) | |
98 | && this.mapping.equals(that.mapping); | |
99 | } | |
100 | ||
101 | 65 | public SequenceI getFromSeq() |
102 | { | |
103 | 65 | return fromSeq; |
104 | } | |
105 | ||
106 | 314 | public Mapping getMapping() |
107 | { | |
108 | 314 | return mapping; |
109 | } | |
110 | ||
111 | /** | |
112 | * Returns true if the mapping covers the full length of the given sequence. | |
113 | * This allows us to distinguish the CDS that codes for a protein from | |
114 | * another overlapping CDS in the parent dna sequence. | |
115 | * | |
116 | * @param seq | |
117 | * @return | |
118 | */ | |
119 | 280 | public boolean covers(SequenceI seq) |
120 | { | |
121 | 280 | return covers(seq, false, false); |
122 | } | |
123 | ||
124 | /** | |
125 | * | |
126 | * @param seq | |
127 | * @param localCover | |
128 | * - when true - compare extent of seq's dataset sequence rather | |
129 | * than the local extent | |
130 | * @param either | |
131 | * - when true coverage is required for either seq or the mapped | |
132 | * sequence | |
133 | * @return true if mapping covers full length of given sequence (or the | |
134 | * other if either==true) | |
135 | */ | |
136 | 579 | public boolean covers(SequenceI seq, boolean localCover, boolean either) |
137 | { | |
138 | 579 | List<int[]> mappedRanges = null, otherRanges = null; |
139 | 579 | MapList mapList = mapping.getMap(); |
140 | 579 | int mstart = seq.getStart(), mend = seq.getEnd(), ostart, oend; |
141 | 579 | ; |
142 | 579 | if (fromSeq == seq || fromSeq == seq.getDatasetSequence()) |
143 | { | |
144 | 153 | if (localCover && fromSeq != seq) |
145 | { | |
146 | 72 | mstart = fromSeq.getStart(); |
147 | 72 | mend = fromSeq.getEnd(); |
148 | } | |
149 | 153 | mappedRanges = mapList.getFromRanges(); |
150 | 153 | otherRanges = mapList.getToRanges(); |
151 | 153 | ostart = mapping.to.getStart(); |
152 | 153 | oend = mapping.to.getEnd(); |
153 | } | |
154 | 426 | else if (mapping.to == seq || mapping.to == seq.getDatasetSequence()) |
155 | { | |
156 | 124 | if (localCover && mapping.to != seq) |
157 | { | |
158 | 65 | mstart = mapping.to.getStart(); |
159 | 65 | mend = mapping.to.getEnd(); |
160 | } | |
161 | 124 | mappedRanges = mapList.getToRanges(); |
162 | 124 | otherRanges = mapList.getFromRanges(); |
163 | 124 | ostart = fromSeq.getStart(); |
164 | 124 | oend = fromSeq.getEnd(); |
165 | } | |
166 | else | |
167 | { | |
168 | 302 | return false; |
169 | } | |
170 | ||
171 | /* | |
172 | * check that each mapped range lies within the sequence range | |
173 | * (necessary for circular CDS - example EMBL:J03321:AAA91567) | |
174 | * and mapped length covers (at least) sequence length | |
175 | */ | |
176 | 277 | int length = countRange(mappedRanges, mstart, mend); |
177 | ||
178 | 277 | if (length != -1) |
179 | { | |
180 | // add 3 to mapped length to allow for a mapped stop codon | |
181 | 263 | if (length + 3 >= (mend - mstart + 1)) |
182 | { | |
183 | 230 | return true; |
184 | } | |
185 | } | |
186 | 47 | if (either) |
187 | { | |
188 | // also check coverage of the other range | |
189 | 25 | length = countRange(otherRanges, ostart, oend); |
190 | 25 | if (length != -1) |
191 | { | |
192 | 25 | if (length + 1 >= (oend - ostart + 1)) |
193 | { | |
194 | 25 | return true; |
195 | } | |
196 | } | |
197 | } | |
198 | 22 | return false; |
199 | } | |
200 | ||
201 | 302 | private int countRange(List<int[]> mappedRanges, int mstart, int mend) |
202 | { | |
203 | 302 | int length = 0; |
204 | 302 | for (int[] range : mappedRanges) |
205 | { | |
206 | 388 | int from = Math.min(range[0], range[1]); |
207 | 388 | int to = Math.max(range[0], range[1]); |
208 | 388 | if (from < mstart || to > mend) |
209 | { | |
210 | 14 | return -1; |
211 | } | |
212 | 374 | length += (to - from + 1); |
213 | } | |
214 | 288 | return length; |
215 | } | |
216 | ||
217 | /** | |
218 | * Adds any regions mapped to or from position {@code pos} in sequence | |
219 | * {@code seq} to the given search results Note: recommend first using the | |
220 | * .covers(,true,true) to ensure mapping covers both sequences | |
221 | * | |
222 | * @param seq | |
223 | * @param pos | |
224 | * @param sr | |
225 | */ | |
226 | 142 | public void markMappedRegion(SequenceI seq, int pos, SearchResultsI sr) |
227 | { | |
228 | 142 | int[] codon = null; |
229 | 142 | SequenceI mappedSeq = null; |
230 | 142 | SequenceI ds = seq.getDatasetSequence(); |
231 | 142 | if (ds == null) |
232 | { | |
233 | 142 | ds = seq; |
234 | } | |
235 | ||
236 | 142 | if (this.fromSeq == seq || this.fromSeq == ds) |
237 | { | |
238 | 74 | codon = this.mapping.map.locateInTo(pos, pos); |
239 | 74 | mappedSeq = this.mapping.to; |
240 | } | |
241 | 68 | else if (this.mapping.to == seq || this.mapping.to == ds) |
242 | { | |
243 | 68 | codon = this.mapping.map.locateInFrom(pos, pos); |
244 | 68 | mappedSeq = this.fromSeq; |
245 | } | |
246 | ||
247 | 142 | if (codon != null) |
248 | { | |
249 | 267 | for (int i = 0; i < codon.length; i += 2) |
250 | { | |
251 | 142 | sr.addResult(mappedSeq, codon[i], codon[i + 1]); |
252 | } | |
253 | } | |
254 | } | |
255 | } | |
256 | ||
257 | private List<SequenceToSequenceMapping> mappings; | |
258 | ||
/**
 * Constructs a codon frame holding no mappings.
 */
public AlignedCodonFrame()
{
  this.mappings = new ArrayList<>();
}
266 | ||
267 | /** | |
268 | * Adds a mapping between the dataset sequences for the associated dna and | |
269 | * protein sequence objects | |
270 | * | |
271 | * @param dnaseq | |
272 | * @param aaseq | |
273 | * @param map | |
274 | */ | |
275 | 415 | public void addMap(SequenceI dnaseq, SequenceI aaseq, MapList map) |
276 | { | |
277 | 415 | addMap(dnaseq, aaseq, map, null); |
278 | } | |
279 | ||
280 | /** | |
281 | * Adds a mapping between the dataset sequences for the associated dna and | |
282 | * protein sequence objects | |
283 | * | |
284 | * @param dnaseq | |
285 | * @param aaseq | |
286 | * @param map | |
287 | * @param mapFromId | |
288 | */ | |
289 | 428 | public void addMap(SequenceI dnaseq, SequenceI aaseq, MapList map, |
290 | String mapFromId) | |
291 | { | |
292 | // JBPNote DEBUG! THIS ! | |
293 | // dnaseq.transferAnnotation(aaseq, mp); | |
294 | // aaseq.transferAnnotation(dnaseq, new Mapping(map.getInverse())); | |
295 | ||
296 | 428 | SequenceI fromSeq = (dnaseq.getDatasetSequence() == null) ? dnaseq |
297 | : dnaseq.getDatasetSequence(); | |
298 | 428 | SequenceI toSeq = (aaseq.getDatasetSequence() == null) ? aaseq |
299 | : aaseq.getDatasetSequence(); | |
300 | ||
301 | /* | |
302 | * if we already hold a mapping between these sequences, just add to it | |
303 | * note that 'adding' a duplicate map does nothing; this protects against | |
304 | * creating duplicate mappings in AlignedCodonFrame | |
305 | */ | |
306 | 428 | for (SequenceToSequenceMapping ssm : mappings) |
307 | { | |
308 | 1157 | if (ssm.fromSeq == fromSeq && ssm.mapping.to == toSeq) |
309 | { | |
310 | 8 | ssm.mapping.map.addMapList(map); |
311 | 8 | return; |
312 | } | |
313 | } | |
314 | ||
315 | /* | |
316 | * otherwise, add a new sequence mapping | |
317 | */ | |
318 | 420 | Mapping mp = new Mapping(toSeq, map); |
319 | 420 | mp.setMappedFromId(mapFromId); |
320 | 420 | mappings.add(new SequenceToSequenceMapping(fromSeq, mp)); |
321 | } | |
322 | ||
323 | 44 | public SequenceI[] getdnaSeqs() |
324 | { | |
325 | // TODO return a list instead? | |
326 | // return dnaSeqs; | |
327 | 44 | List<SequenceI> seqs = new ArrayList<>(); |
328 | 44 | for (SequenceToSequenceMapping ssm : mappings) |
329 | { | |
330 | 52 | seqs.add(ssm.fromSeq); |
331 | } | |
332 | 44 | return seqs.toArray(new SequenceI[seqs.size()]); |
333 | } | |
334 | ||
335 | 16 | public SequenceI[] getAaSeqs() |
336 | { | |
337 | // TODO not used - remove? | |
338 | 16 | List<SequenceI> seqs = new ArrayList<>(); |
339 | 16 | for (SequenceToSequenceMapping ssm : mappings) |
340 | { | |
341 | 16 | seqs.add(ssm.mapping.to); |
342 | } | |
343 | 16 | return seqs.toArray(new SequenceI[seqs.size()]); |
344 | } | |
345 | ||
346 | 50 | public MapList[] getdnaToProt() |
347 | { | |
348 | 50 | List<MapList> maps = new ArrayList<>(); |
349 | 50 | for (SequenceToSequenceMapping ssm : mappings) |
350 | { | |
351 | 90 | maps.add(ssm.mapping.map); |
352 | } | |
353 | 50 | return maps.toArray(new MapList[maps.size()]); |
354 | } | |
355 | ||
356 | 8 | public Mapping[] getProtMappings() |
357 | { | |
358 | 8 | List<Mapping> maps = new ArrayList<>(); |
359 | 8 | for (SequenceToSequenceMapping ssm : mappings) |
360 | { | |
361 | 9 | maps.add(ssm.mapping); |
362 | } | |
363 | 8 | return maps.toArray(new Mapping[maps.size()]); |
364 | } | |
365 | ||
366 | /** | |
367 | * Returns the first mapping found which is to or from the given sequence, or | |
368 | * null if none is found | |
369 | * | |
370 | * @param seq | |
371 | * @return | |
372 | */ | |
373 | 32 | public Mapping getMappingForSequence(SequenceI seq) |
374 | { | |
375 | 32 | SequenceI seqDs = seq.getDatasetSequence(); |
376 | 32 | seqDs = seqDs != null ? seqDs : seq; |
377 | ||
378 | 32 | for (SequenceToSequenceMapping ssm : mappings) |
379 | { | |
380 | 40 | if (ssm.fromSeq == seqDs || ssm.mapping.to == seqDs) |
381 | { | |
382 | 32 | return ssm.mapping; |
383 | } | |
384 | } | |
385 | 0 | return null; |
386 | } | |
387 | ||
388 | /** | |
389 | * Return the corresponding aligned or dataset aa sequence for given dna | |
390 | * sequence, null if not found. | |
391 | * | |
392 | * @param sequenceRef | |
393 | * @return | |
394 | */ | |
395 | 246749 | public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef) |
396 | { | |
397 | 246749 | SequenceI dnads = dnaSeqRef.getDatasetSequence(); |
398 | 246749 | for (SequenceToSequenceMapping ssm : mappings) |
399 | { | |
400 | 354071 | if (ssm.fromSeq == dnaSeqRef || ssm.fromSeq == dnads) |
401 | { | |
402 | 126 | return ssm.mapping.to; |
403 | } | |
404 | } | |
405 | 246623 | return null; |
406 | } | |
407 | ||
408 | /** | |
409 | * Return the corresponding aligned or dataset dna sequence for given amino | |
410 | * acid sequence, or null if not found. returns the sequence from the first | |
411 | * mapping found that involves the protein sequence. | |
412 | * | |
413 | * @param aaSeqRef | |
414 | * @return | |
415 | */ | |
416 | 246639 | public SequenceI getDnaForAaSeq(SequenceI aaSeqRef) |
417 | { | |
418 | 246639 | SequenceI aads = aaSeqRef.getDatasetSequence(); |
419 | 246639 | for (SequenceToSequenceMapping ssm : mappings) |
420 | { | |
421 | 295718 | if (ssm.mapping.to == aaSeqRef || ssm.mapping.to == aads) |
422 | { | |
423 | 21461 | return ssm.fromSeq; |
424 | } | |
425 | } | |
426 | 225178 | return null; |
427 | } | |
428 | ||
429 | /** | |
430 | * test to see if codon frame involves seq in any way | |
431 | * | |
432 | * @param seq | |
433 | * a nucleotide or protein sequence | |
434 | * @return true if a mapping exists to or from this sequence to any translated | |
435 | * sequence | |
436 | */ | |
437 | 246736 | public boolean involvesSequence(SequenceI seq) |
438 | { | |
439 | 246736 | return getAaForDnaSeq(seq) != null || getDnaForAaSeq(seq) != null; |
440 | } | |
441 | ||
442 | /** | |
443 | * Add search results for regions in other sequences that translate or are | |
444 | * translated from a particular position in seq (which may be an aligned or | |
445 | * dataset sequence) | |
446 | * | |
447 | * @param seq | |
448 | * @param index | |
449 | * position in seq | |
450 | * @param results | |
451 | * where highlighted regions go | |
452 | */ | |
453 | 133 | public void markMappedRegion(SequenceI seq, int index, |
454 | SearchResultsI results) | |
455 | { | |
456 | 133 | SequenceI ds = seq.getDatasetSequence(); |
457 | 133 | if (ds == null) |
458 | { | |
459 | 3 | ds = seq; |
460 | } | |
461 | 133 | for (SequenceToSequenceMapping ssm : mappings) |
462 | { | |
463 | 299 | if (ssm.covers(seq, true, true)) |
464 | { | |
465 | 140 | ssm.markMappedRegion(ds, index, results); |
466 | } | |
467 | } | |
468 | } | |
469 | ||
470 | /** | |
471 | * Convenience method to return the first aligned sequence in the given | |
472 | * alignment whose dataset has a mapping with the given (aligned or dataset) | |
473 | * sequence. | |
474 | * | |
475 | * @param seq | |
476 | * | |
477 | * @param al | |
478 | * @return | |
479 | */ | |
480 | 1043 | public SequenceI findAlignedSequence(SequenceI seq, AlignmentI al) |
481 | { | |
482 | /* | |
483 | * Search mapped protein ('to') sequences first. | |
484 | */ | |
485 | 1043 | for (SequenceToSequenceMapping ssm : mappings) |
486 | { | |
487 | 1495 | if (ssm.fromSeq == seq || ssm.fromSeq == seq.getDatasetSequence()) |
488 | { | |
489 | 65 | for (SequenceI sourceAligned : al.getSequences()) |
490 | { | |
491 | 644 | if (ssm.mapping.to == sourceAligned.getDatasetSequence() |
492 | || ssm.mapping.to == sourceAligned) | |
493 | { | |
494 | 39 | return sourceAligned; |
495 | } | |
496 | } | |
497 | } | |
498 | } | |
499 | ||
500 | /* | |
501 | * Then try mapped dna sequences. | |
502 | */ | |
503 | 1004 | for (SequenceToSequenceMapping ssm : mappings) |
504 | { | |
505 | 1445 | if (ssm.mapping.to == seq |
506 | || ssm.mapping.to == seq.getDatasetSequence()) | |
507 | { | |
508 | 53 | for (SequenceI sourceAligned : al.getSequences()) |
509 | { | |
510 | 497 | if (ssm.fromSeq == sourceAligned.getDatasetSequence()) |
511 | { | |
512 | 31 | return sourceAligned; |
513 | } | |
514 | } | |
515 | } | |
516 | } | |
517 | ||
518 | 973 | return null; |
519 | } | |
520 | ||
521 | /** | |
522 | * Returns the region in the target sequence's dataset that is mapped to the | |
523 | * given position (base 1) in the query sequence's dataset. The region is a | |
524 | * set of start/end position pairs. | |
525 | * | |
526 | * @param target | |
527 | * @param query | |
528 | * @param queryPos | |
529 | * @return | |
530 | */ | |
531 | 161 | public int[] getMappedRegion(SequenceI target, SequenceI query, |
532 | int queryPos) | |
533 | { | |
534 | 161 | SequenceI targetDs = target.getDatasetSequence() == null ? target |
535 | : target.getDatasetSequence(); | |
536 | 161 | SequenceI queryDs = query.getDatasetSequence() == null ? query |
537 | : query.getDatasetSequence(); | |
538 | 161 | if (targetDs == null || queryDs == null /*|| dnaToProt == null*/) |
539 | { | |
540 | 0 | return null; |
541 | } | |
542 | 161 | for (SequenceToSequenceMapping ssm : mappings) |
543 | { | |
544 | /* | |
545 | * try mapping from target to query | |
546 | */ | |
547 | 172 | if (ssm.fromSeq == targetDs && ssm.mapping.to == queryDs) |
548 | { | |
549 | 40 | int[] codon = ssm.mapping.map.locateInFrom(queryPos, queryPos); |
550 | 40 | if (codon != null) |
551 | { | |
552 | 36 | return codon; |
553 | } | |
554 | } | |
555 | /* | |
556 | * else try mapping from query to target | |
557 | */ | |
558 | 132 | else if (ssm.fromSeq == queryDs && ssm.mapping.to == targetDs) |
559 | { | |
560 | 119 | int[] codon = ssm.mapping.map.locateInTo(queryPos, queryPos); |
561 | 119 | if (codon != null) |
562 | { | |
563 | 26 | return codon; |
564 | } | |
565 | } | |
566 | } | |
567 | 99 | return null; |
568 | } | |
569 | ||
570 | /** | |
571 | * Returns the mapped DNA codons for the given position in a protein sequence, | |
572 | * or null if no mapping is found. Returns a list of (e.g.) ['g', 'c', 't'] | |
573 | * codons. There may be more than one codon mapped to the protein if (for | |
574 | * example), there are mappings to cDNA variants. | |
575 | * | |
576 | * @param protein | |
577 | * the peptide dataset sequence | |
578 | * @param aaPos | |
579 | * residue position (base 1) in the peptide sequence | |
580 | * @return | |
581 | */ | |
582 | 21367 | public List<char[]> getMappedCodons(SequenceI protein, int aaPos) |
583 | { | |
584 | 21367 | MapList ml = null; |
585 | 21367 | SequenceI dnaSeq = null; |
586 | 21367 | List<char[]> result = new ArrayList<>(); |
587 | ||
588 | 21367 | for (SequenceToSequenceMapping ssm : mappings) |
589 | { | |
590 | 127897 | if (ssm.mapping.to == protein |
591 | && ssm.mapping.getMap().getFromRatio() == 3) | |
592 | { | |
593 | 21343 | ml = ssm.mapping.map; |
594 | 21343 | dnaSeq = ssm.fromSeq; |
595 | ||
596 | 21343 | int[] codonPos = ml.locateInFrom(aaPos, aaPos); |
597 | 21343 | if (codonPos == null) |
598 | { | |
599 | 25 | return null; |
600 | } | |
601 | ||
602 | /* | |
603 | * Read off the mapped nucleotides (converting to position base 0) | |
604 | */ | |
605 | 21318 | codonPos = MappingUtils.flattenRanges(codonPos); |
606 | 21318 | int start = dnaSeq.getStart(); |
607 | 21318 | char c1 = dnaSeq.getCharAt(codonPos[0] - start); |
608 | 21318 | char c2 = dnaSeq.getCharAt(codonPos[1] - start); |
609 | 21318 | char c3 = dnaSeq.getCharAt(codonPos[2] - start); |
610 | 21318 | result.add(new char[] { c1, c2, c3 }); |
611 | } | |
612 | } | |
613 | 21342 | return result.isEmpty() ? null : result; |
614 | } | |
615 | ||
616 | /** | |
617 | * Returns any mappings found which are from the given sequence, and to | |
618 | * distinct sequences. | |
619 | * | |
620 | * @param seq | |
621 | * @return | |
622 | */ | |
623 | 46 | public List<Mapping> getMappingsFromSequence(SequenceI seq) |
624 | { | |
625 | 46 | List<Mapping> result = new ArrayList<>(); |
626 | 46 | List<SequenceI> related = new ArrayList<>(); |
627 | 46 | SequenceI seqDs = seq.getDatasetSequence(); |
628 | 46 | seqDs = seqDs != null ? seqDs : seq; |
629 | ||
630 | 46 | for (SequenceToSequenceMapping ssm : mappings) |
631 | { | |
632 | 268 | final Mapping mapping = ssm.mapping; |
633 | 268 | if (ssm.fromSeq == seqDs) |
634 | { | |
635 | 48 | if (!related.contains(mapping.to)) |
636 | { | |
637 | 48 | result.add(mapping); |
638 | 48 | related.add(mapping.to); |
639 | } | |
640 | } | |
641 | } | |
642 | 46 | return result; |
643 | } | |
644 | ||
645 | /** | |
646 | * Test whether the given sequence is substitutable for one or more dummy | |
647 | * sequences in this mapping | |
648 | * | |
649 | * @param map | |
650 | * @param seq | |
651 | * @return | |
652 | */ | |
653 | 7 | public boolean isRealisableWith(SequenceI seq) |
654 | { | |
655 | 7 | return realiseWith(seq, false) > 0; |
656 | } | |
657 | ||
658 | /** | |
659 | * Replace any matchable mapped dummy sequences with the given real one. | |
660 | * Returns the count of sequence mappings instantiated. | |
661 | * | |
662 | * @param seq | |
663 | * @return | |
664 | */ | |
665 | 2 | public int realiseWith(SequenceI seq) |
666 | { | |
667 | 2 | return realiseWith(seq, true); |
668 | } | |
669 | ||
670 | /** | |
671 | * Returns the number of mapped dummy sequences that could be replaced with | |
672 | * the given real sequence. | |
673 | * | |
674 | * @param seq | |
675 | * a dataset sequence | |
676 | * @param doUpdate | |
677 | * if true, performs replacements, else only counts | |
678 | * @return | |
679 | */ | |
680 | 9 | protected int realiseWith(SequenceI seq, boolean doUpdate) |
681 | { | |
682 | 9 | SequenceI ds = seq.getDatasetSequence() != null |
683 | ? seq.getDatasetSequence() | |
684 | : seq; | |
685 | 9 | int count = 0; |
686 | ||
687 | /* | |
688 | * check for replaceable DNA ('map from') sequences | |
689 | */ | |
690 | 9 | for (SequenceToSequenceMapping ssm : mappings) |
691 | { | |
692 | 10 | SequenceI dna = ssm.fromSeq; |
693 | 10 | if (dna instanceof SequenceDummy |
694 | && dna.getName().equals(ds.getName())) | |
695 | { | |
696 | 8 | Mapping mapping = ssm.mapping; |
697 | 8 | int mapStart = mapping.getMap().getFromLowest(); |
698 | 8 | int mapEnd = mapping.getMap().getFromHighest(); |
699 | 8 | boolean mappable = couldRealiseSequence(dna, ds, mapStart, mapEnd); |
700 | 8 | if (mappable) |
701 | { | |
702 | 6 | count++; |
703 | 6 | if (doUpdate) |
704 | { | |
705 | // TODO: new method ? ds.realise(dna); | |
706 | // might want to copy database refs as well | |
707 | 3 | ds.setSequenceFeatures(dna.getSequenceFeatures()); |
708 | // dnaSeqs[i] = ds; | |
709 | 3 | ssm.fromSeq = ds; |
710 | 3 | jalview.bin.Console |
711 | .outPrintln("Realised mapped sequence " + ds.getName()); | |
712 | } | |
713 | } | |
714 | } | |
715 | ||
716 | /* | |
717 | * check for replaceable protein ('map to') sequences | |
718 | */ | |
719 | 10 | Mapping mapping = ssm.mapping; |
720 | 10 | SequenceI prot = mapping.getTo(); |
721 | 10 | int mapStart = mapping.getMap().getToLowest(); |
722 | 10 | int mapEnd = mapping.getMap().getToHighest(); |
723 | 10 | boolean mappable = couldRealiseSequence(prot, ds, mapStart, mapEnd); |
724 | 10 | if (mappable) |
725 | { | |
726 | 0 | count++; |
727 | 0 | if (doUpdate) |
728 | { | |
729 | // TODO: new method ? ds.realise(dna); | |
730 | // might want to copy database refs as well | |
731 | 0 | ds.setSequenceFeatures(dna.getSequenceFeatures()); |
732 | 0 | ssm.mapping.setTo(ds); |
733 | } | |
734 | } | |
735 | } | |
736 | 9 | return count; |
737 | } | |
738 | ||
739 | /** | |
740 | * Helper method to test whether a 'real' sequence could replace a 'dummy' | |
741 | * sequence in the map. The criteria are that they have the same name, and | |
742 | * that the mapped region overlaps the candidate sequence. | |
743 | * | |
744 | * @param existing | |
745 | * @param replacement | |
746 | * @param mapStart | |
747 | * @param mapEnd | |
748 | * @return | |
749 | */ | |
750 | 28 | protected static boolean couldRealiseSequence(SequenceI existing, |
751 | SequenceI replacement, int mapStart, int mapEnd) | |
752 | { | |
753 | 28 | if (existing instanceof SequenceDummy |
754 | && !(replacement instanceof SequenceDummy) | |
755 | && existing.getName().equals(replacement.getName())) | |
756 | { | |
757 | 13 | int start = replacement.getStart(); |
758 | 13 | int end = replacement.getEnd(); |
759 | 13 | boolean mappingOverlapsSequence = (mapStart >= start |
760 | && mapStart <= end) || (mapEnd >= start && mapEnd <= end); | |
761 | 13 | if (mappingOverlapsSequence) |
762 | { | |
763 | 9 | return true; |
764 | } | |
765 | } | |
766 | 19 | return false; |
767 | } | |
768 | ||
769 | /** | |
770 | * Change any mapping to the given sequence to be to its dataset sequence | |
771 | * instead. For use when mappings are created before their referenced | |
772 | * sequences are instantiated, for example when parsing GFF data. | |
773 | * | |
774 | * @param seq | |
775 | */ | |
776 | 8 | public void updateToDataset(SequenceI seq) |
777 | { | |
778 | 8 | if (seq == null || seq.getDatasetSequence() == null) |
779 | { | |
780 | 0 | return; |
781 | } | |
782 | 8 | SequenceI ds = seq.getDatasetSequence(); |
783 | ||
784 | 8 | for (SequenceToSequenceMapping ssm : mappings) |
785 | /* | |
786 | * 'from' sequences | |
787 | */ | |
788 | { | |
789 | 8 | if (ssm.fromSeq == seq) |
790 | { | |
791 | 4 | ssm.fromSeq = ds; |
792 | } | |
793 | ||
794 | /* | |
795 | * 'to' sequences | |
796 | */ | |
797 | 8 | if (ssm.mapping.to == seq) |
798 | { | |
799 | 4 | ssm.mapping.to = ds; |
800 | } | |
801 | } | |
802 | } | |
803 | ||
804 | /** | |
805 | * Answers true if this object contains no mappings | |
806 | * | |
807 | * @return | |
808 | */ | |
809 | 7 | public boolean isEmpty() |
810 | { | |
811 | 7 | return mappings.isEmpty(); |
812 | } | |
813 | ||
814 | /** | |
815 | * Method for debug / inspection purposes only, may change in future | |
816 | */ | |
817 | 0 | @Override |
818 | public String toString() | |
819 | { | |
820 | 0 | return mappings == null ? "null" : mappings.toString(); |
821 | } | |
822 | ||
823 | /** | |
824 | * Returns the first mapping found that is between 'fromSeq' and 'toSeq', or | |
825 | * null if none found | |
826 | * | |
827 | * @param fromSeq | |
828 | * aligned or dataset sequence | |
829 | * @param toSeq | |
830 | * aligned or dataset sequence | |
831 | * @return | |
832 | */ | |
833 | 42 | public Mapping getMappingBetween(SequenceI fromSeq, SequenceI toSeq) |
834 | { | |
835 | 42 | SequenceI dssFrom = fromSeq.getDatasetSequence() == null ? fromSeq |
836 | : fromSeq.getDatasetSequence(); | |
837 | 42 | SequenceI dssTo = toSeq.getDatasetSequence() == null ? toSeq |
838 | : toSeq.getDatasetSequence(); | |
839 | ||
840 | 42 | for (SequenceToSequenceMapping mapping : mappings) |
841 | { | |
842 | 89 | SequenceI from = mapping.fromSeq; |
843 | 89 | SequenceI to = mapping.mapping.to; |
844 | 89 | if ((from == dssFrom && to == dssTo) |
845 | || (from == dssTo && to == dssFrom)) | |
846 | { | |
847 | 31 | return mapping.mapping; |
848 | } | |
849 | } | |
850 | 11 | return null; |
851 | } | |
852 | ||
853 | /** | |
854 | * Returns a hashcode derived from the list of sequence mappings | |
855 | * | |
856 | * @see SequenceToSequenceMapping#hashCode() | |
857 | * @see AbstractList#hashCode() | |
858 | */ | |
859 | 0 | @Override |
860 | public int hashCode() | |
861 | { | |
862 | 0 | return this.mappings.hashCode(); |
863 | } | |
864 | ||
865 | /** | |
866 | * Two AlignedCodonFrame objects are equal if they hold the same ordered list | |
867 | * of mappings | |
868 | * | |
869 | * @see SequenceToSequenceMapping#equals | |
870 | */ | |
871 | 3217 | @Override |
872 | public boolean equals(Object obj) | |
873 | { | |
874 | 3217 | if (!(obj instanceof AlignedCodonFrame)) |
875 | { | |
876 | 0 | return false; |
877 | } | |
878 | 3217 | return this.mappings.equals(((AlignedCodonFrame) obj).mappings); |
879 | } | |
880 | ||
881 | 140 | public List<SequenceToSequenceMapping> getMappings() |
882 | { | |
883 | 140 | return mappings; |
884 | } | |
885 | ||
886 | /** | |
887 | * Returns the first mapping found which is between the two given sequences, | |
888 | * and covers the full extent of both. | |
889 | * | |
890 | * @param seq1 | |
891 | * @param seq2 | |
892 | * @return | |
893 | */ | |
894 | 11 | public SequenceToSequenceMapping getCoveringMapping(SequenceI seq1, |
895 | SequenceI seq2) | |
896 | { | |
897 | 11 | for (SequenceToSequenceMapping mapping : mappings) |
898 | { | |
899 | 7 | if (mapping.covers(seq2) && mapping.covers(seq1)) |
900 | { | |
901 | 2 | return mapping; |
902 | } | |
903 | } | |
904 | 9 | return null; |
905 | } | |
906 | ||
907 | /** | |
908 | * Returns the first mapping found which is between the given dataset sequence | |
909 | * and another, is a triplet mapping (3:1 or 1:3), and covers the full extent | |
910 | * of both sequences involved | |
911 | * | |
912 | * @param seq | |
913 | * @return | |
914 | */ | |
915 | 14 | public SequenceToSequenceMapping getCoveringCodonMapping(SequenceI seq) |
916 | { | |
917 | 14 | for (SequenceToSequenceMapping mapping : mappings) |
918 | { | |
919 | 11 | if (mapping.getMapping().getMap().isTripletMap() |
920 | && mapping.covers(seq)) | |
921 | { | |
922 | 8 | if (mapping.fromSeq == seq |
923 | && mapping.covers(mapping.getMapping().getTo())) | |
924 | { | |
925 | 2 | return mapping; |
926 | } | |
927 | 6 | else if (mapping.getMapping().getTo() == seq |
928 | && mapping.covers(mapping.fromSeq)) | |
929 | { | |
930 | 4 | return mapping; |
931 | } | |
932 | } | |
933 | } | |
934 | 8 | return null; |
935 | } | |
936 | } |