Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
MapList | 39 | 410 | 167 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | package jalview.util; | |
22 | ||
23 | import java.util.ArrayList; | |
24 | import java.util.Arrays; | |
25 | import java.util.BitSet; | |
26 | import java.util.List; | |
27 | ||
28 | import jalview.bin.Console; | |
29 | ||
30 | /** | |
31 | * A simple way of bijectively mapping a non-contiguous linear range to another | |
32 | * non-contiguous linear range. | |
33 | * | |
34 | * Use at your own risk! | |
35 | * | |
36 | * TODO: test/ensure that sense of from and to ratio start position is conserved | |
37 | * (codon start position recovery) | |
38 | */ | |
39 | public class MapList | |
40 | { | |
41 | ||
42 | /* | |
43 | * Subregions (base 1) described as { [start1, end1], [start2, end2], ...} | |
44 | */ | |
45 | private List<int[]> fromShifts; | |
46 | ||
47 | /* | |
48 | * Same format as fromShifts, for the 'mapped to' sequence | |
49 | */ | |
50 | private List<int[]> toShifts; | |
51 | ||
52 | /* | |
53 | * number of steps in fromShifts to one toRatio unit | |
54 | */ | |
55 | private int fromRatio; | |
56 | ||
57 | /* | |
58 | * number of steps in toShifts to one fromRatio | |
59 | */ | |
60 | private int toRatio; | |
61 | ||
62 | /* | |
63 | * lowest and highest value in the from Map | |
64 | */ | |
65 | private int fromLowest; | |
66 | ||
67 | private int fromHighest; | |
68 | ||
69 | /* | |
70 | * lowest and highest value in the to Map | |
71 | */ | |
72 | private int toLowest; | |
73 | ||
74 | private int toHighest; | |
75 | ||
76 | /** | |
77 | * Constructor | |
78 | */ | |
79 | 1238 | public MapList() |
80 | { | |
81 | 1238 | fromShifts = new ArrayList<>(); |
82 | 1238 | toShifts = new ArrayList<>(); |
83 | } | |
84 | ||
85 | /** | |
86 | * Two MapList objects are equal if they are the same object, or they both | |
87 | * have populated shift ranges and all values are the same. | |
88 | */ | |
89 | 108 | @Override |
90 | public boolean equals(Object o) | |
91 | { | |
92 | 108 | if (o == null || !(o instanceof MapList)) |
93 | { | |
94 | 2 | return false; |
95 | } | |
96 | ||
97 | 106 | MapList obj = (MapList) o; |
98 | 106 | if (obj == this) |
99 | { | |
100 | 18 | return true; |
101 | } | |
102 | 88 | if (obj.fromRatio != fromRatio || obj.toRatio != toRatio |
103 | || obj.fromShifts == null || obj.toShifts == null) | |
104 | { | |
105 | 25 | return false; |
106 | } | |
107 | 63 | return Arrays.deepEquals(fromShifts.toArray(), obj.fromShifts.toArray()) |
108 | && Arrays.deepEquals(toShifts.toArray(), | |
109 | obj.toShifts.toArray()); | |
110 | } | |
111 | ||
112 | /** | |
113 | * Returns a hashcode made from the fromRatio, toRatio, and from/to ranges | |
114 | */ | |
115 | 39 | @Override |
116 | public int hashCode() | |
117 | { | |
118 | 39 | int hashCode = 31 * fromRatio; |
119 | 39 | hashCode = 31 * hashCode + toRatio; |
120 | 39 | for (int[] shift : fromShifts) |
121 | { | |
122 | 59 | hashCode = 31 * hashCode + shift[0]; |
123 | 59 | hashCode = 31 * hashCode + shift[1]; |
124 | } | |
125 | 39 | for (int[] shift : toShifts) |
126 | { | |
127 | 39 | hashCode = 31 * hashCode + shift[0]; |
128 | 39 | hashCode = 31 * hashCode + shift[1]; |
129 | } | |
130 | ||
131 | 39 | return hashCode; |
132 | } | |
133 | ||
134 | /** | |
135 | * Returns the 'from' ranges as {[start1, end1], [start2, end2], ...} | |
136 | * | |
137 | * @return | |
138 | */ | |
139 | 635 | public List<int[]> getFromRanges() |
140 | { | |
141 | 635 | return fromShifts; |
142 | } | |
143 | ||
144 | /** | |
145 | * Returns the 'to' ranges as {[start1, end1], [start2, end2], ...} | |
146 | * | |
147 | * @return | |
148 | */ | |
149 | 604 | public List<int[]> getToRanges() |
150 | { | |
151 | 604 | return toShifts; |
152 | } | |
153 | ||
154 | /** | |
155 | * Flattens a list of [start, end] into a single [start1, end1, start2, | |
156 | * end2,...] array. | |
157 | * | |
158 | * @param shifts | |
159 | * @return | |
160 | */ | |
161 | 1 | protected static int[] getRanges(List<int[]> shifts) |
162 | { | |
163 | 1 | int[] rnges = new int[2 * shifts.size()]; |
164 | 1 | int i = 0; |
165 | 1 | for (int[] r : shifts) |
166 | { | |
167 | 2 | rnges[i++] = r[0]; |
168 | 2 | rnges[i++] = r[1]; |
169 | } | |
170 | 1 | return rnges; |
171 | } | |
172 | ||
173 | /** | |
174 | * | |
175 | * @return length of mapped phrase in from | |
176 | */ | |
177 | 22114 | public int getFromRatio() |
178 | { | |
179 | 22114 | return fromRatio; |
180 | } | |
181 | ||
182 | /** | |
183 | * | |
184 | * @return length of mapped phrase in to | |
185 | */ | |
186 | 432 | public int getToRatio() |
187 | { | |
188 | 432 | return toRatio; |
189 | } | |
190 | ||
191 | 89 | public int getFromLowest() |
192 | { | |
193 | 89 | return fromLowest; |
194 | } | |
195 | ||
196 | 90 | public int getFromHighest() |
197 | { | |
198 | 90 | return fromHighest; |
199 | } | |
200 | ||
201 | 28 | public int getToLowest() |
202 | { | |
203 | 28 | return toLowest; |
204 | } | |
205 | ||
206 | 3698 | public int getToHighest() |
207 | { | |
208 | 3698 | return toHighest; |
209 | } | |
210 | ||
211 | /** | |
212 | * Constructor given from and to ranges as [start1, end1, start2, end2,...]. | |
213 | * There is no validation check that the ranges do not overlap each other. | |
214 | * | |
215 | * @param from | |
216 | * contiguous regions as [start1, end1, start2, end2, ...] | |
217 | * @param to | |
218 | * same format as 'from' | |
219 | * @param fromRatio | |
220 | * phrase length in 'from' (e.g. 3 for dna) | |
221 | * @param toRatio | |
222 | * phrase length in 'to' (e.g. 1 for protein) | |
223 | */ | |
224 | 1021 | public MapList(int from[], int to[], int fromRatio, int toRatio) |
225 | { | |
226 | 1021 | this(); |
227 | 1021 | this.fromRatio = fromRatio; |
228 | 1021 | this.toRatio = toRatio; |
229 | 1021 | fromLowest = Integer.MAX_VALUE; |
230 | 1021 | fromHighest = Integer.MIN_VALUE; |
231 | ||
232 | 2279 | for (int i = 0; i < from.length; i += 2) |
233 | { | |
234 | /* | |
235 | * note lowest and highest values - bearing in mind the | |
236 | * direction may be reversed | |
237 | */ | |
238 | 1259 | fromLowest = Math.min(fromLowest, Math.min(from[i], from[i + 1])); |
239 | 1259 | fromHighest = Math.max(fromHighest, Math.max(from[i], from[i + 1])); |
240 | 1259 | fromShifts.add(new int[] { from[i], from[i + 1] }); |
241 | } | |
242 | ||
243 | 1020 | toLowest = Integer.MAX_VALUE; |
244 | 1020 | toHighest = Integer.MIN_VALUE; |
245 | 2354 | for (int i = 0; i < to.length; i += 2) |
246 | { | |
247 | 1335 | toLowest = Math.min(toLowest, Math.min(to[i], to[i + 1])); |
248 | 1335 | toHighest = Math.max(toHighest, Math.max(to[i], to[i + 1])); |
249 | 1335 | toShifts.add(new int[] { to[i], to[i + 1] }); |
250 | } | |
251 | } | |
252 | ||
253 | /** | |
254 | * Copy constructor. Creates an identical mapping. | |
255 | * | |
256 | * @param map | |
257 | */ | |
258 | 23 | public MapList(MapList map) |
259 | { | |
260 | 23 | this(); |
261 | // TODO not used - remove? | |
262 | 23 | this.fromLowest = map.fromLowest; |
263 | 23 | this.fromHighest = map.fromHighest; |
264 | 23 | this.toLowest = map.toLowest; |
265 | 23 | this.toHighest = map.toHighest; |
266 | ||
267 | 23 | this.fromRatio = map.fromRatio; |
268 | 23 | this.toRatio = map.toRatio; |
269 | 23 | if (map.fromShifts != null) |
270 | { | |
271 | 23 | for (int[] r : map.fromShifts) |
272 | { | |
273 | 35 | fromShifts.add(new int[] { r[0], r[1] }); |
274 | } | |
275 | } | |
276 | 23 | if (map.toShifts != null) |
277 | { | |
278 | 23 | for (int[] r : map.toShifts) |
279 | { | |
280 | 28 | toShifts.add(new int[] { r[0], r[1] }); |
281 | } | |
282 | } | |
283 | } | |
284 | ||
285 | /** | |
286 | * Constructor given ranges as lists of [start, end] positions. There is no | |
287 | * validation check that the ranges do not overlap each other. | |
288 | * | |
289 | * @param fromRange | |
290 | * @param toRange | |
291 | * @param fromRatio | |
292 | * @param toRatio | |
293 | */ | |
294 | 194 | public MapList(List<int[]> fromRange, List<int[]> toRange, int fromRatio, |
295 | int toRatio) | |
296 | { | |
297 | 194 | this(); |
298 | 194 | fromRange = coalesceRanges(fromRange); |
299 | 194 | toRange = coalesceRanges(toRange); |
300 | 194 | this.fromShifts = fromRange; |
301 | 194 | this.toShifts = toRange; |
302 | 194 | this.fromRatio = fromRatio; |
303 | 194 | this.toRatio = toRatio; |
304 | ||
305 | 194 | fromLowest = Integer.MAX_VALUE; |
306 | 194 | fromHighest = Integer.MIN_VALUE; |
307 | 194 | for (int[] range : fromRange) |
308 | { | |
309 | 239 | if (range.length != 2) |
310 | { | |
311 | // throw new IllegalArgumentException(range); | |
312 | 0 | Console.error("Invalid format for fromRange " |
313 | + Arrays.toString(range) + " may cause errors"); | |
314 | } | |
315 | 239 | fromLowest = Math.min(fromLowest, Math.min(range[0], range[1])); |
316 | 239 | fromHighest = Math.max(fromHighest, Math.max(range[0], range[1])); |
317 | } | |
318 | ||
319 | 194 | toLowest = Integer.MAX_VALUE; |
320 | 194 | toHighest = Integer.MIN_VALUE; |
321 | 194 | for (int[] range : toRange) |
322 | { | |
323 | 346 | if (range.length != 2) |
324 | { | |
325 | // throw new IllegalArgumentException(range); | |
326 | 0 | Console.error("Invalid format for toRange " + Arrays.toString(range) |
327 | + " may cause errors"); | |
328 | } | |
329 | 346 | toLowest = Math.min(toLowest, Math.min(range[0], range[1])); |
330 | 346 | toHighest = Math.max(toHighest, Math.max(range[0], range[1])); |
331 | } | |
332 | } | |
333 | ||
334 | /** | |
335 | * Consolidates a list of ranges so that any contiguous ranges are merged. | |
336 | * This assumes the ranges are already in start order (does not sort them). | |
337 | * <p> | |
338 | * The main use case for this method is when mapping cDNA sequence to its | |
339 | * protein product, based on CDS feature ranges which derive from spliced | |
340 | * exons, but are contiguous on the cDNA sequence. For example | |
341 | * | |
342 | * <pre> | |
343 | * CDS 1-20 // from exon1 | |
344 | * CDS 21-35 // from exon2 | |
345 | * CDS 36-71 // from exon3 | |
346 | * 'coalesce' to range 1-71 | |
347 | * </pre> | |
348 | * | |
349 | * @param ranges | |
350 | * @return the same list (if unchanged), else a new merged list, leaving the | |
351 | * input list unchanged | |
352 | */ | |
353 | 397 | public static List<int[]> coalesceRanges(final List<int[]> ranges) |
354 | { | |
355 | 397 | if (ranges == null || ranges.size() < 2) |
356 | { | |
357 | 327 | return ranges; |
358 | } | |
359 | ||
360 | 70 | boolean changed = false; |
361 | 70 | List<int[]> merged = new ArrayList<>(); |
362 | 70 | int[] lastRange = ranges.get(0); |
363 | 70 | int lastDirection = lastRange[1] >= lastRange[0] ? 1 : -1; |
364 | 70 | lastRange = new int[] { lastRange[0], lastRange[1] }; |
365 | 70 | merged.add(lastRange); |
366 | 70 | boolean first = true; |
367 | ||
368 | 70 | for (final int[] range : ranges) |
369 | { | |
370 | 287 | if (first) |
371 | { | |
372 | 70 | first = false; |
373 | 70 | continue; |
374 | } | |
375 | ||
376 | 217 | int direction = range[1] >= range[0] ? 1 : -1; |
377 | ||
378 | /* | |
379 | * if next range is in the same direction as last and contiguous, | |
380 | * just update the end position of the last range | |
381 | */ | |
382 | 217 | boolean sameDirection = range[1] == range[0] |
383 | || direction == lastDirection; | |
384 | 217 | boolean extending = range[0] == lastRange[1] + lastDirection; |
385 | 217 | if (sameDirection && extending) |
386 | { | |
387 | 10 | lastRange[1] = range[1]; |
388 | 10 | changed = true; |
389 | } | |
390 | else | |
391 | { | |
392 | 207 | lastRange = new int[] { range[0], range[1] }; |
393 | 207 | merged.add(lastRange); |
394 | // careful: merging [5, 5] after [7, 6] should keep negative direction | |
395 | 207 | lastDirection = (range[1] == range[0]) ? lastDirection : direction; |
396 | } | |
397 | } | |
398 | ||
399 | 70 | return changed ? merged : ranges; |
400 | } | |
401 | ||
402 | /** | |
403 | * get all mapped positions from 'from' to 'to' | |
404 | * | |
405 | * @return int[][] { int[] { fromStart, fromFinish, toStart, toFinish }, int | |
406 | * [fromFinish-fromStart+2] { toStart..toFinish mappings}} | |
407 | */ | |
408 | 0 | protected int[][] makeFromMap() |
409 | { | |
410 | // TODO only used for test - remove?? | |
411 | 0 | return posMap(fromShifts, fromRatio, toShifts, toRatio); |
412 | } | |
413 | ||
414 | /** | |
415 | * get all mapped positions from 'to' to 'from' | |
416 | * | |
417 | * @return int[to position]=position mapped in from | |
418 | */ | |
419 | 0 | protected int[][] makeToMap() |
420 | { | |
421 | // TODO only used for test - remove?? | |
422 | 0 | return posMap(toShifts, toRatio, fromShifts, fromRatio); |
423 | } | |
424 | ||
425 | /** | |
426 | * construct an int map for intervals in intVals | |
427 | * | |
428 | * @param shiftTo | |
429 | * @return int[] { from, to pos in range }, int[range.to-range.from+1] | |
430 | * returning mapped position | |
431 | */ | |
432 | 0 | private int[][] posMap(List<int[]> shiftTo, int sourceRatio, |
433 | List<int[]> shiftFrom, int targetRatio) | |
434 | { | |
435 | // TODO only used for test - remove?? | |
436 | 0 | int iv = 0, ivSize = shiftTo.size(); |
437 | 0 | if (iv >= ivSize) |
438 | { | |
439 | 0 | return null; |
440 | } | |
441 | 0 | int[] intv = shiftTo.get(iv++); |
442 | 0 | int from = intv[0], to = intv[1]; |
443 | 0 | if (from > to) |
444 | { | |
445 | 0 | from = intv[1]; |
446 | 0 | to = intv[0]; |
447 | } | |
448 | 0 | while (iv < ivSize) |
449 | { | |
450 | 0 | intv = shiftTo.get(iv++); |
451 | 0 | if (intv[0] < from) |
452 | { | |
453 | 0 | from = intv[0]; |
454 | } | |
455 | 0 | if (intv[1] < from) |
456 | { | |
457 | 0 | from = intv[1]; |
458 | } | |
459 | 0 | if (intv[0] > to) |
460 | { | |
461 | 0 | to = intv[0]; |
462 | } | |
463 | 0 | if (intv[1] > to) |
464 | { | |
465 | 0 | to = intv[1]; |
466 | } | |
467 | } | |
468 | 0 | int tF = 0, tT = 0; |
469 | 0 | int mp[][] = new int[to - from + 2][]; |
470 | 0 | for (int i = 0; i < mp.length; i++) |
471 | { | |
472 | 0 | int[] m = shift(i + from, shiftTo, sourceRatio, shiftFrom, |
473 | targetRatio); | |
474 | 0 | if (m != null) |
475 | { | |
476 | 0 | if (i == 0) |
477 | { | |
478 | 0 | tF = tT = m[0]; |
479 | } | |
480 | else | |
481 | { | |
482 | 0 | if (m[0] < tF) |
483 | { | |
484 | 0 | tF = m[0]; |
485 | } | |
486 | 0 | if (m[0] > tT) |
487 | { | |
488 | 0 | tT = m[0]; |
489 | } | |
490 | } | |
491 | } | |
492 | 0 | mp[i] = m; |
493 | } | |
494 | 0 | int[][] map = new int[][] { new int[] { from, to, tF, tT }, |
495 | new int[to - from + 2] }; | |
496 | ||
497 | 0 | map[0][2] = tF; |
498 | 0 | map[0][3] = tT; |
499 | ||
500 | 0 | for (int i = 0; i < mp.length; i++) |
501 | { | |
502 | 0 | if (mp[i] != null) |
503 | { | |
504 | 0 | map[1][i] = mp[i][0] - tF; |
505 | } | |
506 | else | |
507 | { | |
508 | 0 | map[1][i] = -1; // indicates an out of range mapping |
509 | } | |
510 | } | |
511 | 0 | return map; |
512 | } | |
513 | ||
514 | /** | |
515 | * addShift | |
516 | * | |
517 | * @param pos | |
518 | * start position for shift (in original reference frame) | |
519 | * @param shift | |
520 | * length of shift | |
521 | * | |
522 | * public void addShift(int pos, int shift) { int sidx = 0; int[] | |
523 | * rshift=null; while (sidx<shifts.size() && (rshift=(int[]) | |
524 | * shifts.elementAt(sidx))[0]<pos) sidx++; if (sidx==shifts.size()) | |
525 | * shifts.insertElementAt(new int[] { pos, shift}, sidx); else | |
526 | * rshift[1]+=shift; } | |
527 | */ | |
528 | ||
529 | /** | |
530 | * shift from pos to To(pos) | |
531 | * | |
532 | * @param pos | |
533 | * int | |
534 | * @return int shifted position in To, frameshift in From, direction of mapped | |
535 | * symbol in To | |
536 | */ | |
537 | 9 | public int[] shiftFrom(int pos) |
538 | { | |
539 | 9 | return shift(pos, fromShifts, fromRatio, toShifts, toRatio); |
540 | } | |
541 | ||
542 | /** | |
543 | * inverse of shiftFrom - maps pos in To to a position in From | |
544 | * | |
545 | * @param pos | |
546 | * (in To) | |
547 | * @return shifted position in From, frameshift in To, direction of mapped | |
548 | * symbol in From | |
549 | */ | |
550 | 16338 | public int[] shiftTo(int pos) |
551 | { | |
552 | 16338 | return shift(pos, toShifts, toRatio, fromShifts, fromRatio); |
553 | } | |
554 | ||
555 | /** | |
556 | * | |
557 | * @param shiftTo | |
558 | * @param fromRatio | |
559 | * @param shiftFrom | |
560 | * @param toRatio | |
561 | * @return | |
562 | */ | |
563 | 16347 | protected static int[] shift(int pos, List<int[]> shiftTo, int fromRatio, |
564 | List<int[]> shiftFrom, int toRatio) | |
565 | { | |
566 | // TODO: javadoc; tests | |
567 | 16347 | int[] fromCount = countPositions(shiftTo, pos); |
568 | 16347 | if (fromCount == null) |
569 | { | |
570 | 20 | return null; |
571 | } | |
572 | 16327 | int fromRemainder = (fromCount[0] - 1) % fromRatio; |
573 | 16327 | int toCount = 1 + (((fromCount[0] - 1) / fromRatio) * toRatio); |
574 | 16327 | int[] toPos = traverseToPosition(shiftFrom, toCount); |
575 | 16327 | if (toPos == null) |
576 | { | |
577 | 0 | return null; |
578 | } | |
579 | 16327 | return new int[] { toPos[0], fromRemainder, toPos[1] }; |
580 | } | |
581 | ||
582 | /** | |
583 | * Counts how many positions pos is along the series of intervals. Returns an | |
584 | * array of two values: | |
585 | * <ul> | |
586 | * <li>the number of positions traversed (inclusive) to reach {@code pos}</li> | |
587 | * <li>+1 if the last interval traversed is forward, -1 if in a negative | |
588 | * direction</li> | |
589 | * </ul> | |
590 | * Returns null if {@code pos} does not lie in any of the given intervals. | |
591 | * | |
592 | * @param intervals | |
593 | * a list of start-end intervals | |
594 | * @param pos | |
595 | * a position that may lie in one (or more) of the intervals | |
596 | * @return | |
597 | */ | |
598 | 16367 | protected static int[] countPositions(List<int[]> intervals, int pos) |
599 | { | |
600 | 16367 | int count = 0; |
601 | 16367 | int iv = 0; |
602 | 16367 | int ivSize = intervals.size(); |
603 | ||
604 | 16493 | while (iv < ivSize) |
605 | { | |
606 | 16468 | int[] intv = intervals.get(iv++); |
607 | 16468 | if (intv[0] <= intv[1]) |
608 | { | |
609 | /* | |
610 | * forwards interval | |
611 | */ | |
612 | 16457 | if (pos >= intv[0] && pos <= intv[1]) |
613 | { | |
614 | 16336 | return new int[] { count + pos - intv[0] + 1, +1 }; |
615 | } | |
616 | else | |
617 | { | |
618 | 121 | count += intv[1] - intv[0] + 1; |
619 | } | |
620 | } | |
621 | else | |
622 | { | |
623 | /* | |
624 | * reverse interval | |
625 | */ | |
626 | 11 | if (pos >= intv[1] && pos <= intv[0]) |
627 | { | |
628 | 5 | return new int[] { count + intv[0] - pos + 1, -1 }; |
629 | } | |
630 | else | |
631 | { | |
632 | 6 | count += intv[0] - intv[1] + 1; |
633 | } | |
634 | } | |
635 | } | |
636 | 25 | return null; |
637 | } | |
638 | ||
639 | /** | |
640 | * Reads through the given intervals until {@code count} positions have been | |
641 | * traversed, and returns an array consisting of two values: | |
642 | * <ul> | |
643 | * <li>the value at the {@code count'th} position</li> | |
644 | * <li>+1 if the last interval read is forwards, -1 if reverse direction</li> | |
645 | * </ul> | |
646 | * Returns null if the ranges include less than {@code count} positions, or if | |
647 | * {@code count < 1}. | |
648 | * | |
649 | * @param intervals | |
650 | * a list of [start, end] ranges | |
651 | * @param count | |
652 | * the number of positions to traverse | |
653 | * @return | |
654 | */ | |
655 | 16329 | protected static int[] traverseToPosition(List<int[]> intervals, |
656 | final int count) | |
657 | { | |
658 | 16329 | int traversed = 0; |
659 | 16329 | int ivSize = intervals.size(); |
660 | 16329 | int iv = 0; |
661 | ||
662 | 16329 | if (count < 1) |
663 | { | |
664 | 2 | return null; |
665 | } | |
666 | ||
667 | 16410 | while (iv < ivSize) |
668 | { | |
669 | 16410 | int[] intv = intervals.get(iv++); |
670 | 16410 | int diff = intv[1] - intv[0]; |
671 | 16410 | if (diff >= 0) |
672 | { | |
673 | 16410 | if (count <= traversed + 1 + diff) |
674 | { | |
675 | 16327 | return new int[] { intv[0] + (count - traversed - 1), +1 }; |
676 | } | |
677 | else | |
678 | { | |
679 | 83 | traversed += 1 + diff; |
680 | } | |
681 | } | |
682 | else | |
683 | { | |
684 | 0 | if (count <= traversed + 1 - diff) |
685 | { | |
686 | 0 | return new int[] { intv[0] - (count - traversed - 1), -1 }; |
687 | } | |
688 | else | |
689 | { | |
690 | 0 | traversed += 1 - diff; |
691 | } | |
692 | } | |
693 | } | |
694 | 0 | return null; |
695 | } | |
696 | ||
697 | /** | |
698 | * like shift - except returns the intervals in the given vector of shifts | |
699 | * which were spanned in traversing fromStart to fromEnd | |
700 | * | |
701 | * @param shiftFrom | |
702 | * @param fromStart | |
703 | * @param fromEnd | |
704 | * @param fromRatio2 | |
705 | * @return series of from,to intervals from from first position of starting | |
706 | * region to final position of ending region inclusive | |
707 | */ | |
708 | 0 | protected static int[] getIntervals(List<int[]> shiftFrom, |
709 | int[] fromStart, int[] fromEnd, int fromRatio2) | |
710 | { | |
711 | 0 | if (fromStart == null || fromEnd == null) |
712 | { | |
713 | 0 | return null; |
714 | } | |
715 | 0 | int startpos, endpos; |
716 | 0 | startpos = fromStart[0]; // first position in fromStart |
717 | 0 | endpos = fromEnd[0]; // last position in fromEnd |
718 | 0 | int endindx = (fromRatio2 - 1); // additional positions to get to last |
719 | // position from endpos | |
720 | 0 | int intv = 0, intvSize = shiftFrom.size(); |
721 | 0 | int iv[], i = 0, fs = -1, fe_s = -1, fe = -1; // containing intervals |
722 | // search intervals to locate ones containing startpos and count endindx | |
723 | // positions on from endpos | |
724 | 0 | while (intv < intvSize && (fs == -1 || fe == -1)) |
725 | { | |
726 | 0 | iv = shiftFrom.get(intv++); |
727 | 0 | if (fe_s > -1) |
728 | { | |
729 | 0 | endpos = iv[0]; // start counting from beginning of interval |
730 | 0 | endindx--; // inclusive of endpos |
731 | } | |
732 | 0 | if (iv[0] <= iv[1]) |
733 | { | |
734 | 0 | if (fs == -1 && startpos >= iv[0] && startpos <= iv[1]) |
735 | { | |
736 | 0 | fs = i; |
737 | } | |
738 | 0 | if (endpos >= iv[0] && endpos <= iv[1]) |
739 | { | |
740 | 0 | if (fe_s == -1) |
741 | { | |
742 | 0 | fe_s = i; |
743 | } | |
744 | 0 | if (fe_s != -1) |
745 | { | |
746 | 0 | if (endpos + endindx <= iv[1]) |
747 | { | |
748 | 0 | fe = i; |
749 | 0 | endpos = endpos + endindx; // end of end token is within this |
750 | // interval | |
751 | } | |
752 | else | |
753 | { | |
754 | 0 | endindx -= iv[1] - endpos; // skip all this interval too |
755 | } | |
756 | } | |
757 | } | |
758 | } | |
759 | else | |
760 | { | |
761 | 0 | if (fs == -1 && startpos <= iv[0] && startpos >= iv[1]) |
762 | { | |
763 | 0 | fs = i; |
764 | } | |
765 | 0 | if (endpos <= iv[0] && endpos >= iv[1]) |
766 | { | |
767 | 0 | if (fe_s == -1) |
768 | { | |
769 | 0 | fe_s = i; |
770 | } | |
771 | 0 | if (fe_s != -1) |
772 | { | |
773 | 0 | if (endpos - endindx >= iv[1]) |
774 | { | |
775 | 0 | fe = i; |
776 | 0 | endpos = endpos - endindx; // end of end token is within this |
777 | // interval | |
778 | } | |
779 | else | |
780 | { | |
781 | 0 | endindx -= endpos - iv[1]; // skip all this interval too |
782 | } | |
783 | } | |
784 | } | |
785 | } | |
786 | 0 | i++; |
787 | } | |
788 | 0 | if (fs == fe && fe == -1) |
789 | { | |
790 | 0 | return null; |
791 | } | |
792 | 0 | List<int[]> ranges = new ArrayList<>(); |
793 | 0 | if (fs <= fe) |
794 | { | |
795 | 0 | intv = fs; |
796 | 0 | i = fs; |
797 | // truncate initial interval | |
798 | 0 | iv = shiftFrom.get(intv++); |
799 | 0 | iv = new int[] { iv[0], iv[1] };// clone |
800 | 0 | if (i == fs) |
801 | { | |
802 | 0 | iv[0] = startpos; |
803 | } | |
804 | 0 | while (i != fe) |
805 | { | |
806 | 0 | ranges.add(iv); // add initial range |
807 | 0 | iv = shiftFrom.get(intv++); // get next interval |
808 | 0 | iv = new int[] { iv[0], iv[1] };// clone |
809 | 0 | i++; |
810 | } | |
811 | 0 | if (i == fe) |
812 | { | |
813 | 0 | iv[1] = endpos; |
814 | } | |
815 | 0 | ranges.add(iv); // add only - or final range |
816 | } | |
817 | else | |
818 | { | |
819 | // walk from end of interval. | |
820 | 0 | i = shiftFrom.size() - 1; |
821 | 0 | while (i > fs) |
822 | { | |
823 | 0 | i--; |
824 | } | |
825 | 0 | iv = shiftFrom.get(i); |
826 | 0 | iv = new int[] { iv[1], iv[0] };// reverse and clone |
827 | // truncate initial interval | |
828 | 0 | if (i == fs) |
829 | { | |
830 | 0 | iv[0] = startpos; |
831 | } | |
832 | 0 | while (--i != fe) |
833 | { // fix apparent logic bug when fe==-1 | |
834 | 0 | ranges.add(iv); // add (truncated) reversed interval |
835 | 0 | iv = shiftFrom.get(i); |
836 | 0 | iv = new int[] { iv[1], iv[0] }; // reverse and clone |
837 | } | |
838 | 0 | if (i == fe) |
839 | { | |
840 | // interval is already reversed | |
841 | 0 | iv[1] = endpos; |
842 | } | |
843 | 0 | ranges.add(iv); // add only - or final range |
844 | } | |
845 | // create array of start end intervals. | |
846 | 0 | int[] range = null; |
847 | 0 | if (ranges != null && ranges.size() > 0) |
848 | { | |
849 | 0 | range = new int[ranges.size() * 2]; |
850 | 0 | intv = 0; |
851 | 0 | intvSize = ranges.size(); |
852 | 0 | i = 0; |
853 | 0 | while (intv < intvSize) |
854 | { | |
855 | 0 | iv = ranges.get(intv); |
856 | 0 | range[i++] = iv[0]; |
857 | 0 | range[i++] = iv[1]; |
858 | 0 | ranges.set(intv++, null); // remove |
859 | } | |
860 | } | |
861 | 0 | return range; |
862 | } | |
863 | ||
864 | /** | |
865 | * get the 'initial' position of mpos in To | |
866 | * | |
867 | * @param mpos | |
868 | * position in from | |
869 | * @return position of first word in to reference frame | |
870 | */ | |
871 | 3 | public int getToPosition(int mpos) |
872 | { | |
873 | 3 | int[] mp = shiftTo(mpos); |
874 | 3 | if (mp != null) |
875 | { | |
876 | 3 | return mp[0]; |
877 | } | |
878 | 0 | return mpos; |
879 | } | |
880 | ||
881 | /** | |
882 | * | |
883 | * @return a MapList whose From range is this maplist's To Range, and vice | |
884 | * versa | |
885 | */ | |
886 | 68 | public MapList getInverse() |
887 | { | |
888 | 68 | return new MapList(getToRanges(), getFromRanges(), getToRatio(), |
889 | getFromRatio()); | |
890 | } | |
891 | ||
892 | /** | |
893 | * String representation - for debugging, not guaranteed not to change | |
894 | */ | |
895 | 12 | @Override |
896 | public String toString() | |
897 | { | |
898 | 12 | StringBuilder sb = new StringBuilder(64); |
899 | 12 | sb.append("["); |
900 | 12 | for (int[] shift : fromShifts) |
901 | { | |
902 | 26 | sb.append(" ").append(Arrays.toString(shift)); |
903 | } | |
904 | 12 | sb.append(" ] "); |
905 | 12 | sb.append(fromRatio).append(":").append(toRatio); |
906 | 12 | sb.append(" to ["); |
907 | 12 | for (int[] shift : toShifts) |
908 | { | |
909 | 17 | sb.append(" ").append(Arrays.toString(shift)); |
910 | } | |
911 | 12 | sb.append(" ]"); |
912 | 12 | return sb.toString(); |
913 | } | |
914 | ||
915 | /** | |
916 | * Extend this map list by adding the given map's ranges. There is no | |
917 | * validation check that the ranges do not overlap existing ranges (or each | |
918 | * other), but contiguous ranges are merged. | |
919 | * | |
920 | * @param map | |
921 | */ | |
922 | 12 | public void addMapList(MapList map) |
923 | { | |
924 | 12 | if (this.equals(map)) |
925 | { | |
926 | 3 | return; |
927 | } | |
928 | 9 | this.fromLowest = Math.min(fromLowest, map.fromLowest); |
929 | 9 | this.toLowest = Math.min(toLowest, map.toLowest); |
930 | 9 | this.fromHighest = Math.max(fromHighest, map.fromHighest); |
931 | 9 | this.toHighest = Math.max(toHighest, map.toHighest); |
932 | ||
933 | 9 | for (int[] range : map.getFromRanges()) |
934 | { | |
935 | 10 | addRange(range, fromShifts); |
936 | } | |
937 | 9 | for (int[] range : map.getToRanges()) |
938 | { | |
939 | 11 | addRange(range, toShifts); |
940 | } | |
941 | } | |
942 | ||
943 | /** | |
944 | * Adds the given range to a list of ranges. If the new range just extends | |
945 | * existing ranges, the current endpoint is updated instead. | |
946 | * | |
947 | * @param range | |
948 | * @param addTo | |
949 | */ | |
950 | 29 | static void addRange(int[] range, List<int[]> addTo) |
951 | { | |
952 | /* | |
953 | * list is empty - add to it! | |
954 | */ | |
955 | 29 | if (addTo.size() == 0) |
956 | { | |
957 | 1 | addTo.add(range); |
958 | 1 | return; |
959 | } | |
960 | ||
961 | 28 | int[] last = addTo.get(addTo.size() - 1); |
962 | 28 | boolean lastForward = last[1] >= last[0]; |
963 | 28 | boolean newForward = range[1] >= range[0]; |
964 | ||
965 | /* | |
966 | * contiguous range in the same direction - just update endpoint | |
967 | */ | |
968 | 28 | if (lastForward == newForward && last[1] == range[0]) |
969 | { | |
970 | 4 | last[1] = range[1]; |
971 | 4 | return; |
972 | } | |
973 | ||
974 | /* | |
975 | * next range starts at +1 in forward sense - update endpoint | |
976 | */ | |
977 | 24 | if (lastForward && newForward && range[0] == last[1] + 1) |
978 | { | |
979 | 3 | last[1] = range[1]; |
980 | 3 | return; |
981 | } | |
982 | ||
983 | /* | |
984 | * next range starts at -1 in reverse sense - update endpoint | |
985 | */ | |
986 | 21 | if (!lastForward && !newForward && range[0] == last[1] - 1) |
987 | { | |
988 | 4 | last[1] = range[1]; |
989 | 4 | return; |
990 | } | |
991 | ||
992 | /* | |
993 | * just add the new range | |
994 | */ | |
995 | 17 | addTo.add(range); |
996 | } | |
997 | ||
998 | /** | |
999 | * Returns true if mapping is from forward strand, false if from reverse | |
1000 | * strand. Result is just based on the first 'from' range that is not a single | |
1001 | * position. Default is true unless proven to be false. Behaviour is not well | |
1002 | * defined if the mapping has a mixture of forward and reverse ranges. | |
1003 | * | |
1004 | * @return | |
1005 | */ | |
1006 | 3 | public boolean isFromForwardStrand() |
1007 | { | |
1008 | 3 | return isForwardStrand(getFromRanges()); |
1009 | } | |
1010 | ||
1011 | /** | |
1012 | * Returns true if mapping is to forward strand, false if to reverse strand. | |
1013 | * Result is just based on the first 'to' range that is not a single position. | |
1014 | * Default is true unless proven to be false. Behaviour is not well defined if | |
1015 | * the mapping has a mixture of forward and reverse ranges. | |
1016 | * | |
1017 | * @return | |
1018 | */ | |
1019 | 27 | public boolean isToForwardStrand() |
1020 | { | |
1021 | 27 | return isForwardStrand(getToRanges()); |
1022 | } | |
1023 | ||
1024 | /** | |
1025 | * A helper method that returns true unless at least one range has start > | |
1026 | * end. Behaviour is undefined for a mixture of forward and reverse ranges. | |
1027 | * | |
1028 | * @param ranges | |
1029 | * @return | |
1030 | */ | |
1031 | 30 | private boolean isForwardStrand(List<int[]> ranges) |
1032 | { | |
1033 | 30 | boolean forwardStrand = true; |
1034 | 30 | for (int[] range : ranges) |
1035 | { | |
1036 | 38 | if (range[1] > range[0]) |
1037 | { | |
1038 | 20 | break; // forward strand confirmed |
1039 | } | |
1040 | 18 | else if (range[1] < range[0]) |
1041 | { | |
1042 | 8 | forwardStrand = false; |
1043 | 8 | break; // reverse strand confirmed |
1044 | } | |
1045 | } | |
1046 | 30 | return forwardStrand; |
1047 | } | |
1048 | ||
1049 | /** | |
1050 | * | |
1051 | * @return true if from, or to is a three to 1 mapping | |
1052 | */ | |
1053 | 94 | public boolean isTripletMap() |
1054 | { | |
1055 | 94 | return (toRatio == 3 && fromRatio == 1) |
1056 | || (fromRatio == 3 && toRatio == 1); | |
1057 | } | |
1058 | ||
1059 | /** | |
1060 | * Returns a map which is the composite of this one and the input map. That | |
1061 | * is, the output map has the fromRanges of this map, and its toRanges are the | |
1062 | * toRanges of this map as transformed by the input map. | |
1063 | * <p> | |
1064 | * Returns null if the mappings cannot be traversed (not all toRanges of this | |
1065 | * map correspond to fromRanges of the input), or if this.toRatio does not | |
1066 | * match map.fromRatio. | |
1067 | * | |
1068 | * <pre> | |
1069 | * Example 1: | |
1070 | * this: from [1-100] to [501-600] | |
1071 | * input: from [10-40] to [60-90] | |
1072 | * output: from [10-40] to [560-590] | |
1073 | * Example 2 ('reverse strand exons'): | |
1074 | * this: from [1-100] to [2000-1951], [1000-951] // transcript to loci | |
1075 | * input: from [1-50] to [41-90] // CDS to transcript | |
1076 | * output: from [10-40] to [1960-1951], [1000-971] // CDS to gene loci | |
1077 | * </pre> | |
1078 | * | |
1079 | * @param map | |
1080 | * @return | |
1081 | */ | |
1082 | 19 | public MapList traverse(MapList map) |
1083 | { | |
1084 | 19 | if (map == null) |
1085 | { | |
1086 | 0 | return null; |
1087 | } | |
1088 | ||
1089 | /* | |
1090 | * compound the ratios by this rule: | |
1091 | * A:B with M:N gives A*M:B*N | |
1092 | * reduced by greatest common divisor | |
1093 | * so 1:3 with 3:3 is 3:9 or 1:3 | |
1094 | * 1:3 with 3:1 is 3:3 or 1:1 | |
1095 | * 1:3 with 1:3 is 1:9 | |
1096 | * 2:5 with 3:7 is 6:35 | |
1097 | */ | |
1098 | 19 | int outFromRatio = getFromRatio() * map.getFromRatio(); |
1099 | 19 | int outToRatio = getToRatio() * map.getToRatio(); |
1100 | 19 | int gcd = MathUtils.gcd(outFromRatio, outToRatio); |
1101 | 19 | outFromRatio /= gcd; |
1102 | 19 | outToRatio /= gcd; |
1103 | ||
1104 | 19 | List<int[]> toRanges = new ArrayList<>(); |
1105 | 19 | for (int[] range : getToRanges()) |
1106 | { | |
1107 | 20 | int fromLength = Math.abs(range[1] - range[0]) + 1; |
1108 | 20 | int[] transferred = map.locateInTo(range[0], range[1]); |
1109 | 20 | if (transferred == null || transferred.length % 2 != 0) |
1110 | { | |
1111 | 0 | return null; |
1112 | } | |
1113 | ||
1114 | /* | |
1115 | * convert [start1, end1, start2, end2, ...] | |
1116 | * to [[start1, end1], [start2, end2], ...] | |
1117 | */ | |
1118 | 20 | int toLength = 0; |
1119 | 169 | for (int i = 0; i < transferred.length;) |
1120 | { | |
1121 | 149 | toRanges.add(new int[] { transferred[i], transferred[i + 1] }); |
1122 | 149 | toLength += Math.abs(transferred[i + 1] - transferred[i]) + 1; |
1123 | 149 | i += 2; |
1124 | } | |
1125 | ||
1126 | /* | |
1127 | * check we mapped the full range - if not, abort | |
1128 | */ | |
1129 | 20 | if (fromLength * map.getToRatio() != toLength * map.getFromRatio()) |
1130 | { | |
1131 | 1 | return null; |
1132 | } | |
1133 | } | |
1134 | ||
1135 | 18 | return new MapList(getFromRanges(), toRanges, outFromRatio, outToRatio); |
1136 | } | |
1137 | ||
1138 | /** | |
1139 | * Answers true if the mapping is from one contiguous range to another, else | |
1140 | * false | |
1141 | * | |
1142 | * @return | |
1143 | */ | |
1144 | 0 | public boolean isContiguous() |
1145 | { | |
1146 | 0 | return fromShifts.size() == 1 && toShifts.size() == 1; |
1147 | } | |
1148 | ||
1149 | /** | |
1150 | * <<<<<<< HEAD Returns the [start1, end1, start2, end2, ...] positions in the | |
1151 | * 'from' range that map to positions between {@code start} and {@code end} in | |
1152 | * the 'to' range. Note that for a reverse strand mapping this will return | |
1153 | * ranges with end < start. Returns null if no mapped positions are found in | |
1154 | * start-end. | |
1155 | * | |
1156 | * @param start | |
1157 | * @param end | |
1158 | * @return | |
1159 | */ | |
1160 | 167104 | public int[] locateInFrom(int start, int end) |
1161 | { | |
1162 | 167104 | return mapPositions(start, end, toShifts, fromShifts, toRatio, |
1163 | fromRatio); | |
1164 | } | |
1165 | ||
1166 | /** | |
1167 | * Returns the [start1, end1, start2, end2, ...] positions in the 'to' range | |
1168 | * that map to positions between {@code start} and {@code end} in the 'from' | |
1169 | * range. Note that for a reverse strand mapping this will return ranges with | |
1170 | * end < start. Returns null if no mapped positions are found in start-end. | |
1171 | * | |
1172 | * @param start | |
1173 | * @param end | |
1174 | * @return | |
1175 | */ | |
1176 | 26038 | public int[] locateInTo(int start, int end) |
1177 | { | |
1178 | 26038 | return mapPositions(start, end, fromShifts, toShifts, fromRatio, |
1179 | toRatio); | |
1180 | } | |
1181 | ||
1182 | /** | |
1183 | * Helper method that returns the [start1, end1, start2, end2, ...] positions | |
1184 | * in {@code targetRange} that map to positions between {@code start} and | |
1185 | * {@code end} in {@code sourceRange}. Note that for a reverse strand mapping | |
1186 | * this will return ranges with end < start. Returns null if no mapped | |
1187 | * positions are found in start-end. | |
1188 | * | |
1189 | * @param start | |
1190 | * @param end | |
1191 | * @param sourceRange | |
1192 | * @param targetRange | |
1193 | * @param sourceWordLength | |
1194 | * @param targetWordLength | |
1195 | * @return | |
1196 | */ | |
1197 | 193136 | final static int[] mapPositions(int start, int end, |
1198 | List<int[]> sourceRange, List<int[]> targetRange, | |
1199 | int sourceWordLength, int targetWordLength) | |
1200 | { | |
1201 | 193135 | if (end < start) |
1202 | { | |
1203 | 2 | int tmp = end; |
1204 | 2 | end = start; |
1205 | 2 | start = tmp; |
1206 | } | |
1207 | ||
1208 | /* | |
1209 | * traverse sourceRange and mark offsets in targetRange | |
1210 | * of any positions that lie in [start, end] | |
1211 | */ | |
1212 | 193136 | BitSet offsets = getMappedOffsetsForPositions(start, end, sourceRange, |
1213 | sourceWordLength, targetWordLength); | |
1214 | ||
1215 | /* | |
1216 | * traverse targetRange and collect positions at the marked offsets | |
1217 | */ | |
1218 | 193143 | List<int[]> mapped = getPositionsForOffsets(targetRange, offsets); |
1219 | ||
1220 | // TODO: or just return the List and adjust calling code to match | |
1221 | 193133 | return mapped.isEmpty() ? null : MappingUtils.rangeListToArray(mapped); |
1222 | } | |
1223 | ||
1224 | /** | |
1225 | * Scans the list of {@code ranges} for any values (positions) that lie | |
1226 | * between start and end (inclusive), and records the <em>offsets</em> from | |
1227 | * the start of the list as a BitSet. The offset positions are converted to | |
1228 | * corresponding words in blocks of {@code wordLength2}. | |
1229 | * | |
1230 | * <pre> | |
1231 | * For example: | |
1232 | * 1:1 (e.g. gene to CDS): | |
1233 | * ranges { [10-20], [31-40] }, wordLengthFrom = wordLength 2 = 1 | |
1234 | * for start = 1, end = 9, returns a BitSet with no bits set | |
1235 | * for start = 1, end = 11, returns a BitSet with bits 0-1 set | |
1236 | * for start = 15, end = 35, returns a BitSet with bits 5-15 set | |
1237 | * 1:3 (peptide to codon): | |
1238 | * ranges { [1-200] }, wordLengthFrom = 1, wordLength 2 = 3 | |
1239 | * for start = 9, end = 9, returns a BitSet with bits 24-26 set | |
1240 | * 3:1 (codon to peptide): | |
1241 | * ranges { [101-150], [171-180] }, wordLengthFrom = 3, wordLength 2 = 1 | |
1242 | * for start = 101, end = 102 (partial first codon), returns a BitSet with bit 0 set | |
1243 | * for start = 150, end = 171 (partial 17th codon), returns a BitSet with bit 16 set | |
1244 | * 3:1 (circular DNA to peptide): | |
1245 | * ranges { [101-150], [21-30] }, wordLengthFrom = 3, wordLength 2 = 1 | |
1246 | * for start = 24, end = 40 (spans codons 18-20), returns a BitSet with bits 17-19 set | |
1247 | * </pre> | |
1248 | * | |
1249 | * @param start | |
1250 | * @param end | |
1251 | * @param sourceRange | |
1252 | * @param sourceWordLength | |
1253 | * @param targetWordLength | |
1254 | * @return | |
1255 | */ | |
1256 | 193149 | protected final static BitSet getMappedOffsetsForPositions(int start, |
1257 | int end, List<int[]> sourceRange, int sourceWordLength, | |
1258 | int targetWordLength) | |
1259 | { | |
1260 | 193149 | BitSet overlaps = new BitSet(); |
1261 | 193146 | int offset = 0; |
1262 | 193145 | final int s1 = sourceRange.size(); |
1263 | 386587 | for (int i = 0; i < s1; i++) |
1264 | { | |
1265 | 193431 | int[] range = sourceRange.get(i); |
1266 | 193431 | final int offset1 = offset; |
1267 | 193431 | int overlapStartOffset = -1; |
1268 | 193430 | int overlapEndOffset = -1; |
1269 | ||
1270 | 193431 | if (range[1] >= range[0]) |
1271 | { | |
1272 | /* | |
1273 | * forward direction range | |
1274 | */ | |
1275 | 193317 | if (start <= range[1] && end >= range[0]) |
1276 | { | |
1277 | /* | |
1278 | * overlap | |
1279 | */ | |
1280 | 179655 | int overlapStart = Math.max(start, range[0]); |
1281 | 179655 | overlapStartOffset = offset1 + overlapStart - range[0]; |
1282 | 179655 | int overlapEnd = Math.min(end, range[1]); |
1283 | 179655 | overlapEndOffset = offset1 + overlapEnd - range[0]; |
1284 | } | |
1285 | } | |
1286 | else | |
1287 | { | |
1288 | /* | |
1289 | * reverse direction range | |
1290 | */ | |
1291 | 114 | if (start <= range[0] && end >= range[1]) |
1292 | { | |
1293 | /* | |
1294 | * overlap | |
1295 | */ | |
1296 | 29 | int overlapStart = Math.max(start, range[1]); |
1297 | 29 | int overlapEnd = Math.min(end, range[0]); |
1298 | 29 | overlapStartOffset = offset1 + range[0] - overlapEnd; |
1299 | 29 | overlapEndOffset = offset1 + range[0] - overlapStart; |
1300 | } | |
1301 | } | |
1302 | ||
1303 | 193439 | if (overlapStartOffset > -1) |
1304 | { | |
1305 | /* | |
1306 | * found an overlap | |
1307 | */ | |
1308 | 179690 | if (sourceWordLength != targetWordLength) |
1309 | { | |
1310 | /* | |
1311 | * convert any overlap found to whole words in the target range | |
1312 | * (e.g. treat any partial codon overlap as if the whole codon) | |
1313 | */ | |
1314 | 21658 | overlapStartOffset -= overlapStartOffset % sourceWordLength; |
1315 | 21658 | overlapStartOffset = overlapStartOffset / sourceWordLength |
1316 | * targetWordLength; | |
1317 | ||
1318 | /* | |
1319 | * similar calculation for range end, adding | |
1320 | * (wordLength2 - 1) for end of mapped word | |
1321 | */ | |
1322 | 21658 | overlapEndOffset -= overlapEndOffset % sourceWordLength; |
1323 | 21658 | overlapEndOffset = overlapEndOffset / sourceWordLength |
1324 | * targetWordLength; | |
1325 | 21658 | overlapEndOffset += targetWordLength - 1; |
1326 | } | |
1327 | 179690 | overlaps.set(overlapStartOffset, overlapEndOffset + 1); |
1328 | } | |
1329 | 193440 | offset += 1 + Math.abs(range[1] - range[0]); |
1330 | } | |
1331 | 193156 | return overlaps; |
1332 | } | |
1333 | ||
1334 | /** | |
1335 | * Returns a (possibly empty) list of the [start-end] values (positions) at | |
1336 | * offsets in the {@code targetRange} list that are marked by 'on' bits in the | |
1337 | * {@code offsets} bitset. | |
1338 | * | |
1339 | * @param targetRange | |
1340 | * @param offsets | |
1341 | * @return | |
1342 | */ | |
1343 | 193151 | protected final static List<int[]> getPositionsForOffsets( |
1344 | List<int[]> targetRange, BitSet offsets) | |
1345 | { | |
1346 | 193151 | List<int[]> mapped = new ArrayList<>(); |
1347 | 193149 | if (offsets.isEmpty()) |
1348 | { | |
1349 | 13501 | return mapped; |
1350 | } | |
1351 | ||
1352 | /* | |
1353 | * count of positions preceding ranges[i] | |
1354 | */ | |
1355 | 179648 | int traversed = 0; |
1356 | ||
1357 | /* | |
1358 | * for each [from-to] range in ranges: | |
1359 | * - find subranges (if any) at marked offsets | |
1360 | * - add the start-end values at the marked positions | |
1361 | */ | |
1362 | 179648 | final int toAdd = offsets.cardinality(); |
1363 | 179648 | int added = 0; |
1364 | 179648 | final int s2 = targetRange.size(); |
1365 | 359543 | for (int i = 0; added < toAdd && i < s2; i++) |
1366 | { | |
1367 | 179903 | int[] range = targetRange.get(i); |
1368 | 179904 | added += addOffsetPositions(mapped, traversed, range, offsets); |
1369 | 179895 | traversed += Math.abs(range[1] - range[0]) + 1; |
1370 | } | |
1371 | 179640 | return mapped; |
1372 | } | |
1373 | ||
1374 | /** | |
1375 | * Helper method that adds any start-end subranges of {@code range} that are | |
1376 | * at offsets in {@code range} marked by set bits in overlaps. | |
1377 | * {@code mapOffset} is added to {@code range} offset positions. Returns the | |
1378 | * count of positions added. | |
1379 | * | |
1380 | * @param mapped | |
1381 | * @param mapOffset | |
1382 | * @param range | |
1383 | * @param overlaps | |
1384 | * @return | |
1385 | */ | |
1386 | 179909 | final static int addOffsetPositions(List<int[]> mapped, |
1387 | final int mapOffset, final int[] range, final BitSet overlaps) | |
1388 | { | |
1389 | 179909 | final int rangeLength = 1 + Math.abs(range[1] - range[0]); |
1390 | 179906 | final int step = range[1] < range[0] ? -1 : 1; |
1391 | 179902 | int offsetStart = 0; // offset into range |
1392 | 179901 | int added = 0; |
1393 | ||
1394 | 359727 | while (offsetStart < rangeLength) |
1395 | { | |
1396 | /* | |
1397 | * find the start of the next marked overlap offset; | |
1398 | * if there is none, or it is beyond range, then finished | |
1399 | */ | |
1400 | 351923 | int overlapStart = overlaps.nextSetBit(mapOffset + offsetStart); |
1401 | 351946 | if (overlapStart == -1 || overlapStart - mapOffset >= rangeLength) |
1402 | { | |
1403 | /* | |
1404 | * no more overlaps, or no more within range[] | |
1405 | */ | |
1406 | 172107 | return added; |
1407 | } | |
1408 | 179839 | overlapStart -= mapOffset; |
1409 | ||
1410 | /* | |
1411 | * end of the overlap range is just before the next clear bit; | |
1412 | * restrict it to end of range if necessary; | |
1413 | * note we may add a reverse strand range here (end < start) | |
1414 | */ | |
1415 | 179839 | int overlapEnd = overlaps.nextClearBit(mapOffset + overlapStart + 1); |
1416 | 179840 | overlapEnd = (overlapEnd == -1) ? rangeLength - 1 |
1417 | : Math.min(rangeLength - 1, overlapEnd - mapOffset - 1); | |
1418 | 179840 | int startPosition = range[0] + step * overlapStart; |
1419 | 179841 | int endPosition = range[0] + step * overlapEnd; |
1420 | 179840 | mapped.add(new int[] { startPosition, endPosition }); |
1421 | 179849 | offsetStart = overlapEnd + 1; |
1422 | 179849 | added += Math.abs(endPosition - startPosition) + 1; |
1423 | } | |
1424 | ||
1425 | 7801 | return added; |
1426 | } | |
1427 | ||
1428 | /* | |
1429 | * Returns the [start, end...] positions in the range mapped from, that are | |
1430 | * mapped to by part or all of the given begin-end of the range mapped to. | |
1431 | * Returns null if begin-end does not overlap any position mapped to. | |
1432 | * | |
1433 | * @param begin | |
1434 | * @param end | |
1435 | * @return | |
1436 | */ | |
1437 | 7 | public int[] getOverlapsInFrom(final int begin, final int end) |
1438 | { | |
1439 | 7 | int[] overlaps = MappingUtils.findOverlap(toShifts, begin, end); |
1440 | ||
1441 | 7 | return overlaps == null ? null : locateInFrom(overlaps[0], overlaps[1]); |
1442 | } | |
1443 | ||
1444 | /** | |
1445 | * Returns the [start, end...] positions in the range mapped to, that are | |
1446 | * mapped to by part or all of the given begin-end of the range mapped from. | |
1447 | * Returns null if begin-end does not overlap any position mapped from. | |
1448 | * | |
1449 | * @param begin | |
1450 | * @param end | |
1451 | * @return | |
1452 | */ | |
1453 | 15 | public int[] getOverlapsInTo(final int begin, final int end) |
1454 | { | |
1455 | 15 | int[] overlaps = MappingUtils.findOverlap(fromShifts, begin, end); |
1456 | ||
1457 | 15 | return overlaps == null ? null : locateInTo(overlaps[0], overlaps[1]); |
1458 | } | |
1459 | } |