Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
MappingUtils | 58 | 290 | 120 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | package jalview.util; | |
22 | ||
23 | import java.util.ArrayList; | |
24 | import java.util.Arrays; | |
25 | import java.util.HashMap; | |
26 | import java.util.Iterator; | |
27 | import java.util.List; | |
28 | import java.util.Map; | |
29 | ||
30 | import jalview.analysis.AlignmentSorter; | |
31 | import jalview.api.AlignViewportI; | |
32 | import jalview.bin.Console; | |
33 | import jalview.commands.CommandI; | |
34 | import jalview.commands.EditCommand; | |
35 | import jalview.commands.EditCommand.Action; | |
36 | import jalview.commands.EditCommand.Edit; | |
37 | import jalview.commands.OrderCommand; | |
38 | import jalview.datamodel.AlignedCodonFrame; | |
39 | import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping; | |
40 | import jalview.datamodel.AlignmentI; | |
41 | import jalview.datamodel.AlignmentOrder; | |
42 | import jalview.datamodel.ColumnSelection; | |
43 | import jalview.datamodel.HiddenColumns; | |
44 | import jalview.datamodel.Mapping; | |
45 | import jalview.datamodel.SearchResultMatchI; | |
46 | import jalview.datamodel.SearchResults; | |
47 | import jalview.datamodel.SearchResultsI; | |
48 | import jalview.datamodel.Sequence; | |
49 | import jalview.datamodel.SequenceGroup; | |
50 | import jalview.datamodel.SequenceI; | |
51 | ||
52 | /** | |
53 | * Helper methods for manipulations involving sequence mappings. | |
54 | * | |
55 | * @author gmcarstairs | |
56 | * | |
57 | */ | |
58 | public final class MappingUtils | |
59 | { | |
60 | ||
61 | /** | |
62 | * Helper method to map a CUT or PASTE command. | |
63 | * | |
64 | * @param edit | |
65 | * the original command | |
66 | * @param undo | |
67 | * if true, the command is to be undone | |
68 | * @param targetSeqs | |
69 | * the mapped sequences to apply the mapped command to | |
70 | * @param result | |
71 | * the mapped EditCommand to add to | |
72 | * @param mappings | |
73 | */ | |
74 | 0 | protected static void mapCutOrPaste(Edit edit, boolean undo, |
75 | List<SequenceI> targetSeqs, EditCommand result, | |
76 | List<AlignedCodonFrame> mappings) | |
77 | { | |
78 | 0 | Action action = edit.getAction(); |
79 | 0 | if (undo) |
80 | { | |
81 | 0 | action = action.getUndoAction(); |
82 | } | |
83 | // TODO write this | |
84 | 0 | Console.error("MappingUtils.mapCutOrPaste not yet implemented"); |
85 | } | |
86 | ||
87 | /** | |
88 | * Returns a new EditCommand representing the given command as mapped to the | |
89 | * given sequences. If there is no mapping, returns null. | |
90 | * | |
91 | * @param command | |
92 | * @param undo | |
93 | * @param mapTo | |
94 | * @param gapChar | |
95 | * @param mappings | |
96 | * @return | |
97 | */ | |
98 | 1 | public static EditCommand mapEditCommand(EditCommand command, |
99 | boolean undo, final AlignmentI mapTo, char gapChar, | |
100 | List<AlignedCodonFrame> mappings) | |
101 | { | |
102 | /* | |
103 | * For now, only support mapping from protein edits to cDna | |
104 | */ | |
105 | 1 | if (!mapTo.isNucleotide()) |
106 | { | |
107 | 0 | return null; |
108 | } | |
109 | ||
110 | /* | |
111 | * Cache a copy of the target sequences so we can mimic successive edits on | |
112 | * them. This lets us compute mappings for all edits in the set. | |
113 | */ | |
114 | 1 | Map<SequenceI, SequenceI> targetCopies = new HashMap<>(); |
115 | 1 | for (SequenceI seq : mapTo.getSequences()) |
116 | { | |
117 | 1 | SequenceI ds = seq.getDatasetSequence(); |
118 | 1 | if (ds != null) |
119 | { | |
120 | 1 | final SequenceI copy = new Sequence(seq); |
121 | 1 | copy.setDatasetSequence(ds); |
122 | 1 | targetCopies.put(ds, copy); |
123 | } | |
124 | } | |
125 | ||
126 | /* | |
127 | * Compute 'source' sequences as they were before applying edits: | |
128 | */ | |
129 | 1 | Map<SequenceI, SequenceI> originalSequences = command.priorState(undo); |
130 | ||
131 | 1 | EditCommand result = new EditCommand(); |
132 | 1 | Iterator<Edit> edits = command.getEditIterator(!undo); |
133 | 2 | while (edits.hasNext()) |
134 | { | |
135 | 1 | Edit edit = edits.next(); |
136 | 1 | if (edit.getAction() == Action.CUT |
137 | || edit.getAction() == Action.PASTE) | |
138 | { | |
139 | 0 | mapCutOrPaste(edit, undo, mapTo.getSequences(), result, mappings); |
140 | } | |
141 | 1 | else if (edit.getAction() == Action.INSERT_GAP |
142 | || edit.getAction() == Action.DELETE_GAP) | |
143 | { | |
144 | 1 | mapInsertOrDelete(edit, undo, originalSequences, |
145 | mapTo.getSequences(), targetCopies, gapChar, result, | |
146 | mappings); | |
147 | } | |
148 | } | |
149 | 1 | return result.getSize() > 0 ? result : null; |
150 | } | |
151 | ||
152 | /** | |
153 | * Helper method to map an edit command to insert or delete gaps. | |
154 | * | |
155 | * @param edit | |
156 | * the original command | |
157 | * @param undo | |
158 | * if true, the action is to undo the command | |
159 | * @param originalSequences | |
160 | * the sequences the command acted on | |
161 | * @param targetSeqs | |
162 | * @param targetCopies | |
163 | * @param gapChar | |
164 | * @param result | |
165 | * the new EditCommand to add mapped commands to | |
166 | * @param mappings | |
167 | */ | |
168 | 1 | protected static void mapInsertOrDelete(Edit edit, boolean undo, |
169 | Map<SequenceI, SequenceI> originalSequences, | |
170 | final List<SequenceI> targetSeqs, | |
171 | Map<SequenceI, SequenceI> targetCopies, char gapChar, | |
172 | EditCommand result, List<AlignedCodonFrame> mappings) | |
173 | { | |
174 | 1 | Action action = edit.getAction(); |
175 | ||
176 | /* | |
177 | * Invert sense of action if an Undo. | |
178 | */ | |
179 | 1 | if (undo) |
180 | { | |
181 | 0 | action = action.getUndoAction(); |
182 | } | |
183 | 1 | final int count = edit.getNumber(); |
184 | 1 | final int editPos = edit.getPosition(); |
185 | 1 | for (SequenceI seq : edit.getSequences()) |
186 | { | |
187 | /* | |
188 | * Get residue position at (or to right of) edit location. Note we use our | |
189 | * 'copy' of the sequence before editing for this. | |
190 | */ | |
191 | 1 | SequenceI ds = seq.getDatasetSequence(); |
192 | 1 | if (ds == null) |
193 | { | |
194 | 0 | continue; |
195 | } | |
196 | 1 | final SequenceI actedOn = originalSequences.get(ds); |
197 | 1 | final int seqpos = actedOn.findPosition(editPos); |
198 | ||
199 | /* | |
200 | * Determine all mappings from this position to mapped sequences. | |
201 | */ | |
202 | 1 | SearchResultsI sr = buildSearchResults(seq, seqpos, mappings); |
203 | ||
204 | 1 | if (!sr.isEmpty()) |
205 | { | |
206 | 1 | for (SequenceI targetSeq : targetSeqs) |
207 | { | |
208 | 1 | ds = targetSeq.getDatasetSequence(); |
209 | 1 | if (ds == null) |
210 | { | |
211 | 0 | continue; |
212 | } | |
213 | 1 | SequenceI copyTarget = targetCopies.get(ds); |
214 | 1 | final int[] match = sr.getResults(copyTarget, 0, |
215 | copyTarget.getLength()); | |
216 | 1 | if (match != null) |
217 | { | |
218 | 1 | final int ratio = 3; // TODO: compute this - how? |
219 | 1 | final int mappedCount = count * ratio; |
220 | ||
221 | /* | |
222 | * Shift Delete start position left, as it acts on positions to its | |
223 | * right. | |
224 | */ | |
225 | 1 | int mappedEditPos = action == Action.DELETE_GAP |
226 | ? match[0] - mappedCount | |
227 | : match[0]; | |
228 | 1 | Edit e = result.new Edit(action, new SequenceI[] { targetSeq }, |
229 | mappedEditPos, mappedCount, gapChar); | |
230 | 1 | result.addEdit(e); |
231 | ||
232 | /* | |
233 | * and 'apply' the edit to our copy of its target sequence | |
234 | */ | |
235 | 1 | if (action == Action.INSERT_GAP) |
236 | { | |
237 | 1 | copyTarget.setSequence(new String( |
238 | StringUtils.insertCharAt(copyTarget.getSequence(), | |
239 | mappedEditPos, mappedCount, gapChar))); | |
240 | } | |
241 | 0 | else if (action == Action.DELETE_GAP) |
242 | { | |
243 | 0 | copyTarget.setSequence(new String( |
244 | StringUtils.deleteChars(copyTarget.getSequence(), | |
245 | mappedEditPos, mappedEditPos + mappedCount))); | |
246 | } | |
247 | } | |
248 | } | |
249 | } | |
250 | /* | |
251 | * and 'apply' the edit to our copy of its source sequence | |
252 | */ | |
253 | 1 | if (action == Action.INSERT_GAP) |
254 | { | |
255 | 1 | actedOn.setSequence(new String(StringUtils.insertCharAt( |
256 | actedOn.getSequence(), editPos, count, gapChar))); | |
257 | } | |
258 | 0 | else if (action == Action.DELETE_GAP) |
259 | { | |
260 | 0 | actedOn.setSequence(new String(StringUtils.deleteChars( |
261 | actedOn.getSequence(), editPos, editPos + count))); | |
262 | } | |
263 | } | |
264 | } | |
265 | ||
266 | /** | |
267 | * Returns a SearchResults object describing the mapped region corresponding | |
268 | * to the specified sequence position. | |
269 | * | |
270 | * @param seq | |
271 | * @param index | |
272 | * @param seqmappings | |
273 | * @return | |
274 | */ | |
275 | 121 | public static SearchResultsI buildSearchResults(SequenceI seq, int index, |
276 | List<AlignedCodonFrame> seqmappings) | |
277 | { | |
278 | 121 | SearchResultsI results = new SearchResults(); |
279 | 121 | addSearchResults(results, seq, index, seqmappings); |
280 | 121 | return results; |
281 | } | |
282 | ||
283 | /** | |
284 | * Adds entries to a SearchResults object describing the mapped region | |
285 | * corresponding to the specified sequence position. | |
286 | * | |
287 | * @param results | |
288 | * @param seq | |
289 | * @param index | |
290 | * @param seqmappings | |
291 | */ | |
292 | 125 | public static void addSearchResults(SearchResultsI results, SequenceI seq, |
293 | int index, List<AlignedCodonFrame> seqmappings) | |
294 | { | |
295 | 125 | if (index >= seq.getStart() && index <= seq.getEnd()) |
296 | { | |
297 | 125 | for (AlignedCodonFrame acf : seqmappings) |
298 | { | |
299 | 125 | acf.markMappedRegion(seq, index, results); |
300 | } | |
301 | } | |
302 | } | |
303 | ||
304 | /** | |
305 | * Returns a (possibly empty) SequenceGroup containing any sequences in the | |
306 | * mapped viewport corresponding to the given group in the source viewport. | |
307 | * | |
308 | * @param sg | |
309 | * @param mapFrom | |
310 | * @param mapTo | |
311 | * @return | |
312 | */ | |
313 | 11 | public static SequenceGroup mapSequenceGroup(final SequenceGroup sg, |
314 | final AlignViewportI mapFrom, final AlignViewportI mapTo) | |
315 | { | |
316 | /* | |
317 | * Note the SequenceGroup holds aligned sequences, the mappings hold dataset | |
318 | * sequences. | |
319 | */ | |
320 | 11 | boolean targetIsNucleotide = mapTo.isNucleotide(); |
321 | 11 | AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo; |
322 | 11 | List<AlignedCodonFrame> codonFrames = protein.getAlignment() |
323 | .getCodonFrames(); | |
324 | /* | |
325 | * Copy group name, colours etc, but not sequences or sequence colour scheme | |
326 | */ | |
327 | 11 | SequenceGroup mappedGroup = new SequenceGroup(sg); |
328 | 11 | mappedGroup.setColourScheme(mapTo.getGlobalColourScheme()); |
329 | 11 | mappedGroup.clear(); |
330 | ||
331 | 11 | int minStartCol = -1; |
332 | 11 | int maxEndCol = -1; |
333 | 11 | final int selectionStartRes = sg.getStartRes(); |
334 | 11 | final int selectionEndRes = sg.getEndRes(); |
335 | 11 | for (SequenceI selected : sg.getSequences()) |
336 | { | |
337 | /* | |
338 | * Find the widest range of non-gapped positions in the selection range | |
339 | */ | |
340 | 24 | int firstUngappedPos = selectionStartRes; |
341 | 28 | while (firstUngappedPos <= selectionEndRes |
342 | && Comparison.isGap(selected.getCharAt(firstUngappedPos))) | |
343 | { | |
344 | 4 | firstUngappedPos++; |
345 | } | |
346 | ||
347 | /* | |
348 | * If this sequence is only gaps in the selected range, skip it | |
349 | */ | |
350 | 24 | if (firstUngappedPos > selectionEndRes) |
351 | { | |
352 | 1 | continue; |
353 | } | |
354 | ||
355 | 23 | int lastUngappedPos = selectionEndRes; |
356 | 23 | while (lastUngappedPos >= selectionStartRes |
357 | && Comparison.isGap(selected.getCharAt(lastUngappedPos))) | |
358 | { | |
359 | 0 | lastUngappedPos--; |
360 | } | |
361 | ||
362 | /* | |
363 | * Find the selected start/end residue positions in sequence | |
364 | */ | |
365 | 23 | int startResiduePos = selected.findPosition(firstUngappedPos); |
366 | 23 | int endResiduePos = selected.findPosition(lastUngappedPos); |
367 | 23 | for (SequenceI seq : mapTo.getAlignment().getSequences()) |
368 | { | |
369 | 69 | int mappedStartResidue = 0; |
370 | 69 | int mappedEndResidue = 0; |
371 | 69 | for (AlignedCodonFrame acf : codonFrames) |
372 | { | |
373 | // rather than use acf.getCoveringMapping() we iterate through all | |
374 | // mappings to make sure all CDS are selected for a protein | |
375 | 69 | for (SequenceToSequenceMapping map : acf.getMappings()) |
376 | { | |
377 | 178 | if (map.covers(selected) && map.covers(seq)) |
378 | { | |
379 | /* | |
380 | * Found a sequence mapping. Locate the start/end mapped residues. | |
381 | */ | |
382 | 23 | List<AlignedCodonFrame> mapping = Arrays |
383 | .asList(new AlignedCodonFrame[] | |
384 | { acf }); | |
385 | // locate start | |
386 | 23 | SearchResultsI sr = buildSearchResults(selected, |
387 | startResiduePos, mapping); | |
388 | 23 | for (SearchResultMatchI m : sr.getResults()) |
389 | { | |
390 | 24 | mappedStartResidue = m.getStart(); |
391 | 24 | mappedEndResidue = m.getEnd(); |
392 | } | |
393 | // locate end - allowing for adjustment of start range | |
394 | 23 | sr = buildSearchResults(selected, endResiduePos, mapping); |
395 | 23 | for (SearchResultMatchI m : sr.getResults()) |
396 | { | |
397 | 24 | mappedStartResidue = Math.min(mappedStartResidue, |
398 | m.getStart()); | |
399 | 24 | mappedEndResidue = Math.max(mappedEndResidue, m.getEnd()); |
400 | } | |
401 | ||
402 | /* | |
403 | * Find the mapped aligned columns, save the range. Note findIndex | |
404 | * returns a base 1 position, SequenceGroup uses base 0 | |
405 | */ | |
406 | 23 | int mappedStartCol = seq.findIndex(mappedStartResidue) - 1; |
407 | 23 | minStartCol = minStartCol == -1 ? mappedStartCol |
408 | : Math.min(minStartCol, mappedStartCol); | |
409 | 23 | int mappedEndCol = seq.findIndex(mappedEndResidue) - 1; |
410 | 23 | maxEndCol = maxEndCol == -1 ? mappedEndCol |
411 | : Math.max(maxEndCol, mappedEndCol); | |
412 | 23 | mappedGroup.addSequence(seq, false); |
413 | 23 | break; |
414 | } | |
415 | } | |
416 | } | |
417 | } | |
418 | } | |
419 | 11 | mappedGroup.setStartRes(minStartCol < 0 ? 0 : minStartCol); |
420 | 11 | mappedGroup.setEndRes(maxEndCol < 0 ? 0 : maxEndCol); |
421 | 11 | return mappedGroup; |
422 | } | |
423 | ||
424 | /** | |
425 | * Returns an OrderCommand equivalent to the given one, but acting on mapped | |
426 | * sequences as described by the mappings, or null if no mapping can be made. | |
427 | * | |
428 | * @param command | |
429 | * the original order command | |
430 | * @param undo | |
431 | * if true, the action is to undo the sort | |
432 | * @param mapTo | |
433 | * the alignment we are mapping to | |
434 | * @param mappings | |
435 | * the mappings available | |
436 | * @return | |
437 | */ | |
438 | 0 | public static CommandI mapOrderCommand(OrderCommand command, boolean undo, |
439 | AlignmentI mapTo, List<AlignedCodonFrame> mappings) | |
440 | { | |
441 | 0 | SequenceI[] sortOrder = command.getSequenceOrder(undo); |
442 | 0 | List<SequenceI> mappedOrder = new ArrayList<>(); |
443 | 0 | int j = 0; |
444 | ||
445 | /* | |
446 | * Assumption: we are only interested in a cDNA/protein mapping; refactor in | |
447 | * future if we want to support sorting (c)dna as (c)dna or protein as | |
448 | * protein | |
449 | */ | |
450 | 0 | boolean mappingToNucleotide = mapTo.isNucleotide(); |
451 | 0 | for (SequenceI seq : sortOrder) |
452 | { | |
453 | 0 | for (AlignedCodonFrame acf : mappings) |
454 | { | |
455 | 0 | for (SequenceI seq2 : mapTo.getSequences()) |
456 | { | |
457 | /* | |
458 | * the corresponding peptide / CDS is the one for which there is | |
459 | * a complete ('covering') mapping to 'seq' | |
460 | */ | |
461 | 0 | SequenceI peptide = mappingToNucleotide ? seq2 : seq; |
462 | 0 | SequenceI cds = mappingToNucleotide ? seq : seq2; |
463 | 0 | SequenceToSequenceMapping s2s = acf.getCoveringMapping(cds, |
464 | peptide); | |
465 | 0 | if (s2s != null) |
466 | { | |
467 | 0 | mappedOrder.add(seq2); |
468 | 0 | j++; |
469 | 0 | break; |
470 | } | |
471 | } | |
472 | } | |
473 | } | |
474 | ||
475 | /* | |
476 | * Return null if no mappings made. | |
477 | */ | |
478 | 0 | if (j == 0) |
479 | { | |
480 | 0 | return null; |
481 | } | |
482 | ||
483 | /* | |
484 | * Add any unmapped sequences on the end of the sort in their original | |
485 | * ordering. | |
486 | */ | |
487 | 0 | if (j < mapTo.getHeight()) |
488 | { | |
489 | 0 | for (SequenceI seq : mapTo.getSequences()) |
490 | { | |
491 | 0 | if (!mappedOrder.contains(seq)) |
492 | { | |
493 | 0 | mappedOrder.add(seq); |
494 | } | |
495 | } | |
496 | } | |
497 | ||
498 | /* | |
499 | * Have to sort the sequences before constructing the OrderCommand - which | |
500 | * then resorts them?!? | |
501 | */ | |
502 | 0 | final SequenceI[] mappedOrderArray = mappedOrder |
503 | .toArray(new SequenceI[mappedOrder.size()]); | |
504 | 0 | SequenceI[] oldOrder = mapTo.getSequencesArray(); |
505 | 0 | AlignmentSorter.sortBy(mapTo, new AlignmentOrder(mappedOrderArray)); |
506 | 0 | final OrderCommand result = new OrderCommand(command.getDescription(), |
507 | oldOrder, mapTo); | |
508 | 0 | return result; |
509 | } | |
510 | ||
511 | /** | |
512 | * Returns a ColumnSelection in the 'mapTo' view which corresponds to the | |
513 | * given selection in the 'mapFrom' view. We assume one is nucleotide, the | |
514 | * other is protein (and holds the mappings from codons to protein residues). | |
515 | * | |
516 | * @param colsel | |
517 | * @param mapFrom | |
518 | * @param mapTo | |
519 | * @return | |
520 | */ | |
521 | 13 | public static void mapColumnSelection(ColumnSelection colsel, |
522 | HiddenColumns hiddencols, AlignViewportI mapFrom, | |
523 | AlignViewportI mapTo, ColumnSelection newColSel, | |
524 | HiddenColumns newHidden) | |
525 | { | |
526 | 13 | boolean targetIsNucleotide = mapTo.isNucleotide(); |
527 | 13 | AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo; |
528 | 13 | List<AlignedCodonFrame> codonFrames = protein.getAlignment() |
529 | .getCodonFrames(); | |
530 | ||
531 | 13 | if (colsel == null) |
532 | { | |
533 | 1 | return; |
534 | } | |
535 | ||
536 | 12 | char fromGapChar = mapFrom.getAlignment().getGapCharacter(); |
537 | ||
538 | /* | |
539 | * For each mapped column, find the range of columns that residues in that | |
540 | * column map to. | |
541 | */ | |
542 | 12 | List<SequenceI> fromSequences = mapFrom.getAlignment().getSequences(); |
543 | 12 | List<SequenceI> toSequences = mapTo.getAlignment().getSequences(); |
544 | ||
545 | 12 | for (Integer sel : colsel.getSelected()) |
546 | { | |
547 | 12 | mapColumn(sel.intValue(), codonFrames, newColSel, fromSequences, |
548 | toSequences, fromGapChar); | |
549 | } | |
550 | ||
551 | 12 | Iterator<int[]> regions = hiddencols.iterator(); |
552 | 18 | while (regions.hasNext()) |
553 | { | |
554 | 6 | mapHiddenColumns(regions.next(), codonFrames, newHidden, |
555 | fromSequences, toSequences, fromGapChar); | |
556 | } | |
557 | 12 | return; |
558 | } | |
559 | ||
560 | /** | |
561 | * Helper method that maps a [start, end] hidden column range to its mapped | |
562 | * equivalent | |
563 | * | |
564 | * @param hidden | |
565 | * @param mappings | |
566 | * @param mappedColumns | |
567 | * @param fromSequences | |
568 | * @param toSequences | |
569 | * @param fromGapChar | |
570 | */ | |
571 | 6 | protected static void mapHiddenColumns(int[] hidden, |
572 | List<AlignedCodonFrame> mappings, HiddenColumns mappedColumns, | |
573 | List<SequenceI> fromSequences, List<SequenceI> toSequences, | |
574 | char fromGapChar) | |
575 | { | |
576 | 12 | for (int col = hidden[0]; col <= hidden[1]; col++) |
577 | { | |
578 | 6 | int[] mappedTo = findMappedColumns(col, mappings, fromSequences, |
579 | toSequences, fromGapChar); | |
580 | ||
581 | /* | |
582 | * Add the range of hidden columns to the mapped selection (converting | |
583 | * base 1 to base 0). | |
584 | */ | |
585 | 6 | if (mappedTo != null) |
586 | { | |
587 | 5 | mappedColumns.hideColumns(mappedTo[0] - 1, mappedTo[1] - 1); |
588 | } | |
589 | } | |
590 | } | |
591 | ||
592 | /** | |
593 | * Helper method to map one column selection | |
594 | * | |
595 | * @param col | |
596 | * the column number (base 0) | |
597 | * @param mappings | |
598 | * the sequence mappings | |
599 | * @param mappedColumns | |
600 | * the mapped column selections to add to | |
601 | * @param fromSequences | |
602 | * @param toSequences | |
603 | * @param fromGapChar | |
604 | */ | |
605 | 12 | protected static void mapColumn(int col, List<AlignedCodonFrame> mappings, |
606 | ColumnSelection mappedColumns, List<SequenceI> fromSequences, | |
607 | List<SequenceI> toSequences, char fromGapChar) | |
608 | { | |
609 | 12 | int[] mappedTo = findMappedColumns(col, mappings, fromSequences, |
610 | toSequences, fromGapChar); | |
611 | ||
612 | /* | |
613 | * Add the range of mapped columns to the mapped selection (converting | |
614 | * base 1 to base 0). Note that this may include intron-only regions which | |
615 | * lie between the start and end ranges of the selection. | |
616 | */ | |
617 | 12 | if (mappedTo != null) |
618 | { | |
619 | 48 | for (int i = mappedTo[0]; i <= mappedTo[1]; i++) |
620 | { | |
621 | 37 | mappedColumns.addElement(i - 1); |
622 | } | |
623 | } | |
624 | } | |
625 | ||
626 | /** | |
627 | * Helper method to find the range of columns mapped to from one column. | |
628 | * Returns the maximal range of columns mapped to from all sequences in the | |
629 | * source column, or null if no mappings were found. | |
630 | * | |
631 | * @param col | |
632 | * @param mappings | |
633 | * @param fromSequences | |
634 | * @param toSequences | |
635 | * @param fromGapChar | |
636 | * @return | |
637 | */ | |
638 | 18 | protected static int[] findMappedColumns(int col, |
639 | List<AlignedCodonFrame> mappings, List<SequenceI> fromSequences, | |
640 | List<SequenceI> toSequences, char fromGapChar) | |
641 | { | |
642 | 18 | int[] mappedTo = new int[] { Integer.MAX_VALUE, Integer.MIN_VALUE }; |
643 | 18 | boolean found = false; |
644 | ||
645 | /* | |
646 | * For each sequence in the 'from' alignment | |
647 | */ | |
648 | 18 | for (SequenceI fromSeq : fromSequences) |
649 | { | |
650 | /* | |
651 | * Ignore gaps (unmapped anyway) | |
652 | */ | |
653 | 54 | if (fromSeq.getCharAt(col) == fromGapChar) |
654 | { | |
655 | 20 | continue; |
656 | } | |
657 | ||
658 | /* | |
659 | * Get the residue position and find the mapped position. | |
660 | */ | |
661 | 34 | int residuePos = fromSeq.findPosition(col); |
662 | 34 | SearchResultsI sr = buildSearchResults(fromSeq, residuePos, mappings); |
663 | 34 | for (SearchResultMatchI m : sr.getResults()) |
664 | { | |
665 | 44 | int mappedStartResidue = m.getStart(); |
666 | 44 | int mappedEndResidue = m.getEnd(); |
667 | 44 | SequenceI mappedSeq = m.getSequence(); |
668 | ||
669 | /* | |
670 | * Locate the aligned sequence whose dataset is mappedSeq. TODO a | |
671 | * datamodel that can do this efficiently. | |
672 | */ | |
673 | 44 | for (SequenceI toSeq : toSequences) |
674 | { | |
675 | 88 | if (toSeq.getDatasetSequence() == mappedSeq |
676 | && mappedStartResidue >= toSeq.getStart() | |
677 | && mappedEndResidue <= toSeq.getEnd()) | |
678 | { | |
679 | 44 | int mappedStartCol = toSeq.findIndex(mappedStartResidue); |
680 | 44 | int mappedEndCol = toSeq.findIndex(mappedEndResidue); |
681 | 44 | mappedTo[0] = Math.min(mappedTo[0], mappedStartCol); |
682 | 44 | mappedTo[1] = Math.max(mappedTo[1], mappedEndCol); |
683 | 44 | found = true; |
684 | 44 | break; |
685 | // note: remove break if we ever want to map one to many sequences | |
686 | } | |
687 | } | |
688 | } | |
689 | } | |
690 | 18 | return found ? mappedTo : null; |
691 | } | |
692 | ||
693 | /** | |
694 | * Returns the mapped codon or codons for a given aligned sequence column | |
695 | * position (base 0). | |
696 | * | |
697 | * @param seq | |
698 | * an aligned peptide sequence | |
699 | * @param col | |
700 | * an aligned column position (base 0) | |
701 | * @param mappings | |
702 | * a set of codon mappings | |
703 | * @return the bases of the mapped codon(s) in the cDNA dataset sequence(s), | |
704 | * or an empty list if none found | |
705 | */ | |
706 | 10704 | public static List<char[]> findCodonsFor(SequenceI seq, int col, |
707 | List<AlignedCodonFrame> mappings) | |
708 | { | |
709 | 10704 | List<char[]> result = new ArrayList<>(); |
710 | 10704 | int dsPos = seq.findPosition(col); |
711 | 10704 | for (AlignedCodonFrame mapping : mappings) |
712 | { | |
713 | 245092 | if (mapping.involvesSequence(seq)) |
714 | { | |
715 | 21358 | List<char[]> codons = mapping |
716 | .getMappedCodons(seq.getDatasetSequence(), dsPos); | |
717 | 21358 | if (codons != null) |
718 | { | |
719 | 21307 | result.addAll(codons); |
720 | } | |
721 | } | |
722 | } | |
723 | 10704 | return result; |
724 | } | |
725 | ||
726 | /** | |
727 | * Converts a series of [start, end] range pairs into an array of individual | |
728 | * positions. This also caters for 'reverse strand' (start > end) cases. | |
729 | * | |
730 | * @param ranges | |
731 | * @return | |
732 | */ | |
733 | 21336 | public static int[] flattenRanges(int[] ranges) |
734 | { | |
735 | /* | |
736 | * Count how many positions altogether | |
737 | */ | |
738 | 21336 | int count = 0; |
739 | 42695 | for (int i = 0; i < ranges.length - 1; i += 2) |
740 | { | |
741 | 21359 | count += Math.abs(ranges[i + 1] - ranges[i]) + 1; |
742 | } | |
743 | ||
744 | 21336 | int[] result = new int[count]; |
745 | 21336 | int k = 0; |
746 | 42695 | for (int i = 0; i < ranges.length - 1; i += 2) |
747 | { | |
748 | 21359 | int from = ranges[i]; |
749 | 21359 | final int to = ranges[i + 1]; |
750 | 21359 | int step = from <= to ? 1 : -1; |
751 | 21359 | do |
752 | { | |
753 | 64039 | result[k++] = from; |
754 | 64039 | from += step; |
755 | 64039 | } while (from != to + step); |
756 | } | |
757 | 21336 | return result; |
758 | } | |
759 | ||
760 | /** | |
761 | * Returns a list of any mappings that are from or to the given (aligned or | |
762 | * dataset) sequence. | |
763 | * | |
764 | * @param sequence | |
765 | * @param mappings | |
766 | * @return | |
767 | */ | |
768 | 135 | public static List<AlignedCodonFrame> findMappingsForSequence( |
769 | SequenceI sequence, List<AlignedCodonFrame> mappings) | |
770 | { | |
771 | 135 | return findMappingsForSequenceAndOthers(sequence, mappings, null); |
772 | } | |
773 | ||
774 | /** | |
775 | * Returns a list of any mappings that are from or to the given (aligned or | |
776 | * dataset) sequence, optionally limited to mappings involving one of a given | |
777 | * list of sequences. | |
778 | * | |
779 | * @param sequence | |
780 | * @param mappings | |
781 | * @param filterList | |
782 | * @return | |
783 | */ | |
784 | 143 | public static List<AlignedCodonFrame> findMappingsForSequenceAndOthers( |
785 | SequenceI sequence, List<AlignedCodonFrame> mappings, | |
786 | List<SequenceI> filterList) | |
787 | { | |
788 | 143 | List<AlignedCodonFrame> result = new ArrayList<>(); |
789 | 143 | if (sequence == null || mappings == null) |
790 | { | |
791 | 5 | return result; |
792 | } | |
793 | 138 | for (AlignedCodonFrame mapping : mappings) |
794 | { | |
795 | 1448 | if (mapping.involvesSequence(sequence)) |
796 | { | |
797 | 162 | if (filterList != null) |
798 | { | |
799 | 11 | for (SequenceI otherseq : filterList) |
800 | { | |
801 | 95 | SequenceI otherDataset = otherseq.getDatasetSequence(); |
802 | 95 | if (otherseq == sequence |
803 | || otherseq == sequence.getDatasetSequence() | |
804 | || (otherDataset != null && (otherDataset == sequence | |
805 | || otherDataset == sequence | |
806 | .getDatasetSequence()))) | |
807 | { | |
808 | // skip sequences in subset which directly relate to sequence | |
809 | 4 | continue; |
810 | } | |
811 | 91 | if (mapping.involvesSequence(otherseq)) |
812 | { | |
813 | // selected a mapping contained in subselect alignment | |
814 | 6 | result.add(mapping); |
815 | 6 | break; |
816 | } | |
817 | } | |
818 | } | |
819 | else | |
820 | { | |
821 | 151 | result.add(mapping); |
822 | } | |
823 | } | |
824 | } | |
825 | 138 | return result; |
826 | } | |
827 | ||
828 | /** | |
829 | * Returns the total length of the supplied ranges, which may be as single | |
830 | * [start, end] or multiple [start, end, start, end ...] | |
831 | * | |
832 | * @param ranges | |
833 | * @return | |
834 | */ | |
835 | 153 | public static int getLength(List<int[]> ranges) |
836 | { | |
837 | 153 | if (ranges == null) |
838 | { | |
839 | 1 | return 0; |
840 | } | |
841 | 152 | int length = 0; |
842 | 152 | for (int[] range : ranges) |
843 | { | |
844 | 175 | if (range.length % 2 != 0) |
845 | { | |
846 | 0 | Console.error( |
847 | "Error unbalance start/end ranges: " + ranges.toString()); | |
848 | 0 | return 0; |
849 | } | |
850 | 379 | for (int i = 0; i < range.length - 1; i += 2) |
851 | { | |
852 | 204 | length += Math.abs(range[i + 1] - range[i]) + 1; |
853 | } | |
854 | } | |
855 | 152 | return length; |
856 | } | |
857 | ||
858 | /** | |
859 | * Answers true if any range includes the given value | |
860 | * | |
861 | * @param ranges | |
862 | * @param value | |
863 | * @return | |
864 | */ | |
865 | 22 | public static boolean contains(List<int[]> ranges, int value) |
866 | { | |
867 | 22 | if (ranges == null) |
868 | { | |
869 | 1 | return false; |
870 | } | |
871 | 21 | for (int[] range : ranges) |
872 | { | |
873 | 72 | if (range[1] >= range[0] && value >= range[0] && value <= range[1]) |
874 | { | |
875 | /* | |
876 | * value within ascending range | |
877 | */ | |
878 | 8 | return true; |
879 | } | |
880 | 64 | if (range[1] < range[0] && value <= range[0] && value >= range[1]) |
881 | { | |
882 | /* | |
883 | * value within descending range | |
884 | */ | |
885 | 5 | return true; |
886 | } | |
887 | } | |
888 | 8 | return false; |
889 | } | |
890 | ||
891 | /** | |
892 | * Removes a specified number of positions from the start of a ranges list. | |
893 | * For example, could be used to adjust cds ranges to allow for an incomplete | |
894 | * start codon. Subranges are removed completely, or their start positions | |
895 | * adjusted, until the required number of positions has been removed from the | |
896 | * range. Reverse strand ranges are supported. The input array is not | |
897 | * modified. | |
898 | * | |
899 | * @param removeCount | |
900 | * @param ranges | |
901 | * an array of [start, end, start, end...] positions | |
902 | * @return a new array with the first removeCount positions removed | |
903 | */ | |
904 | 21 | public static int[] removeStartPositions(int removeCount, |
905 | final int[] ranges) | |
906 | { | |
907 | 21 | if (removeCount <= 0) |
908 | { | |
909 | 5 | return ranges; |
910 | } | |
911 | ||
912 | 16 | int[] copy = Arrays.copyOf(ranges, ranges.length); |
913 | 16 | int sxpos = -1; |
914 | 16 | int cdspos = 0; |
915 | 28 | for (int x = 0; x < copy.length && sxpos == -1; x += 2) |
916 | { | |
917 | 28 | cdspos += Math.abs(copy[x + 1] - copy[x]) + 1; |
918 | 28 | if (removeCount < cdspos) |
919 | { | |
920 | /* | |
921 | * we have removed enough, time to finish | |
922 | */ | |
923 | 16 | sxpos = x; |
924 | ||
925 | /* | |
926 | * increment start of first exon, or decrement if reverse strand | |
927 | */ | |
928 | 16 | if (copy[x] <= copy[x + 1]) |
929 | { | |
930 | 9 | copy[x] = copy[x + 1] - cdspos + removeCount + 1; |
931 | } | |
932 | else | |
933 | { | |
934 | 7 | copy[x] = copy[x + 1] + cdspos - removeCount - 1; |
935 | } | |
936 | 16 | break; |
937 | } | |
938 | } | |
939 | ||
940 | 16 | if (sxpos > 0) |
941 | { | |
942 | /* | |
943 | * we dropped at least one entire sub-range - compact the array | |
944 | */ | |
945 | 10 | int[] nxon = new int[copy.length - sxpos]; |
946 | 10 | System.arraycopy(copy, sxpos, nxon, 0, copy.length - sxpos); |
947 | 10 | return nxon; |
948 | } | |
949 | 6 | return copy; |
950 | } | |
951 | ||
952 | /** | |
953 | * Answers true if range's start-end positions include those of queryRange, | |
954 | * where either range might be in reverse direction, else false | |
955 | * | |
956 | * @param range | |
957 | * a start-end range | |
958 | * @param queryRange | |
959 | * a candidate subrange of range (start2-end2) | |
960 | * @return | |
961 | */ | |
962 | 33 | public static boolean rangeContains(int[] range, int[] queryRange) |
963 | { | |
964 | 33 | if (range == null || queryRange == null || range.length != 2 |
965 | || queryRange.length != 2) | |
966 | { | |
967 | /* | |
968 | * invalid arguments | |
969 | */ | |
970 | 4 | return false; |
971 | } | |
972 | ||
973 | 29 | int min = Math.min(range[0], range[1]); |
974 | 29 | int max = Math.max(range[0], range[1]); |
975 | ||
976 | 29 | return (min <= queryRange[0] && max >= queryRange[0] |
977 | && min <= queryRange[1] && max >= queryRange[1]); | |
978 | } | |
979 | ||
980 | /** | |
981 | * Removes the specified number of positions from the given ranges. Provided | |
982 | * to allow a stop codon to be stripped from a CDS sequence so that it matches | |
983 | * the peptide translation length. | |
984 | * | |
985 | * @param positions | |
986 | * @param ranges | |
987 | * a list of (single) [start, end] ranges | |
988 | * @return | |
989 | */ | |
990 | 8 | public static void removeEndPositions(int positions, List<int[]> ranges) |
991 | { | |
992 | 8 | int toRemove = positions; |
993 | 8 | Iterator<int[]> it = new ReverseListIterator<>(ranges); |
994 | 19 | while (toRemove > 0) |
995 | { | |
996 | 11 | int[] endRange = it.next(); |
997 | 11 | if (endRange.length != 2) |
998 | { | |
999 | /* | |
1000 | * not coded for [start1, end1, start2, end2, ...] | |
1001 | */ | |
1002 | 0 | Console.error( |
1003 | "MappingUtils.removeEndPositions doesn't handle multiple ranges"); | |
1004 | 0 | return; |
1005 | } | |
1006 | ||
1007 | 11 | int length = endRange[1] - endRange[0] + 1; |
1008 | 11 | if (length <= 0) |
1009 | { | |
1010 | /* | |
1011 | * not coded for a reverse strand range (end < start) | |
1012 | */ | |
1013 | 0 | Console.error( |
1014 | "MappingUtils.removeEndPositions doesn't handle reverse strand"); | |
1015 | 0 | return; |
1016 | } | |
1017 | 11 | if (length > toRemove) |
1018 | { | |
1019 | 7 | endRange[1] -= toRemove; |
1020 | 7 | toRemove = 0; |
1021 | } | |
1022 | else | |
1023 | { | |
1024 | 4 | toRemove -= length; |
1025 | 4 | it.remove(); |
1026 | } | |
1027 | } | |
1028 | } | |
1029 | ||
1030 | /** | |
1031 | * Converts a list of {@code start-end} ranges to a single array of | |
1032 | * {@code start1, end1, start2, ... } ranges | |
1033 | * | |
1034 | * @param ranges | |
1035 | * @return | |
1036 | */ | |
1037 | 179656 | public static int[] rangeListToArray(List<int[]> ranges) |
1038 | { | |
1039 | 179656 | int rangeCount = ranges.size(); |
1040 | 179656 | int[] result = new int[rangeCount * 2]; |
1041 | 179655 | int j = 0; |
1042 | 359509 | for (int i = 0; i < rangeCount; i++) |
1043 | { | |
1044 | 179854 | int[] range = ranges.get(i); |
1045 | 179854 | result[j++] = range[0]; |
1046 | 179854 | result[j++] = range[1]; |
1047 | } | |
1048 | 179655 | return result; |
1049 | } | |
1050 | ||
1051 | /* | |
1052 | * Returns the maximal start-end positions in the given (ordered) list of | |
1053 | * ranges which is overlapped by the given begin-end range, or null if there | |
1054 | * is no overlap. | |
1055 | * | |
1056 | * <pre> | |
1057 | * Examples: | |
1058 | * if ranges is {[4, 8], [10, 12], [16, 19]} | |
1059 | * then | |
1060 | * findOverlap(ranges, 1, 20) == [4, 19] | |
1061 | * findOverlap(ranges, 6, 11) == [6, 11] | |
1062 | * findOverlap(ranges, 9, 15) == [10, 12] | |
1063 | * findOverlap(ranges, 13, 15) == null | |
1064 | * </pre> | |
1065 | * | |
1066 | * @param ranges | |
1067 | * @param begin | |
1068 | * @param end | |
1069 | * @return | |
1070 | */ | |
1071 | 26 | protected static int[] findOverlap(List<int[]> ranges, final int begin, |
1072 | final int end) | |
1073 | { | |
1074 | 26 | boolean foundStart = false; |
1075 | 26 | int from = 0; |
1076 | 26 | int to = 0; |
1077 | ||
1078 | /* | |
1079 | * traverse the ranges to find the first position (if any) >= begin, | |
1080 | * and the last position (if any) <= end | |
1081 | */ | |
1082 | 26 | for (int[] range : ranges) |
1083 | { | |
1084 | 89 | if (!foundStart) |
1085 | { | |
1086 | 51 | if (range[0] >= begin) |
1087 | { | |
1088 | /* | |
1089 | * first range that starts with, or follows, begin | |
1090 | */ | |
1091 | 16 | foundStart = true; |
1092 | 16 | from = Math.max(range[0], begin); |
1093 | } | |
1094 | 35 | else if (range[1] >= begin) |
1095 | { | |
1096 | /* | |
1097 | * first range that contains begin | |
1098 | */ | |
1099 | 9 | foundStart = true; |
1100 | 9 | from = begin; |
1101 | } | |
1102 | } | |
1103 | ||
1104 | 89 | if (range[0] <= end) |
1105 | { | |
1106 | 54 | to = Math.min(end, range[1]); |
1107 | } | |
1108 | } | |
1109 | ||
1110 | 26 | return foundStart && to >= from ? new int[] { from, to } : null; |
1111 | } | |
1112 | } |