Clover icon

Coverage Report

  1. Project Clover database Mon Nov 18 2024 09:56:54 GMT
  2. Package jalview.util

File MappingUtils.java

 

Coverage histogram

../../img/srcFileCovDistChart8.png
21% of files have more coverage

Code metrics

156
305
23
1
1,161
672
125
0.41
13.26
23
5.43

Classes

Class Line # Actions
MappingUtils 58 305 125
0.7809917378.1%
 

Contributing tests

This file is covered by 134 tests. .

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.util;
22   
23    import java.util.ArrayList;
24    import java.util.Arrays;
25    import java.util.HashMap;
26    import java.util.Iterator;
27    import java.util.List;
28    import java.util.Map;
29   
30    import jalview.analysis.AlignmentSorter;
31    import jalview.api.AlignViewportI;
32    import jalview.bin.Console;
33    import jalview.commands.CommandI;
34    import jalview.commands.EditCommand;
35    import jalview.commands.EditCommand.Action;
36    import jalview.commands.EditCommand.Edit;
37    import jalview.commands.OrderCommand;
38    import jalview.datamodel.AlignedCodonFrame;
39    import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
40    import jalview.datamodel.AlignmentI;
41    import jalview.datamodel.AlignmentOrder;
42    import jalview.datamodel.ColumnSelection;
43    import jalview.datamodel.HiddenColumns;
44    import jalview.datamodel.Mapping;
45    import jalview.datamodel.SearchResultMatchI;
46    import jalview.datamodel.SearchResults;
47    import jalview.datamodel.SearchResultsI;
48    import jalview.datamodel.Sequence;
49    import jalview.datamodel.SequenceGroup;
50    import jalview.datamodel.SequenceI;
51   
52    /**
53    * Helper methods for manipulations involving sequence mappings.
54    *
55    * @author gmcarstairs
56    *
57    */
 
58    public final class MappingUtils
59    {
60   
61    /**
62    * Helper method to map a CUT or PASTE command.
63    *
64    * @param edit
65    * the original command
66    * @param undo
67    * if true, the command is to be undone
68    * @param targetSeqs
69    * the mapped sequences to apply the mapped command to
70    * @param result
71    * the mapped EditCommand to add to
72    * @param mappings
73    */
 
74  0 toggle protected static void mapCutOrPaste(Edit edit, boolean undo,
75    List<SequenceI> targetSeqs, EditCommand result,
76    List<AlignedCodonFrame> mappings)
77    {
78  0 Action action = edit.getAction();
79  0 if (undo)
80    {
81  0 action = action.getUndoAction();
82    }
83    // TODO write this
84  0 Console.error("MappingUtils.mapCutOrPaste not yet implemented");
85    }
86   
87    /**
88    * Returns a new EditCommand representing the given command as mapped to the
89    * given sequences. If there is no mapping, returns null.
90    *
91    * @param command
92    * @param undo
93    * @param mapTo
94    * @param gapChar
95    * @param mappings
96    * @return
97    */
 
98  1 toggle public static EditCommand mapEditCommand(EditCommand command,
99    boolean undo, final AlignmentI mapTo, char gapChar,
100    List<AlignedCodonFrame> mappings)
101    {
102    /*
103    * For now, only support mapping from protein edits to cDna
104    */
105  1 if (!mapTo.isNucleotide())
106    {
107  0 return null;
108    }
109   
110    /*
111    * Cache a copy of the target sequences so we can mimic successive edits on
112    * them. This lets us compute mappings for all edits in the set.
113    */
114  1 Map<SequenceI, SequenceI> targetCopies = new HashMap<>();
115  1 for (SequenceI seq : mapTo.getSequences())
116    {
117  1 SequenceI ds = seq.getDatasetSequence();
118  1 if (ds != null)
119    {
120  1 final SequenceI copy = new Sequence(seq);
121  1 copy.setDatasetSequence(ds);
122  1 targetCopies.put(ds, copy);
123    }
124    }
125   
126    /*
127    * Compute 'source' sequences as they were before applying edits:
128    */
129  1 Map<SequenceI, SequenceI> originalSequences = command.priorState(undo);
130   
131  1 EditCommand result = new EditCommand();
132  1 Iterator<Edit> edits = command.getEditIterator(!undo);
133  2 while (edits.hasNext())
134    {
135  1 Edit edit = edits.next();
136  1 if (edit.getAction() == Action.CUT
137    || edit.getAction() == Action.PASTE)
138    {
139  0 mapCutOrPaste(edit, undo, mapTo.getSequences(), result, mappings);
140    }
141  1 else if (edit.getAction() == Action.INSERT_GAP
142    || edit.getAction() == Action.DELETE_GAP)
143    {
144  1 mapInsertOrDelete(edit, undo, originalSequences,
145    mapTo.getSequences(), targetCopies, gapChar, result,
146    mappings);
147    }
148    }
149  1 return result.getSize() > 0 ? result : null;
150    }
151   
152    /**
153    * Helper method to map an edit command to insert or delete gaps.
154    *
155    * @param edit
156    * the original command
157    * @param undo
158    * if true, the action is to undo the command
159    * @param originalSequences
160    * the sequences the command acted on
161    * @param targetSeqs
162    * @param targetCopies
163    * @param gapChar
164    * @param result
165    * the new EditCommand to add mapped commands to
166    * @param mappings
167    */
 
168  1 toggle protected static void mapInsertOrDelete(Edit edit, boolean undo,
169    Map<SequenceI, SequenceI> originalSequences,
170    final List<SequenceI> targetSeqs,
171    Map<SequenceI, SequenceI> targetCopies, char gapChar,
172    EditCommand result, List<AlignedCodonFrame> mappings)
173    {
174  1 Action action = edit.getAction();
175   
176    /*
177    * Invert sense of action if an Undo.
178    */
179  1 if (undo)
180    {
181  0 action = action.getUndoAction();
182    }
183  1 final int count = edit.getNumber();
184  1 final int editPos = edit.getPosition();
185  1 for (SequenceI seq : edit.getSequences())
186    {
187    /*
188    * Get residue position at (or to right of) edit location. Note we use our
189    * 'copy' of the sequence before editing for this.
190    */
191  1 SequenceI ds = seq.getDatasetSequence();
192  1 if (ds == null)
193    {
194  0 continue;
195    }
196  1 final SequenceI actedOn = originalSequences.get(ds);
197  1 final int seqpos = actedOn.findPosition(editPos);
198   
199    /*
200    * Determine all mappings from this position to mapped sequences.
201    */
202  1 SearchResultsI sr = buildSearchResults(seq, seqpos, mappings);
203   
204  1 if (!sr.isEmpty())
205    {
206  1 for (SequenceI targetSeq : targetSeqs)
207    {
208  1 ds = targetSeq.getDatasetSequence();
209  1 if (ds == null)
210    {
211  0 continue;
212    }
213  1 SequenceI copyTarget = targetCopies.get(ds);
214  1 final int[] match = sr.getResults(copyTarget, 0,
215    copyTarget.getLength());
216  1 if (match != null)
217    {
218  1 final int ratio = 3; // TODO: compute this - how?
219  1 final int mappedCount = count * ratio;
220   
221    /*
222    * Shift Delete start position left, as it acts on positions to its
223    * right.
224    */
225  1 int mappedEditPos = action == Action.DELETE_GAP
226    ? match[0] - mappedCount
227    : match[0];
228  1 Edit e = result.new Edit(action, new SequenceI[] { targetSeq },
229    mappedEditPos, mappedCount, gapChar);
230  1 result.addEdit(e);
231   
232    /*
233    * and 'apply' the edit to our copy of its target sequence
234    */
235  1 if (action == Action.INSERT_GAP)
236    {
237  1 copyTarget.setSequence(new String(
238    StringUtils.insertCharAt(copyTarget.getSequence(),
239    mappedEditPos, mappedCount, gapChar)));
240    }
241  0 else if (action == Action.DELETE_GAP)
242    {
243  0 copyTarget.setSequence(new String(
244    StringUtils.deleteChars(copyTarget.getSequence(),
245    mappedEditPos, mappedEditPos + mappedCount)));
246    }
247    }
248    }
249    }
250    /*
251    * and 'apply' the edit to our copy of its source sequence
252    */
253  1 if (action == Action.INSERT_GAP)
254    {
255  1 actedOn.setSequence(new String(StringUtils.insertCharAt(
256    actedOn.getSequence(), editPos, count, gapChar)));
257    }
258  0 else if (action == Action.DELETE_GAP)
259    {
260  0 actedOn.setSequence(new String(StringUtils.deleteChars(
261    actedOn.getSequence(), editPos, editPos + count)));
262    }
263    }
264    }
265   
266    /**
267    * Returns a SearchResults object describing the mapped region corresponding
268    * to the specified sequence position.
269    *
270    * @param seq
271    * @param index
272    * @param seqmappings
273    * @return
274    */
 
275  121 toggle public static SearchResultsI buildSearchResults(SequenceI seq, int index,
276    List<AlignedCodonFrame> seqmappings)
277    {
278  121 SearchResultsI results = new SearchResults();
279  121 addSearchResults(results, seq, index, seqmappings);
280  121 return results;
281    }
282   
283    /**
284    * Adds entries to a SearchResults object describing the mapped region
285    * corresponding to the specified sequence position.
286    *
287    * @param results
288    * @param seq
289    * @param index
290    * @param seqmappings
291    */
 
292  123 toggle public static void addSearchResults(SearchResultsI results, SequenceI seq,
293    int index, List<AlignedCodonFrame> seqmappings)
294    {
295  123 if (index >= seq.getStart() && index <= seq.getEnd())
296    {
297  123 for (AlignedCodonFrame acf : seqmappings)
298    {
299  123 acf.markMappedRegion(seq, index, results);
300    }
301    }
302    }
303   
304    /**
305    * Returns a (possibly empty) SequenceGroup containing any sequences in the
306    * mapped viewport corresponding to the given group in the source viewport.
307    *
308    * @param sg
309    * @param mapFrom
310    * @param mapTo
311    * @return
312    */
 
313  11 toggle public static SequenceGroup mapSequenceGroup(final SequenceGroup sg,
314    final AlignViewportI mapFrom, final AlignViewportI mapTo)
315    {
316    /*
317    * Note the SequenceGroup holds aligned sequences, the mappings hold dataset
318    * sequences.
319    */
320  11 boolean targetIsNucleotide = mapTo.isNucleotide();
321  11 AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
322  11 List<AlignedCodonFrame> codonFrames = protein.getAlignment()
323    .getCodonFrames();
324    /*
325    * Copy group name, colours etc, but not sequences or sequence colour scheme
326    */
327  11 SequenceGroup mappedGroup = new SequenceGroup(sg);
328  11 mappedGroup.setColourScheme(mapTo.getGlobalColourScheme());
329  11 mappedGroup.clear();
330   
331  11 int minStartCol = -1;
332  11 int maxEndCol = -1;
333  11 final int selectionStartRes = sg.getStartRes();
334  11 final int selectionEndRes = sg.getEndRes();
335  11 for (SequenceI selected : sg.getSequences())
336    {
337    /*
338    * Find the widest range of non-gapped positions in the selection range
339    */
340  24 int firstUngappedPos = selectionStartRes;
341  28 while (firstUngappedPos <= selectionEndRes
342    && Comparison.isGap(selected.getCharAt(firstUngappedPos)))
343    {
344  4 firstUngappedPos++;
345    }
346   
347    /*
348    * If this sequence is only gaps in the selected range, skip it
349    */
350  24 if (firstUngappedPos > selectionEndRes)
351    {
352  1 continue;
353    }
354   
355  23 int lastUngappedPos = selectionEndRes;
356  23 while (lastUngappedPos >= selectionStartRes
357    && Comparison.isGap(selected.getCharAt(lastUngappedPos)))
358    {
359  0 lastUngappedPos--;
360    }
361   
362    /*
363    * Find the selected start/end residue positions in sequence
364    */
365  23 int startResiduePos = selected.findPosition(firstUngappedPos);
366  23 int endResiduePos = selected.findPosition(lastUngappedPos);
367  23 for (SequenceI seq : mapTo.getAlignment().getSequences())
368    {
369  69 int mappedStartResidue = 0;
370  69 int mappedEndResidue = 0;
371  69 for (AlignedCodonFrame acf : codonFrames)
372    {
373    // rather than use acf.getCoveringMapping() we iterate through all
374    // mappings to make sure all CDS are selected for a protein
375  69 for (SequenceToSequenceMapping map : acf.getMappings())
376    {
377  178 if (map.covers(selected) && map.covers(seq))
378    {
379    /*
380    * Found a sequence mapping. Locate the start/end mapped residues.
381    */
382  23 List<AlignedCodonFrame> mapping = Arrays
383    .asList(new AlignedCodonFrame[]
384    { acf });
385    // locate start
386  23 SearchResultsI sr = buildSearchResults(selected,
387    startResiduePos, mapping);
388  23 for (SearchResultMatchI m : sr.getResults())
389    {
390  24 mappedStartResidue = m.getStart();
391  24 mappedEndResidue = m.getEnd();
392    }
393    // locate end - allowing for adjustment of start range
394  23 sr = buildSearchResults(selected, endResiduePos, mapping);
395  23 for (SearchResultMatchI m : sr.getResults())
396    {
397  24 mappedStartResidue = Math.min(mappedStartResidue,
398    m.getStart());
399  24 mappedEndResidue = Math.max(mappedEndResidue, m.getEnd());
400    }
401   
402    /*
403    * Find the mapped aligned columns, save the range. Note findIndex
404    * returns a base 1 position, SequenceGroup uses base 0
405    */
406  23 int mappedStartCol = seq.findIndex(mappedStartResidue) - 1;
407  23 minStartCol = minStartCol == -1 ? mappedStartCol
408    : Math.min(minStartCol, mappedStartCol);
409  23 int mappedEndCol = seq.findIndex(mappedEndResidue) - 1;
410  23 maxEndCol = maxEndCol == -1 ? mappedEndCol
411    : Math.max(maxEndCol, mappedEndCol);
412  23 mappedGroup.addSequence(seq, false);
413  23 break;
414    }
415    }
416    }
417    }
418    }
419  11 mappedGroup.setStartRes(minStartCol < 0 ? 0 : minStartCol);
420  11 mappedGroup.setEndRes(maxEndCol < 0 ? 0 : maxEndCol);
421  11 return mappedGroup;
422    }
423   
424    /**
425    * Returns an OrderCommand equivalent to the given one, but acting on mapped
426    * sequences as described by the mappings, or null if no mapping can be made.
427    *
428    * @param command
429    * the original order command
430    * @param undo
431    * if true, the action is to undo the sort
432    * @param mapTo
433    * the alignment we are mapping to
434    * @param mappings
435    * the mappings available
436    * @return
437    */
 
438  0 toggle public static CommandI mapOrderCommand(OrderCommand command, boolean undo,
439    AlignmentI mapTo, List<AlignedCodonFrame> mappings)
440    {
441  0 SequenceI[] sortOrder = command.getSequenceOrder(undo);
442  0 List<SequenceI> mappedOrder = new ArrayList<>();
443  0 int j = 0;
444   
445    /*
446    * Assumption: we are only interested in a cDNA/protein mapping; refactor in
447    * future if we want to support sorting (c)dna as (c)dna or protein as
448    * protein
449    */
450  0 boolean mappingToNucleotide = mapTo.isNucleotide();
451  0 for (SequenceI seq : sortOrder)
452    {
453  0 for (AlignedCodonFrame acf : mappings)
454    {
455  0 for (SequenceI seq2 : mapTo.getSequences())
456    {
457    /*
458    * the corresponding peptide / CDS is the one for which there is
459    * a complete ('covering') mapping to 'seq'
460    */
461  0 SequenceI peptide = mappingToNucleotide ? seq2 : seq;
462  0 SequenceI cds = mappingToNucleotide ? seq : seq2;
463  0 SequenceToSequenceMapping s2s = acf.getCoveringMapping(cds,
464    peptide);
465  0 if (s2s != null)
466    {
467  0 mappedOrder.add(seq2);
468  0 j++;
469  0 break;
470    }
471    }
472    }
473    }
474   
475    /*
476    * Return null if no mappings made.
477    */
478  0 if (j == 0)
479    {
480  0 return null;
481    }
482   
483    /*
484    * Add any unmapped sequences on the end of the sort in their original
485    * ordering.
486    */
487  0 if (j < mapTo.getHeight())
488    {
489  0 for (SequenceI seq : mapTo.getSequences())
490    {
491  0 if (!mappedOrder.contains(seq))
492    {
493  0 mappedOrder.add(seq);
494    }
495    }
496    }
497   
498    /*
499    * Have to sort the sequences before constructing the OrderCommand - which
500    * then resorts them?!?
501    */
502  0 final SequenceI[] mappedOrderArray = mappedOrder
503    .toArray(new SequenceI[mappedOrder.size()]);
504  0 SequenceI[] oldOrder = mapTo.getSequencesArray();
505  0 AlignmentSorter.sortBy(mapTo, new AlignmentOrder(mappedOrderArray));
506  0 final OrderCommand result = new OrderCommand(command.getDescription(),
507    oldOrder, mapTo);
508  0 return result;
509    }
510   
511    /**
512    * Returns a ColumnSelection in the 'mapTo' view which corresponds to the
513    * given selection in the 'mapFrom' view. We assume one is nucleotide, the
514    * other is protein (and holds the mappings from codons to protein residues).
515    *
516    * @param colsel
517    * @param mapFrom
518    * @param mapTo
519    * @return
520    */
 
521  13 toggle public static void mapColumnSelection(ColumnSelection colsel,
522    HiddenColumns hiddencols, AlignViewportI mapFrom,
523    AlignViewportI mapTo, ColumnSelection newColSel,
524    HiddenColumns newHidden)
525    {
526  13 boolean targetIsNucleotide = mapTo.isNucleotide();
527  13 AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
528  13 List<AlignedCodonFrame> codonFrames = protein.getAlignment()
529    .getCodonFrames();
530   
531  13 if (colsel == null)
532    {
533  1 return;
534    }
535   
536  12 char fromGapChar = mapFrom.getAlignment().getGapCharacter();
537   
538    /*
539    * For each mapped column, find the range of columns that residues in that
540    * column map to.
541    */
542  12 List<SequenceI> fromSequences = mapFrom.getAlignment().getSequences();
543  12 List<SequenceI> toSequences = mapTo.getAlignment().getSequences();
544   
545  12 for (Integer sel : colsel.getSelected())
546    {
547  12 mapColumn(sel.intValue(), codonFrames, newColSel, fromSequences,
548    toSequences, fromGapChar);
549    }
550   
551  12 Iterator<int[]> regions = hiddencols.iterator();
552  18 while (regions.hasNext())
553    {
554  6 mapHiddenColumns(regions.next(), codonFrames, newHidden,
555    fromSequences, toSequences, fromGapChar);
556    }
557  12 return;
558    }
559   
560    /**
561    * Helper method that maps a [start, end] hidden column range to its mapped
562    * equivalent
563    *
564    * @param hidden
565    * @param mappings
566    * @param mappedColumns
567    * @param fromSequences
568    * @param toSequences
569    * @param fromGapChar
570    */
 
571  6 toggle protected static void mapHiddenColumns(int[] hidden,
572    List<AlignedCodonFrame> mappings, HiddenColumns mappedColumns,
573    List<SequenceI> fromSequences, List<SequenceI> toSequences,
574    char fromGapChar)
575    {
576  12 for (int col = hidden[0]; col <= hidden[1]; col++)
577    {
578  6 int[] mappedTo = findMappedColumns(col, mappings, fromSequences,
579    toSequences, fromGapChar);
580   
581    /*
582    * Add the range of hidden columns to the mapped selection (converting
583    * base 1 to base 0).
584    */
585  6 if (mappedTo != null)
586    {
587  5 mappedColumns.hideColumns(mappedTo[0] - 1, mappedTo[1] - 1);
588    }
589    }
590    }
591   
592    /**
593    * Helper method to map one column selection
594    *
595    * @param col
596    * the column number (base 0)
597    * @param mappings
598    * the sequence mappings
599    * @param mappedColumns
600    * the mapped column selections to add to
601    * @param fromSequences
602    * @param toSequences
603    * @param fromGapChar
604    */
 
605  12 toggle protected static void mapColumn(int col, List<AlignedCodonFrame> mappings,
606    ColumnSelection mappedColumns, List<SequenceI> fromSequences,
607    List<SequenceI> toSequences, char fromGapChar)
608    {
609  12 int[] mappedTo = findMappedColumns(col, mappings, fromSequences,
610    toSequences, fromGapChar);
611   
612    /*
613    * Add the range of mapped columns to the mapped selection (converting
614    * base 1 to base 0). Note that this may include intron-only regions which
615    * lie between the start and end ranges of the selection.
616    */
617  12 if (mappedTo != null)
618    {
619  48 for (int i = mappedTo[0]; i <= mappedTo[1]; i++)
620    {
621  37 mappedColumns.addElement(i - 1);
622    }
623    }
624    }
625   
626    /**
627    * Helper method to find the range of columns mapped to from one column.
628    * Returns the maximal range of columns mapped to from all sequences in the
629    * source column, or null if no mappings were found.
630    *
631    * @param col
632    * @param mappings
633    * @param fromSequences
634    * @param toSequences
635    * @param fromGapChar
636    * @return
637    */
 
638  18 toggle public static int[] findMappedColumns(int col,
639    List<AlignedCodonFrame> mappings, List<SequenceI> fromSequences,
640    List<SequenceI> toSequences, char fromGapChar)
641    {
642  18 int[] mappedTo = new int[] { Integer.MAX_VALUE, Integer.MIN_VALUE };
643  18 boolean found = false;
644   
645    /*
646    * For each sequence in the 'from' alignment
647    */
648  18 for (SequenceI fromSeq : fromSequences)
649    {
650    /*
651    * Ignore gaps (unmapped anyway)
652    */
653  54 if (fromSeq.getCharAt(col) == fromGapChar)
654    {
655  20 continue;
656    }
657   
658    /*
659    * Get the residue position and find the mapped position.
660    */
661  34 int residuePos = fromSeq.findPosition(col);
662  34 SearchResultsI sr = buildSearchResults(fromSeq, residuePos, mappings);
663  34 for (SearchResultMatchI m : sr.getResults())
664    {
665  44 int mappedStartResidue = m.getStart();
666  44 int mappedEndResidue = m.getEnd();
667  44 SequenceI mappedSeq = m.getSequence();
668   
669    /*
670    * Locate the aligned sequence whose dataset is mappedSeq. TODO a
671    * datamodel that can do this efficiently.
672    */
673  44 for (SequenceI toSeq : toSequences)
674    {
675  88 if (toSeq.getDatasetSequence() == mappedSeq
676    && mappedStartResidue >= toSeq.getStart()
677    && mappedEndResidue <= toSeq.getEnd())
678    {
679  44 int mappedStartCol = toSeq.findIndex(mappedStartResidue);
680  44 int mappedEndCol = toSeq.findIndex(mappedEndResidue);
681  44 mappedTo[0] = Math.min(mappedTo[0], mappedStartCol);
682  44 mappedTo[1] = Math.max(mappedTo[1], mappedEndCol);
683  44 found = true;
684  44 break;
685    // note: remove break if we ever want to map one to many sequences
686    }
687    }
688    }
689    }
690  18 return found ? mappedTo : null;
691    }
692   
 
693  0 toggle public static SearchResultsI allMappedRegionsForColumn(int col,List<AlignedCodonFrame> mappings, List<SequenceI> fromSequences,
694    List<SequenceI> toSequences, char fromGapChar)
695    {
696  0 int[] mappedTo = new int[] { Integer.MAX_VALUE, Integer.MIN_VALUE };
697   
698  0 SearchResultsI allsr=new SearchResults();
699    /*
700    * For each sequence in the 'from' alignment
701    */
702  0 for (SequenceI fromSeq : fromSequences)
703    {
704    /*
705    * Ignore gaps (unmapped anyway)
706    */
707  0 if (fromSeq.getCharAt(col) == fromGapChar)
708    {
709  0 continue;
710    }
711   
712    /*
713    * Get the residue position and find the mapped position.
714    */
715  0 int residuePos = fromSeq.findPosition(col);
716  0 SearchResultsI sr = buildSearchResults(fromSeq, residuePos, mappings);
717   
718  0 for (SearchResultMatchI m : sr.getResults())
719    {
720  0 int mappedStartResidue = m.getStart();
721  0 int mappedEndResidue = m.getEnd();
722  0 SequenceI mappedSeq = m.getSequence();
723   
724    /*
725    * Locate the aligned sequence whose dataset is mappedSeq. TODO a
726    * datamodel that can do this efficiently.
727    */
728  0 for (SequenceI toSeq : toSequences)
729    {
730  0 if (toSeq.getDatasetSequence() == mappedSeq
731    && mappedStartResidue >= toSeq.getStart()
732    && mappedEndResidue <= toSeq.getEnd())
733    {
734  0 allsr.addResult(toSeq, new int[] { mappedStartResidue,mappedEndResidue});
735    }
736    }
737    }
738    }
739  0 return allsr;
740    }
741   
742    /**
743    * Returns the mapped codon or codons for a given aligned sequence column
744    * position (base 0).
745    *
746    * @param seq
747    * an aligned peptide sequence
748    * @param col
749    * an aligned column position (base 0)
750    * @param mappings
751    * a set of codon mappings
752    * @return the bases of the mapped codon(s) in the cDNA dataset sequence(s),
753    * or an empty list if none found
754    */
 
755  10704 toggle public static List<char[]> findCodonsFor(SequenceI seq, int col,
756    List<AlignedCodonFrame> mappings)
757    {
758  10704 List<char[]> result = new ArrayList<>();
759  10704 int dsPos = seq.findPosition(col);
760  10704 for (AlignedCodonFrame mapping : mappings)
761    {
762  245092 if (mapping.involvesSequence(seq))
763    {
764  21358 List<char[]> codons = mapping
765    .getMappedCodons(seq.getDatasetSequence(), dsPos);
766  21358 if (codons != null)
767    {
768  21307 result.addAll(codons);
769    }
770    }
771    }
772  10704 return result;
773    }
774   
775    /**
776    * Converts a series of [start, end] range pairs into an array of individual
777    * positions. This also caters for 'reverse strand' (start > end) cases.
778    *
779    * @param ranges
780    * @return
781    */
 
782  21336 toggle public static int[] flattenRanges(int[] ranges)
783    {
784    /*
785    * Count how many positions altogether
786    */
787  21336 int count = 0;
788  42695 for (int i = 0; i < ranges.length - 1; i += 2)
789    {
790  21359 count += Math.abs(ranges[i + 1] - ranges[i]) + 1;
791    }
792   
793  21336 int[] result = new int[count];
794  21336 int k = 0;
795  42695 for (int i = 0; i < ranges.length - 1; i += 2)
796    {
797  21359 int from = ranges[i];
798  21359 final int to = ranges[i + 1];
799  21359 int step = from <= to ? 1 : -1;
800  21359 do
801    {
802  64039 result[k++] = from;
803  64039 from += step;
804  64039 } while (from != to + step);
805    }
806  21336 return result;
807    }
808   
809    /**
810    * Returns a list of any mappings that are from or to the given (aligned or
811    * dataset) sequence.
812    *
813    * @param sequence
814    * @param mappings
815    * @return
816    */
 
817  135 toggle public static List<AlignedCodonFrame> findMappingsForSequence(
818    SequenceI sequence, List<AlignedCodonFrame> mappings)
819    {
820  135 return findMappingsForSequenceAndOthers(sequence, mappings, null);
821    }
822   
823    /**
824    * Returns a list of any mappings that are from or to the given (aligned or
825    * dataset) sequence, optionally limited to mappings involving one of a given
826    * list of sequences.
827    *
828    * @param sequence
829    * @param mappings
830    * @param filterList
831    * @return
832    */
 
833  141 toggle public static List<AlignedCodonFrame> findMappingsForSequenceAndOthers(
834    SequenceI sequence, List<AlignedCodonFrame> mappings,
835    List<SequenceI> filterList)
836    {
837  141 List<AlignedCodonFrame> result = new ArrayList<>();
838  141 if (sequence == null || mappings == null)
839    {
840  5 return result;
841    }
842  136 for (AlignedCodonFrame mapping : mappings)
843    {
844  1402 if (mapping.involvesSequence(sequence))
845    {
846  158 if (filterList != null)
847    {
848  7 for (SequenceI otherseq : filterList)
849    {
850  51 SequenceI otherDataset = otherseq.getDatasetSequence();
851  51 if (otherseq == sequence
852    || otherseq == sequence.getDatasetSequence()
853    || (otherDataset != null && (otherDataset == sequence
854    || otherDataset == sequence
855    .getDatasetSequence())))
856    {
857    // skip sequences in subset which directly relate to sequence
858  4 continue;
859    }
860  47 if (mapping.involvesSequence(otherseq))
861    {
862    // selected a mapping contained in subselect alignment
863  4 result.add(mapping);
864  4 break;
865    }
866    }
867    }
868    else
869    {
870  151 result.add(mapping);
871    }
872    }
873    }
874  136 return result;
875    }
876   
877    /**
878    * Returns the total length of the supplied ranges, which may be as single
879    * [start, end] or multiple [start, end, start, end ...]
880    *
881    * @param ranges
882    * @return
883    */
 
884  154 toggle public static int getLength(List<int[]> ranges)
885    {
886  154 if (ranges == null)
887    {
888  1 return 0;
889    }
890  153 int length = 0;
891  153 for (int[] range : ranges)
892    {
893  175 if (range.length % 2 != 0)
894    {
895  0 Console.error(
896    "Error unbalance start/end ranges: " + ranges.toString());
897  0 return 0;
898    }
899  379 for (int i = 0; i < range.length - 1; i += 2)
900    {
901  204 length += Math.abs(range[i + 1] - range[i]) + 1;
902    }
903    }
904  153 return length;
905    }
906   
907    /**
908    * Answers true if any range includes the given value
909    *
910    * @param ranges
911    * @param value
912    * @return
913    */
 
914  22 toggle public static boolean contains(List<int[]> ranges, int value)
915    {
916  22 if (ranges == null)
917    {
918  1 return false;
919    }
920  21 for (int[] range : ranges)
921    {
922  72 if (range[1] >= range[0] && value >= range[0] && value <= range[1])
923    {
924    /*
925    * value within ascending range
926    */
927  8 return true;
928    }
929  64 if (range[1] < range[0] && value <= range[0] && value >= range[1])
930    {
931    /*
932    * value within descending range
933    */
934  5 return true;
935    }
936    }
937  8 return false;
938    }
939   
940    /**
941    * Removes a specified number of positions from the start of a ranges list.
942    * For example, could be used to adjust cds ranges to allow for an incomplete
943    * start codon. Subranges are removed completely, or their start positions
944    * adjusted, until the required number of positions has been removed from the
945    * range. Reverse strand ranges are supported. The input array is not
946    * modified.
947    *
948    * @param removeCount
949    * @param ranges
950    * an array of [start, end, start, end...] positions
951    * @return a new array with the first removeCount positions removed
952    */
 
953  21 toggle public static int[] removeStartPositions(int removeCount,
954    final int[] ranges)
955    {
956  21 if (removeCount <= 0)
957    {
958  5 return ranges;
959    }
960   
961  16 int[] copy = Arrays.copyOf(ranges, ranges.length);
962  16 int sxpos = -1;
963  16 int cdspos = 0;
964  28 for (int x = 0; x < copy.length && sxpos == -1; x += 2)
965    {
966  28 cdspos += Math.abs(copy[x + 1] - copy[x]) + 1;
967  28 if (removeCount < cdspos)
968    {
969    /*
970    * we have removed enough, time to finish
971    */
972  16 sxpos = x;
973   
974    /*
975    * increment start of first exon, or decrement if reverse strand
976    */
977  16 if (copy[x] <= copy[x + 1])
978    {
979  9 copy[x] = copy[x + 1] - cdspos + removeCount + 1;
980    }
981    else
982    {
983  7 copy[x] = copy[x + 1] + cdspos - removeCount - 1;
984    }
985  16 break;
986    }
987    }
988   
989  16 if (sxpos > 0)
990    {
991    /*
992    * we dropped at least one entire sub-range - compact the array
993    */
994  10 int[] nxon = new int[copy.length - sxpos];
995  10 System.arraycopy(copy, sxpos, nxon, 0, copy.length - sxpos);
996  10 return nxon;
997    }
998  6 return copy;
999    }
1000   
1001    /**
1002    * Answers true if range's start-end positions include those of queryRange,
1003    * where either range might be in reverse direction, else false
1004    *
1005    * @param range
1006    * a start-end range
1007    * @param queryRange
1008    * a candidate subrange of range (start2-end2)
1009    * @return
1010    */
 
1011  33 toggle public static boolean rangeContains(int[] range, int[] queryRange)
1012    {
1013  33 if (range == null || queryRange == null || range.length != 2
1014    || queryRange.length != 2)
1015    {
1016    /*
1017    * invalid arguments
1018    */
1019  4 return false;
1020    }
1021   
1022  29 int min = Math.min(range[0], range[1]);
1023  29 int max = Math.max(range[0], range[1]);
1024   
1025  29 return (min <= queryRange[0] && max >= queryRange[0]
1026    && min <= queryRange[1] && max >= queryRange[1]);
1027    }
1028   
1029    /**
1030    * Removes the specified number of positions from the given ranges. Provided
1031    * to allow a stop codon to be stripped from a CDS sequence so that it matches
1032    * the peptide translation length.
1033    *
1034    * @param positions
1035    * @param ranges
1036    * a list of (single) [start, end] ranges
1037    * @return
1038    */
 
1039  8 toggle public static void removeEndPositions(int positions, List<int[]> ranges)
1040    {
1041  8 int toRemove = positions;
1042  8 Iterator<int[]> it = new ReverseListIterator<>(ranges);
1043  19 while (toRemove > 0)
1044    {
1045  11 int[] endRange = it.next();
1046  11 if (endRange.length != 2)
1047    {
1048    /*
1049    * not coded for [start1, end1, start2, end2, ...]
1050    */
1051  0 Console.error(
1052    "MappingUtils.removeEndPositions doesn't handle multiple ranges");
1053  0 return;
1054    }
1055   
1056  11 int length = endRange[1] - endRange[0] + 1;
1057  11 if (length <= 0)
1058    {
1059    /*
1060    * not coded for a reverse strand range (end < start)
1061    */
1062  0 Console.error(
1063    "MappingUtils.removeEndPositions doesn't handle reverse strand");
1064  0 return;
1065    }
1066  11 if (length > toRemove)
1067    {
1068  7 endRange[1] -= toRemove;
1069  7 toRemove = 0;
1070    }
1071    else
1072    {
1073  4 toRemove -= length;
1074  4 it.remove();
1075    }
1076    }
1077    }
1078   
1079    /**
1080    * Converts a list of {@code start-end} ranges to a single array of
1081    * {@code start1, end1, start2, ... } ranges
1082    *
1083    * @param ranges
1084    * @return
1085    */
 
1086  181340 toggle public static int[] rangeListToArray(List<int[]> ranges)
1087    {
1088  181339 int rangeCount = ranges.size();
1089  181339 int[] result = new int[rangeCount * 2];
1090  181335 int j = 0;
1091  362866 for (int i = 0; i < rangeCount; i++)
1092    {
1093  181536 int[] range = ranges.get(i);
1094  181534 result[j++] = range[0];
1095  181531 result[j++] = range[1];
1096    }
1097  181331 return result;
1098    }
1099   
1100    /*
1101    * Returns the maximal start-end positions in the given (ordered) list of
1102    * ranges which is overlapped by the given begin-end range, or null if there
1103    * is no overlap.
1104    *
1105    * <pre>
1106    * Examples:
1107    * if ranges is {[4, 8], [10, 12], [16, 19]}
1108    * then
1109    * findOverlap(ranges, 1, 20) == [4, 19]
1110    * findOverlap(ranges, 6, 11) == [6, 11]
1111    * findOverlap(ranges, 9, 15) == [10, 12]
1112    * findOverlap(ranges, 13, 15) == null
1113    * </pre>
1114    *
1115    * @param ranges
1116    * @param begin
1117    * @param end
1118    * @return
1119    */
 
1120  26 toggle protected static int[] findOverlap(List<int[]> ranges, final int begin,
1121    final int end)
1122    {
1123  26 boolean foundStart = false;
1124  26 int from = 0;
1125  26 int to = 0;
1126   
1127    /*
1128    * traverse the ranges to find the first position (if any) >= begin,
1129    * and the last position (if any) <= end
1130    */
1131  26 for (int[] range : ranges)
1132    {
1133  89 if (!foundStart)
1134    {
1135  51 if (range[0] >= begin)
1136    {
1137    /*
1138    * first range that starts with, or follows, begin
1139    */
1140  16 foundStart = true;
1141  16 from = Math.max(range[0], begin);
1142    }
1143  35 else if (range[1] >= begin)
1144    {
1145    /*
1146    * first range that contains begin
1147    */
1148  9 foundStart = true;
1149  9 from = begin;
1150    }
1151    }
1152   
1153  89 if (range[0] <= end)
1154    {
1155  54 to = Math.min(end, range[1]);
1156    }
1157    }
1158   
1159  26 return foundStart && to >= from ? new int[] { from, to } : null;
1160    }
1161    }