Clover icon

Coverage Report

  1. Project Clover database Thu Nov 28 2024 11:45:30 GMT
  2. Package jalview.util

File MappingUtils.java

 

Coverage histogram

../../img/srcFileCovDistChart9.png
13% of files have more coverage

Code metrics

152
290
22
1
1,112
641
120
0.41
13.18
22
5.45

Classes

Class Line # Actions
MappingUtils 58 290 120
0.814655281.5%
 

Contributing tests

This file is covered by 131 tests. .

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.util;
22   
23    import java.util.ArrayList;
24    import java.util.Arrays;
25    import java.util.HashMap;
26    import java.util.Iterator;
27    import java.util.List;
28    import java.util.Map;
29   
30    import jalview.analysis.AlignmentSorter;
31    import jalview.api.AlignViewportI;
32    import jalview.bin.Console;
33    import jalview.commands.CommandI;
34    import jalview.commands.EditCommand;
35    import jalview.commands.EditCommand.Action;
36    import jalview.commands.EditCommand.Edit;
37    import jalview.commands.OrderCommand;
38    import jalview.datamodel.AlignedCodonFrame;
39    import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
40    import jalview.datamodel.AlignmentI;
41    import jalview.datamodel.AlignmentOrder;
42    import jalview.datamodel.ColumnSelection;
43    import jalview.datamodel.HiddenColumns;
44    import jalview.datamodel.Mapping;
45    import jalview.datamodel.SearchResultMatchI;
46    import jalview.datamodel.SearchResults;
47    import jalview.datamodel.SearchResultsI;
48    import jalview.datamodel.Sequence;
49    import jalview.datamodel.SequenceGroup;
50    import jalview.datamodel.SequenceI;
51   
52    /**
53    * Helper methods for manipulations involving sequence mappings.
54    *
55    * @author gmcarstairs
56    *
57    */
 
58    public final class MappingUtils
59    {
60   
61    /**
62    * Helper method to map a CUT or PASTE command.
63    *
64    * @param edit
65    * the original command
66    * @param undo
67    * if true, the command is to be undone
68    * @param targetSeqs
69    * the mapped sequences to apply the mapped command to
70    * @param result
71    * the mapped EditCommand to add to
72    * @param mappings
73    */
 
74  0 toggle protected static void mapCutOrPaste(Edit edit, boolean undo,
75    List<SequenceI> targetSeqs, EditCommand result,
76    List<AlignedCodonFrame> mappings)
77    {
78  0 Action action = edit.getAction();
79  0 if (undo)
80    {
81  0 action = action.getUndoAction();
82    }
83    // TODO write this
84  0 Console.error("MappingUtils.mapCutOrPaste not yet implemented");
85    }
86   
87    /**
88    * Returns a new EditCommand representing the given command as mapped to the
89    * given sequences. If there is no mapping, returns null.
90    *
91    * @param command
92    * @param undo
93    * @param mapTo
94    * @param gapChar
95    * @param mappings
96    * @return
97    */
 
98  1 toggle public static EditCommand mapEditCommand(EditCommand command,
99    boolean undo, final AlignmentI mapTo, char gapChar,
100    List<AlignedCodonFrame> mappings)
101    {
102    /*
103    * For now, only support mapping from protein edits to cDna
104    */
105  1 if (!mapTo.isNucleotide())
106    {
107  0 return null;
108    }
109   
110    /*
111    * Cache a copy of the target sequences so we can mimic successive edits on
112    * them. This lets us compute mappings for all edits in the set.
113    */
114  1 Map<SequenceI, SequenceI> targetCopies = new HashMap<>();
115  1 for (SequenceI seq : mapTo.getSequences())
116    {
117  1 SequenceI ds = seq.getDatasetSequence();
118  1 if (ds != null)
119    {
120  1 final SequenceI copy = new Sequence(seq);
121  1 copy.setDatasetSequence(ds);
122  1 targetCopies.put(ds, copy);
123    }
124    }
125   
126    /*
127    * Compute 'source' sequences as they were before applying edits:
128    */
129  1 Map<SequenceI, SequenceI> originalSequences = command.priorState(undo);
130   
131  1 EditCommand result = new EditCommand();
132  1 Iterator<Edit> edits = command.getEditIterator(!undo);
133  2 while (edits.hasNext())
134    {
135  1 Edit edit = edits.next();
136  1 if (edit.getAction() == Action.CUT
137    || edit.getAction() == Action.PASTE)
138    {
139  0 mapCutOrPaste(edit, undo, mapTo.getSequences(), result, mappings);
140    }
141  1 else if (edit.getAction() == Action.INSERT_GAP
142    || edit.getAction() == Action.DELETE_GAP)
143    {
144  1 mapInsertOrDelete(edit, undo, originalSequences,
145    mapTo.getSequences(), targetCopies, gapChar, result,
146    mappings);
147    }
148    }
149  1 return result.getSize() > 0 ? result : null;
150    }
151   
152    /**
153    * Helper method to map an edit command to insert or delete gaps.
154    *
155    * @param edit
156    * the original command
157    * @param undo
158    * if true, the action is to undo the command
159    * @param originalSequences
160    * the sequences the command acted on
161    * @param targetSeqs
162    * @param targetCopies
163    * @param gapChar
164    * @param result
165    * the new EditCommand to add mapped commands to
166    * @param mappings
167    */
 
168  1 toggle protected static void mapInsertOrDelete(Edit edit, boolean undo,
169    Map<SequenceI, SequenceI> originalSequences,
170    final List<SequenceI> targetSeqs,
171    Map<SequenceI, SequenceI> targetCopies, char gapChar,
172    EditCommand result, List<AlignedCodonFrame> mappings)
173    {
174  1 Action action = edit.getAction();
175   
176    /*
177    * Invert sense of action if an Undo.
178    */
179  1 if (undo)
180    {
181  0 action = action.getUndoAction();
182    }
183  1 final int count = edit.getNumber();
184  1 final int editPos = edit.getPosition();
185  1 for (SequenceI seq : edit.getSequences())
186    {
187    /*
188    * Get residue position at (or to right of) edit location. Note we use our
189    * 'copy' of the sequence before editing for this.
190    */
191  1 SequenceI ds = seq.getDatasetSequence();
192  1 if (ds == null)
193    {
194  0 continue;
195    }
196  1 final SequenceI actedOn = originalSequences.get(ds);
197  1 final int seqpos = actedOn.findPosition(editPos);
198   
199    /*
200    * Determine all mappings from this position to mapped sequences.
201    */
202  1 SearchResultsI sr = buildSearchResults(seq, seqpos, mappings);
203   
204  1 if (!sr.isEmpty())
205    {
206  1 for (SequenceI targetSeq : targetSeqs)
207    {
208  1 ds = targetSeq.getDatasetSequence();
209  1 if (ds == null)
210    {
211  0 continue;
212    }
213  1 SequenceI copyTarget = targetCopies.get(ds);
214  1 final int[] match = sr.getResults(copyTarget, 0,
215    copyTarget.getLength());
216  1 if (match != null)
217    {
218  1 final int ratio = 3; // TODO: compute this - how?
219  1 final int mappedCount = count * ratio;
220   
221    /*
222    * Shift Delete start position left, as it acts on positions to its
223    * right.
224    */
225  1 int mappedEditPos = action == Action.DELETE_GAP
226    ? match[0] - mappedCount
227    : match[0];
228  1 Edit e = result.new Edit(action, new SequenceI[] { targetSeq },
229    mappedEditPos, mappedCount, gapChar);
230  1 result.addEdit(e);
231   
232    /*
233    * and 'apply' the edit to our copy of its target sequence
234    */
235  1 if (action == Action.INSERT_GAP)
236    {
237  1 copyTarget.setSequence(new String(
238    StringUtils.insertCharAt(copyTarget.getSequence(),
239    mappedEditPos, mappedCount, gapChar)));
240    }
241  0 else if (action == Action.DELETE_GAP)
242    {
243  0 copyTarget.setSequence(new String(
244    StringUtils.deleteChars(copyTarget.getSequence(),
245    mappedEditPos, mappedEditPos + mappedCount)));
246    }
247    }
248    }
249    }
250    /*
251    * and 'apply' the edit to our copy of its source sequence
252    */
253  1 if (action == Action.INSERT_GAP)
254    {
255  1 actedOn.setSequence(new String(StringUtils.insertCharAt(
256    actedOn.getSequence(), editPos, count, gapChar)));
257    }
258  0 else if (action == Action.DELETE_GAP)
259    {
260  0 actedOn.setSequence(new String(StringUtils.deleteChars(
261    actedOn.getSequence(), editPos, editPos + count)));
262    }
263    }
264    }
265   
266    /**
267    * Returns a SearchResults object describing the mapped region corresponding
268    * to the specified sequence position.
269    *
270    * @param seq
271    * @param index
272    * @param seqmappings
273    * @return
274    */
 
275  121 toggle public static SearchResultsI buildSearchResults(SequenceI seq, int index,
276    List<AlignedCodonFrame> seqmappings)
277    {
278  121 SearchResultsI results = new SearchResults();
279  121 addSearchResults(results, seq, index, seqmappings);
280  121 return results;
281    }
282   
283    /**
284    * Adds entries to a SearchResults object describing the mapped region
285    * corresponding to the specified sequence position.
286    *
287    * @param results
288    * @param seq
289    * @param index
290    * @param seqmappings
291    */
 
292  125 toggle public static void addSearchResults(SearchResultsI results, SequenceI seq,
293    int index, List<AlignedCodonFrame> seqmappings)
294    {
295  125 if (index >= seq.getStart() && index <= seq.getEnd())
296    {
297  125 for (AlignedCodonFrame acf : seqmappings)
298    {
299  125 acf.markMappedRegion(seq, index, results);
300    }
301    }
302    }
303   
304    /**
305    * Returns a (possibly empty) SequenceGroup containing any sequences in the
306    * mapped viewport corresponding to the given group in the source viewport.
307    *
308    * @param sg
309    * @param mapFrom
310    * @param mapTo
311    * @return
312    */
 
313  11 toggle public static SequenceGroup mapSequenceGroup(final SequenceGroup sg,
314    final AlignViewportI mapFrom, final AlignViewportI mapTo)
315    {
316    /*
317    * Note the SequenceGroup holds aligned sequences, the mappings hold dataset
318    * sequences.
319    */
320  11 boolean targetIsNucleotide = mapTo.isNucleotide();
321  11 AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
322  11 List<AlignedCodonFrame> codonFrames = protein.getAlignment()
323    .getCodonFrames();
324    /*
325    * Copy group name, colours etc, but not sequences or sequence colour scheme
326    */
327  11 SequenceGroup mappedGroup = new SequenceGroup(sg);
328  11 mappedGroup.setColourScheme(mapTo.getGlobalColourScheme());
329  11 mappedGroup.clear();
330   
331  11 int minStartCol = -1;
332  11 int maxEndCol = -1;
333  11 final int selectionStartRes = sg.getStartRes();
334  11 final int selectionEndRes = sg.getEndRes();
335  11 for (SequenceI selected : sg.getSequences())
336    {
337    /*
338    * Find the widest range of non-gapped positions in the selection range
339    */
340  24 int firstUngappedPos = selectionStartRes;
341  28 while (firstUngappedPos <= selectionEndRes
342    && Comparison.isGap(selected.getCharAt(firstUngappedPos)))
343    {
344  4 firstUngappedPos++;
345    }
346   
347    /*
348    * If this sequence is only gaps in the selected range, skip it
349    */
350  24 if (firstUngappedPos > selectionEndRes)
351    {
352  1 continue;
353    }
354   
355  23 int lastUngappedPos = selectionEndRes;
356  23 while (lastUngappedPos >= selectionStartRes
357    && Comparison.isGap(selected.getCharAt(lastUngappedPos)))
358    {
359  0 lastUngappedPos--;
360    }
361   
362    /*
363    * Find the selected start/end residue positions in sequence
364    */
365  23 int startResiduePos = selected.findPosition(firstUngappedPos);
366  23 int endResiduePos = selected.findPosition(lastUngappedPos);
367  23 for (SequenceI seq : mapTo.getAlignment().getSequences())
368    {
369  69 int mappedStartResidue = 0;
370  69 int mappedEndResidue = 0;
371  69 for (AlignedCodonFrame acf : codonFrames)
372    {
373    // rather than use acf.getCoveringMapping() we iterate through all
374    // mappings to make sure all CDS are selected for a protein
375  69 for (SequenceToSequenceMapping map : acf.getMappings())
376    {
377  178 if (map.covers(selected) && map.covers(seq))
378    {
379    /*
380    * Found a sequence mapping. Locate the start/end mapped residues.
381    */
382  23 List<AlignedCodonFrame> mapping = Arrays
383    .asList(new AlignedCodonFrame[]
384    { acf });
385    // locate start
386  23 SearchResultsI sr = buildSearchResults(selected,
387    startResiduePos, mapping);
388  23 for (SearchResultMatchI m : sr.getResults())
389    {
390  24 mappedStartResidue = m.getStart();
391  24 mappedEndResidue = m.getEnd();
392    }
393    // locate end - allowing for adjustment of start range
394  23 sr = buildSearchResults(selected, endResiduePos, mapping);
395  23 for (SearchResultMatchI m : sr.getResults())
396    {
397  24 mappedStartResidue = Math.min(mappedStartResidue,
398    m.getStart());
399  24 mappedEndResidue = Math.max(mappedEndResidue, m.getEnd());
400    }
401   
402    /*
403    * Find the mapped aligned columns, save the range. Note findIndex
404    * returns a base 1 position, SequenceGroup uses base 0
405    */
406  23 int mappedStartCol = seq.findIndex(mappedStartResidue) - 1;
407  23 minStartCol = minStartCol == -1 ? mappedStartCol
408    : Math.min(minStartCol, mappedStartCol);
409  23 int mappedEndCol = seq.findIndex(mappedEndResidue) - 1;
410  23 maxEndCol = maxEndCol == -1 ? mappedEndCol
411    : Math.max(maxEndCol, mappedEndCol);
412  23 mappedGroup.addSequence(seq, false);
413  23 break;
414    }
415    }
416    }
417    }
418    }
419  11 mappedGroup.setStartRes(minStartCol < 0 ? 0 : minStartCol);
420  11 mappedGroup.setEndRes(maxEndCol < 0 ? 0 : maxEndCol);
421  11 return mappedGroup;
422    }
423   
424    /**
425    * Returns an OrderCommand equivalent to the given one, but acting on mapped
426    * sequences as described by the mappings, or null if no mapping can be made.
427    *
428    * @param command
429    * the original order command
430    * @param undo
431    * if true, the action is to undo the sort
432    * @param mapTo
433    * the alignment we are mapping to
434    * @param mappings
435    * the mappings available
436    * @return
437    */
 
438  0 toggle public static CommandI mapOrderCommand(OrderCommand command, boolean undo,
439    AlignmentI mapTo, List<AlignedCodonFrame> mappings)
440    {
441  0 SequenceI[] sortOrder = command.getSequenceOrder(undo);
442  0 List<SequenceI> mappedOrder = new ArrayList<>();
443  0 int j = 0;
444   
445    /*
446    * Assumption: we are only interested in a cDNA/protein mapping; refactor in
447    * future if we want to support sorting (c)dna as (c)dna or protein as
448    * protein
449    */
450  0 boolean mappingToNucleotide = mapTo.isNucleotide();
451  0 for (SequenceI seq : sortOrder)
452    {
453  0 for (AlignedCodonFrame acf : mappings)
454    {
455  0 for (SequenceI seq2 : mapTo.getSequences())
456    {
457    /*
458    * the corresponding peptide / CDS is the one for which there is
459    * a complete ('covering') mapping to 'seq'
460    */
461  0 SequenceI peptide = mappingToNucleotide ? seq2 : seq;
462  0 SequenceI cds = mappingToNucleotide ? seq : seq2;
463  0 SequenceToSequenceMapping s2s = acf.getCoveringMapping(cds,
464    peptide);
465  0 if (s2s != null)
466    {
467  0 mappedOrder.add(seq2);
468  0 j++;
469  0 break;
470    }
471    }
472    }
473    }
474   
475    /*
476    * Return null if no mappings made.
477    */
478  0 if (j == 0)
479    {
480  0 return null;
481    }
482   
483    /*
484    * Add any unmapped sequences on the end of the sort in their original
485    * ordering.
486    */
487  0 if (j < mapTo.getHeight())
488    {
489  0 for (SequenceI seq : mapTo.getSequences())
490    {
491  0 if (!mappedOrder.contains(seq))
492    {
493  0 mappedOrder.add(seq);
494    }
495    }
496    }
497   
498    /*
499    * Have to sort the sequences before constructing the OrderCommand - which
500    * then resorts them?!?
501    */
502  0 final SequenceI[] mappedOrderArray = mappedOrder
503    .toArray(new SequenceI[mappedOrder.size()]);
504  0 SequenceI[] oldOrder = mapTo.getSequencesArray();
505  0 AlignmentSorter.sortBy(mapTo, new AlignmentOrder(mappedOrderArray));
506  0 final OrderCommand result = new OrderCommand(command.getDescription(),
507    oldOrder, mapTo);
508  0 return result;
509    }
510   
511    /**
512    * Returns a ColumnSelection in the 'mapTo' view which corresponds to the
513    * given selection in the 'mapFrom' view. We assume one is nucleotide, the
514    * other is protein (and holds the mappings from codons to protein residues).
515    *
516    * @param colsel
517    * @param mapFrom
518    * @param mapTo
519    * @return
520    */
 
521  13 toggle public static void mapColumnSelection(ColumnSelection colsel,
522    HiddenColumns hiddencols, AlignViewportI mapFrom,
523    AlignViewportI mapTo, ColumnSelection newColSel,
524    HiddenColumns newHidden)
525    {
526  13 boolean targetIsNucleotide = mapTo.isNucleotide();
527  13 AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
528  13 List<AlignedCodonFrame> codonFrames = protein.getAlignment()
529    .getCodonFrames();
530   
531  13 if (colsel == null)
532    {
533  1 return;
534    }
535   
536  12 char fromGapChar = mapFrom.getAlignment().getGapCharacter();
537   
538    /*
539    * For each mapped column, find the range of columns that residues in that
540    * column map to.
541    */
542  12 List<SequenceI> fromSequences = mapFrom.getAlignment().getSequences();
543  12 List<SequenceI> toSequences = mapTo.getAlignment().getSequences();
544   
545  12 for (Integer sel : colsel.getSelected())
546    {
547  12 mapColumn(sel.intValue(), codonFrames, newColSel, fromSequences,
548    toSequences, fromGapChar);
549    }
550   
551  12 Iterator<int[]> regions = hiddencols.iterator();
552  18 while (regions.hasNext())
553    {
554  6 mapHiddenColumns(regions.next(), codonFrames, newHidden,
555    fromSequences, toSequences, fromGapChar);
556    }
557  12 return;
558    }
559   
560    /**
561    * Helper method that maps a [start, end] hidden column range to its mapped
562    * equivalent
563    *
564    * @param hidden
565    * @param mappings
566    * @param mappedColumns
567    * @param fromSequences
568    * @param toSequences
569    * @param fromGapChar
570    */
 
571  6 toggle protected static void mapHiddenColumns(int[] hidden,
572    List<AlignedCodonFrame> mappings, HiddenColumns mappedColumns,
573    List<SequenceI> fromSequences, List<SequenceI> toSequences,
574    char fromGapChar)
575    {
576  12 for (int col = hidden[0]; col <= hidden[1]; col++)
577    {
578  6 int[] mappedTo = findMappedColumns(col, mappings, fromSequences,
579    toSequences, fromGapChar);
580   
581    /*
582    * Add the range of hidden columns to the mapped selection (converting
583    * base 1 to base 0).
584    */
585  6 if (mappedTo != null)
586    {
587  5 mappedColumns.hideColumns(mappedTo[0] - 1, mappedTo[1] - 1);
588    }
589    }
590    }
591   
592    /**
593    * Helper method to map one column selection
594    *
595    * @param col
596    * the column number (base 0)
597    * @param mappings
598    * the sequence mappings
599    * @param mappedColumns
600    * the mapped column selections to add to
601    * @param fromSequences
602    * @param toSequences
603    * @param fromGapChar
604    */
 
605  12 toggle protected static void mapColumn(int col, List<AlignedCodonFrame> mappings,
606    ColumnSelection mappedColumns, List<SequenceI> fromSequences,
607    List<SequenceI> toSequences, char fromGapChar)
608    {
609  12 int[] mappedTo = findMappedColumns(col, mappings, fromSequences,
610    toSequences, fromGapChar);
611   
612    /*
613    * Add the range of mapped columns to the mapped selection (converting
614    * base 1 to base 0). Note that this may include intron-only regions which
615    * lie between the start and end ranges of the selection.
616    */
617  12 if (mappedTo != null)
618    {
619  48 for (int i = mappedTo[0]; i <= mappedTo[1]; i++)
620    {
621  37 mappedColumns.addElement(i - 1);
622    }
623    }
624    }
625   
626    /**
627    * Helper method to find the range of columns mapped to from one column.
628    * Returns the maximal range of columns mapped to from all sequences in the
629    * source column, or null if no mappings were found.
630    *
631    * @param col
632    * @param mappings
633    * @param fromSequences
634    * @param toSequences
635    * @param fromGapChar
636    * @return
637    */
 
638  18 toggle protected static int[] findMappedColumns(int col,
639    List<AlignedCodonFrame> mappings, List<SequenceI> fromSequences,
640    List<SequenceI> toSequences, char fromGapChar)
641    {
642  18 int[] mappedTo = new int[] { Integer.MAX_VALUE, Integer.MIN_VALUE };
643  18 boolean found = false;
644   
645    /*
646    * For each sequence in the 'from' alignment
647    */
648  18 for (SequenceI fromSeq : fromSequences)
649    {
650    /*
651    * Ignore gaps (unmapped anyway)
652    */
653  54 if (fromSeq.getCharAt(col) == fromGapChar)
654    {
655  20 continue;
656    }
657   
658    /*
659    * Get the residue position and find the mapped position.
660    */
661  34 int residuePos = fromSeq.findPosition(col);
662  34 SearchResultsI sr = buildSearchResults(fromSeq, residuePos, mappings);
663  34 for (SearchResultMatchI m : sr.getResults())
664    {
665  44 int mappedStartResidue = m.getStart();
666  44 int mappedEndResidue = m.getEnd();
667  44 SequenceI mappedSeq = m.getSequence();
668   
669    /*
670    * Locate the aligned sequence whose dataset is mappedSeq. TODO a
671    * datamodel that can do this efficiently.
672    */
673  44 for (SequenceI toSeq : toSequences)
674    {
675  88 if (toSeq.getDatasetSequence() == mappedSeq
676    && mappedStartResidue >= toSeq.getStart()
677    && mappedEndResidue <= toSeq.getEnd())
678    {
679  44 int mappedStartCol = toSeq.findIndex(mappedStartResidue);
680  44 int mappedEndCol = toSeq.findIndex(mappedEndResidue);
681  44 mappedTo[0] = Math.min(mappedTo[0], mappedStartCol);
682  44 mappedTo[1] = Math.max(mappedTo[1], mappedEndCol);
683  44 found = true;
684  44 break;
685    // note: remove break if we ever want to map one to many sequences
686    }
687    }
688    }
689    }
690  18 return found ? mappedTo : null;
691    }
692   
693    /**
694    * Returns the mapped codon or codons for a given aligned sequence column
695    * position (base 0).
696    *
697    * @param seq
698    * an aligned peptide sequence
699    * @param col
700    * an aligned column position (base 0)
701    * @param mappings
702    * a set of codon mappings
703    * @return the bases of the mapped codon(s) in the cDNA dataset sequence(s),
704    * or an empty list if none found
705    */
 
706  10704 toggle public static List<char[]> findCodonsFor(SequenceI seq, int col,
707    List<AlignedCodonFrame> mappings)
708    {
709  10704 List<char[]> result = new ArrayList<>();
710  10704 int dsPos = seq.findPosition(col);
711  10704 for (AlignedCodonFrame mapping : mappings)
712    {
713  245092 if (mapping.involvesSequence(seq))
714    {
715  21358 List<char[]> codons = mapping
716    .getMappedCodons(seq.getDatasetSequence(), dsPos);
717  21358 if (codons != null)
718    {
719  21307 result.addAll(codons);
720    }
721    }
722    }
723  10704 return result;
724    }
725   
726    /**
727    * Converts a series of [start, end] range pairs into an array of individual
728    * positions. This also caters for 'reverse strand' (start > end) cases.
729    *
730    * @param ranges
731    * @return
732    */
 
733  21336 toggle public static int[] flattenRanges(int[] ranges)
734    {
735    /*
736    * Count how many positions altogether
737    */
738  21336 int count = 0;
739  42695 for (int i = 0; i < ranges.length - 1; i += 2)
740    {
741  21359 count += Math.abs(ranges[i + 1] - ranges[i]) + 1;
742    }
743   
744  21336 int[] result = new int[count];
745  21336 int k = 0;
746  42695 for (int i = 0; i < ranges.length - 1; i += 2)
747    {
748  21359 int from = ranges[i];
749  21359 final int to = ranges[i + 1];
750  21359 int step = from <= to ? 1 : -1;
751  21359 do
752    {
753  64039 result[k++] = from;
754  64039 from += step;
755  64039 } while (from != to + step);
756    }
757  21336 return result;
758    }
759   
760    /**
761    * Returns a list of any mappings that are from or to the given (aligned or
762    * dataset) sequence.
763    *
764    * @param sequence
765    * @param mappings
766    * @return
767    */
 
768  135 toggle public static List<AlignedCodonFrame> findMappingsForSequence(
769    SequenceI sequence, List<AlignedCodonFrame> mappings)
770    {
771  135 return findMappingsForSequenceAndOthers(sequence, mappings, null);
772    }
773   
774    /**
775    * Returns a list of any mappings that are from or to the given (aligned or
776    * dataset) sequence, optionally limited to mappings involving one of a given
777    * list of sequences.
778    *
779    * @param sequence
780    * @param mappings
781    * @param filterList
782    * @return
783    */
 
784  143 toggle public static List<AlignedCodonFrame> findMappingsForSequenceAndOthers(
785    SequenceI sequence, List<AlignedCodonFrame> mappings,
786    List<SequenceI> filterList)
787    {
788  143 List<AlignedCodonFrame> result = new ArrayList<>();
789  143 if (sequence == null || mappings == null)
790    {
791  5 return result;
792    }
793  138 for (AlignedCodonFrame mapping : mappings)
794    {
795  1448 if (mapping.involvesSequence(sequence))
796    {
797  162 if (filterList != null)
798    {
799  11 for (SequenceI otherseq : filterList)
800    {
801  95 SequenceI otherDataset = otherseq.getDatasetSequence();
802  95 if (otherseq == sequence
803    || otherseq == sequence.getDatasetSequence()
804    || (otherDataset != null && (otherDataset == sequence
805    || otherDataset == sequence
806    .getDatasetSequence())))
807    {
808    // skip sequences in subset which directly relate to sequence
809  4 continue;
810    }
811  91 if (mapping.involvesSequence(otherseq))
812    {
813    // selected a mapping contained in subselect alignment
814  6 result.add(mapping);
815  6 break;
816    }
817    }
818    }
819    else
820    {
821  151 result.add(mapping);
822    }
823    }
824    }
825  138 return result;
826    }
827   
828    /**
829    * Returns the total length of the supplied ranges, which may be as single
830    * [start, end] or multiple [start, end, start, end ...]
831    *
832    * @param ranges
833    * @return
834    */
 
835  153 toggle public static int getLength(List<int[]> ranges)
836    {
837  153 if (ranges == null)
838    {
839  1 return 0;
840    }
841  152 int length = 0;
842  152 for (int[] range : ranges)
843    {
844  175 if (range.length % 2 != 0)
845    {
846  0 Console.error(
847    "Error unbalance start/end ranges: " + ranges.toString());
848  0 return 0;
849    }
850  379 for (int i = 0; i < range.length - 1; i += 2)
851    {
852  204 length += Math.abs(range[i + 1] - range[i]) + 1;
853    }
854    }
855  152 return length;
856    }
857   
858    /**
859    * Answers true if any range includes the given value
860    *
861    * @param ranges
862    * @param value
863    * @return
864    */
 
865  22 toggle public static boolean contains(List<int[]> ranges, int value)
866    {
867  22 if (ranges == null)
868    {
869  1 return false;
870    }
871  21 for (int[] range : ranges)
872    {
873  72 if (range[1] >= range[0] && value >= range[0] && value <= range[1])
874    {
875    /*
876    * value within ascending range
877    */
878  8 return true;
879    }
880  64 if (range[1] < range[0] && value <= range[0] && value >= range[1])
881    {
882    /*
883    * value within descending range
884    */
885  5 return true;
886    }
887    }
888  8 return false;
889    }
890   
891    /**
892    * Removes a specified number of positions from the start of a ranges list.
893    * For example, could be used to adjust cds ranges to allow for an incomplete
894    * start codon. Subranges are removed completely, or their start positions
895    * adjusted, until the required number of positions has been removed from the
896    * range. Reverse strand ranges are supported. The input array is not
897    * modified.
898    *
899    * @param removeCount
900    * @param ranges
901    * an array of [start, end, start, end...] positions
902    * @return a new array with the first removeCount positions removed
903    */
 
904  21 toggle public static int[] removeStartPositions(int removeCount,
905    final int[] ranges)
906    {
907  21 if (removeCount <= 0)
908    {
909  5 return ranges;
910    }
911   
912  16 int[] copy = Arrays.copyOf(ranges, ranges.length);
913  16 int sxpos = -1;
914  16 int cdspos = 0;
915  28 for (int x = 0; x < copy.length && sxpos == -1; x += 2)
916    {
917  28 cdspos += Math.abs(copy[x + 1] - copy[x]) + 1;
918  28 if (removeCount < cdspos)
919    {
920    /*
921    * we have removed enough, time to finish
922    */
923  16 sxpos = x;
924   
925    /*
926    * increment start of first exon, or decrement if reverse strand
927    */
928  16 if (copy[x] <= copy[x + 1])
929    {
930  9 copy[x] = copy[x + 1] - cdspos + removeCount + 1;
931    }
932    else
933    {
934  7 copy[x] = copy[x + 1] + cdspos - removeCount - 1;
935    }
936  16 break;
937    }
938    }
939   
940  16 if (sxpos > 0)
941    {
942    /*
943    * we dropped at least one entire sub-range - compact the array
944    */
945  10 int[] nxon = new int[copy.length - sxpos];
946  10 System.arraycopy(copy, sxpos, nxon, 0, copy.length - sxpos);
947  10 return nxon;
948    }
949  6 return copy;
950    }
951   
952    /**
953    * Answers true if range's start-end positions include those of queryRange,
954    * where either range might be in reverse direction, else false
955    *
956    * @param range
957    * a start-end range
958    * @param queryRange
959    * a candidate subrange of range (start2-end2)
960    * @return
961    */
 
962  33 toggle public static boolean rangeContains(int[] range, int[] queryRange)
963    {
964  33 if (range == null || queryRange == null || range.length != 2
965    || queryRange.length != 2)
966    {
967    /*
968    * invalid arguments
969    */
970  4 return false;
971    }
972   
973  29 int min = Math.min(range[0], range[1]);
974  29 int max = Math.max(range[0], range[1]);
975   
976  29 return (min <= queryRange[0] && max >= queryRange[0]
977    && min <= queryRange[1] && max >= queryRange[1]);
978    }
979   
980    /**
981    * Removes the specified number of positions from the given ranges. Provided
982    * to allow a stop codon to be stripped from a CDS sequence so that it matches
983    * the peptide translation length.
984    *
985    * @param positions
986    * @param ranges
987    * a list of (single) [start, end] ranges
988    * @return
989    */
 
990  8 toggle public static void removeEndPositions(int positions, List<int[]> ranges)
991    {
992  8 int toRemove = positions;
993  8 Iterator<int[]> it = new ReverseListIterator<>(ranges);
994  19 while (toRemove > 0)
995    {
996  11 int[] endRange = it.next();
997  11 if (endRange.length != 2)
998    {
999    /*
1000    * not coded for [start1, end1, start2, end2, ...]
1001    */
1002  0 Console.error(
1003    "MappingUtils.removeEndPositions doesn't handle multiple ranges");
1004  0 return;
1005    }
1006   
1007  11 int length = endRange[1] - endRange[0] + 1;
1008  11 if (length <= 0)
1009    {
1010    /*
1011    * not coded for a reverse strand range (end < start)
1012    */
1013  0 Console.error(
1014    "MappingUtils.removeEndPositions doesn't handle reverse strand");
1015  0 return;
1016    }
1017  11 if (length > toRemove)
1018    {
1019  7 endRange[1] -= toRemove;
1020  7 toRemove = 0;
1021    }
1022    else
1023    {
1024  4 toRemove -= length;
1025  4 it.remove();
1026    }
1027    }
1028    }
1029   
1030    /**
1031    * Converts a list of {@code start-end} ranges to a single array of
1032    * {@code start1, end1, start2, ... } ranges
1033    *
1034    * @param ranges
1035    * @return
1036    */
 
1037  171941 toggle public static int[] rangeListToArray(List<int[]> ranges)
1038    {
1039  171941 int rangeCount = ranges.size();
1040  171941 int[] result = new int[rangeCount * 2];
1041  171940 int j = 0;
1042  344077 for (int i = 0; i < rangeCount; i++)
1043    {
1044  172139 int[] range = ranges.get(i);
1045  172138 result[j++] = range[0];
1046  172137 result[j++] = range[1];
1047    }
1048  171938 return result;
1049    }
1050   
1051    /*
1052    * Returns the maximal start-end positions in the given (ordered) list of
1053    * ranges which is overlapped by the given begin-end range, or null if there
1054    * is no overlap.
1055    *
1056    * <pre>
1057    * Examples:
1058    * if ranges is {[4, 8], [10, 12], [16, 19]}
1059    * then
1060    * findOverlap(ranges, 1, 20) == [4, 19]
1061    * findOverlap(ranges, 6, 11) == [6, 11]
1062    * findOverlap(ranges, 9, 15) == [10, 12]
1063    * findOverlap(ranges, 13, 15) == null
1064    * </pre>
1065    *
1066    * @param ranges
1067    * @param begin
1068    * @param end
1069    * @return
1070    */
 
1071  26 toggle protected static int[] findOverlap(List<int[]> ranges, final int begin,
1072    final int end)
1073    {
1074  26 boolean foundStart = false;
1075  26 int from = 0;
1076  26 int to = 0;
1077   
1078    /*
1079    * traverse the ranges to find the first position (if any) >= begin,
1080    * and the last position (if any) <= end
1081    */
1082  26 for (int[] range : ranges)
1083    {
1084  89 if (!foundStart)
1085    {
1086  51 if (range[0] >= begin)
1087    {
1088    /*
1089    * first range that starts with, or follows, begin
1090    */
1091  16 foundStart = true;
1092  16 from = Math.max(range[0], begin);
1093    }
1094  35 else if (range[1] >= begin)
1095    {
1096    /*
1097    * first range that contains begin
1098    */
1099  9 foundStart = true;
1100  9 from = begin;
1101    }
1102    }
1103   
1104  89 if (range[0] <= end)
1105    {
1106  54 to = Math.min(end, range[1]);
1107    }
1108    }
1109   
1110  26 return foundStart && to >= from ? new int[] { from, to } : null;
1111    }
1112    }