Clover icon

Coverage Report

  1. Project Clover database Wed Jan 7 2026 02:39:37 GMT
  2. Package jalview.analysis

File AlignmentUtils.java

 

Coverage histogram

../../img/srcFileCovDistChart9.png
12% of files have more coverage

Code metrics

514
1,034
68
2
3,494
2,188
397
0.38
15.21
34
5.84

Classes

Class Line # Actions
AlignmentUtils 84 1,028 391
0.838327183.8%
AlignmentUtils.DnaVariant 100 6 6
0.00%
 

Contributing tests

This file is covered by 336 tests. .

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.analysis;
22   
23    import java.awt.Color;
24    import java.util.ArrayList;
25    import java.util.Arrays;
26    import java.util.Collection;
27    import java.util.Collections;
28    import java.util.HashMap;
29    import java.util.HashSet;
30    import java.util.Iterator;
31    import java.util.LinkedHashMap;
32    import java.util.List;
33    import java.util.Locale;
34    import java.util.Map;
35    import java.util.Map.Entry;
36    import java.util.NoSuchElementException;
37    import java.util.Set;
38    import java.util.SortedMap;
39    import java.util.TreeMap;
40    import java.util.Vector;
41    import java.util.stream.Collectors;
42   
43    import jalview.bin.Console;
44    import jalview.commands.RemoveGapColCommand;
45    import jalview.datamodel.AlignedCodon;
46    import jalview.datamodel.AlignedCodonFrame;
47    import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
48    import jalview.datamodel.Alignment;
49    import jalview.datamodel.AlignmentAnnotation;
50    import jalview.datamodel.AlignmentI;
51    import jalview.datamodel.Annotation;
52    import jalview.datamodel.ContactMatrixI;
53    import jalview.datamodel.DBRefEntry;
54    import jalview.datamodel.DBRefSource;
55    import jalview.datamodel.GeneLociI;
56    import jalview.datamodel.HiddenColumns;
57    import jalview.datamodel.IncompleteCodonException;
58    import jalview.datamodel.Mapping;
59    import jalview.datamodel.PDBEntry;
60    import jalview.datamodel.Sequence;
61    import jalview.datamodel.SequenceFeature;
62    import jalview.datamodel.SequenceGroup;
63    import jalview.datamodel.SequenceI;
64    import jalview.datamodel.features.SequenceFeatures;
65    import jalview.io.gff.SequenceOntologyI;
66    import jalview.schemes.ResidueProperties;
67    import jalview.util.ColorUtils;
68    import jalview.util.Comparison;
69    import jalview.util.Constants;
70    import jalview.util.DBRefUtils;
71    import jalview.util.IntRangeComparator;
72    import jalview.util.MapList;
73    import jalview.util.MappingUtils;
74    import jalview.util.ShiftList;
75    import uk.ac.vamsas.objects.core.descriptors.NewickDescriptor;
76   
77    /**
78    * grab bag of useful alignment manipulation operations Expect these to be
79    * refactored elsewhere at some point.
80    *
81    * @author jimp
82    *
83    */
 
84    public class AlignmentUtils
85    {
86    private static final int CODON_LENGTH = 3;
87   
88    private static final String SEQUENCE_VARIANT = "sequence_variant:";
89   
90    /*
91    * the 'id' attribute is provided for variant features fetched from
92    * Ensembl using its REST service with JSON format
93    */
94    public static final String VARIANT_ID = "id";
95   
96    /**
97    * A data model to hold the 'normal' base value at a position, and an optional
98    * sequence variant feature
99    */
 
100    static final class DnaVariant
101    {
102    final String base;
103   
104    SequenceFeature variant;
105   
 
106  0 toggle DnaVariant(String nuc)
107    {
108  0 base = nuc;
109  0 variant = null;
110    }
111   
 
112  0 toggle DnaVariant(String nuc, SequenceFeature var)
113    {
114  0 base = nuc;
115  0 variant = var;
116    }
117   
 
118  0 toggle public String getSource()
119    {
120  0 return variant == null ? null : variant.getFeatureGroup();
121    }
122   
123    /**
124    * toString for aid in the debugger only
125    */
 
126  0 toggle @Override
127    public String toString()
128    {
129  0 return base + ":" + (variant == null ? "" : variant.getDescription());
130    }
131    }
132   
133    /**
134    * given an existing alignment, create a new alignment including all, or up to
135    * flankSize additional symbols from each sequence's dataset sequence
136    *
137    * @param core
138    * @param flankSize
139    * @return AlignmentI
140    */
 
141  27 toggle public static AlignmentI expandContext(AlignmentI core, int flankSize)
142    {
143  27 List<SequenceI> sq = new ArrayList<>();
144  27 int maxoffset = 0;
145  27 for (SequenceI s : core.getSequences())
146    {
147  131 SequenceI newSeq = s.deriveSequence();
148  131 final int newSeqStart = newSeq.getStart() - 1;
149  131 if (newSeqStart > maxoffset
150    && newSeq.getDatasetSequence().getStart() < s.getStart())
151    {
152  131 maxoffset = newSeqStart;
153    }
154  131 sq.add(newSeq);
155    }
156  27 if (flankSize > -1)
157    {
158  25 maxoffset = Math.min(maxoffset, flankSize);
159    }
160   
161    /*
162    * now add offset left and right to create an expanded alignment
163    */
164  27 for (SequenceI s : sq)
165    {
166  131 SequenceI ds = s;
167  262 while (ds.getDatasetSequence() != null)
168    {
169  131 ds = ds.getDatasetSequence();
170    }
171  131 int s_end = s.findPosition(s.getStart() + s.getLength());
172    // find available flanking residues for sequence
173  131 int ustream_ds = s.getStart() - ds.getStart();
174  131 int dstream_ds = ds.getEnd() - s_end;
175   
176    // build new flanked sequence
177   
178    // compute gap padding to start of flanking sequence
179  131 int offset = maxoffset - ustream_ds;
180   
181    // padding is gapChar x ( maxoffset - min(ustream_ds, flank)
182  131 if (flankSize >= 0)
183    {
184  125 if (flankSize < ustream_ds)
185    {
186    // take up to flankSize residues
187  40 offset = maxoffset - flankSize;
188  40 ustream_ds = flankSize;
189    }
190  125 if (flankSize <= dstream_ds)
191    {
192  116 dstream_ds = flankSize - 1;
193    }
194    }
195    // TODO use Character.toLowerCase to avoid creating String objects?
196  131 char[] upstream = new String(ds
197    .getSequence(s.getStart() - 1 - ustream_ds, s.getStart() - 1))
198    .toLowerCase(Locale.ROOT).toCharArray();
199  131 char[] downstream = new String(
200    ds.getSequence(s_end - 1, s_end + dstream_ds))
201    .toLowerCase(Locale.ROOT).toCharArray();
202  131 char[] coreseq = s.getSequence();
203  131 char[] nseq = new char[offset + upstream.length + downstream.length
204    + coreseq.length];
205  131 char c = core.getGapCharacter();
206   
207  131 int p = 0;
208  461 for (; p < offset; p++)
209    {
210  330 nseq[p] = c;
211    }
212   
213  131 System.arraycopy(upstream, 0, nseq, p, upstream.length);
214  131 System.arraycopy(coreseq, 0, nseq, p + upstream.length,
215    coreseq.length);
216  131 System.arraycopy(downstream, 0, nseq,
217    p + coreseq.length + upstream.length, downstream.length);
218  131 s.setSequence(new String(nseq));
219  131 s.setStart(s.getStart() - ustream_ds);
220  131 s.setEnd(s_end + downstream.length);
221    }
222  27 AlignmentI newAl = new jalview.datamodel.Alignment(
223    sq.toArray(new SequenceI[0]));
224  27 for (SequenceI s : sq)
225    {
226  131 if (s.getAnnotation() != null)
227    {
228  1 for (AlignmentAnnotation aa : s.getAnnotation())
229    {
230  1 aa.adjustForAlignment(); // JAL-1712 fix
231  1 newAl.addAnnotation(aa);
232    }
233    }
234    }
235  27 newAl.setDataset(core.getDataset());
236  27 return newAl;
237    }
238   
239    /**
240    * Returns the index (zero-based position) of a sequence in an alignment, or
241    * -1 if not found.
242    *
243    * @param al
244    * @param seq
245    * @return
246    */
 
247  64304 toggle public static int getSequenceIndex(AlignmentI al, SequenceI seq)
248    {
249  64304 int result = -1;
250  64304 int pos = 0;
251  64304 for (SequenceI alSeq : al.getSequences())
252    {
253  126221607 if (alSeq == seq)
254    {
255  64241 result = pos;
256  64241 break;
257    }
258  126157362 pos++;
259    }
260  64302 return result;
261    }
262   
263    /**
264    * Returns a map of lists of sequences in the alignment, keyed by sequence
265    * name. For use in mapping between different alignment views of the same
266    * sequences.
267    *
268    * @see jalview.datamodel.AlignmentI#getSequencesByName()
269    */
 
270  1 toggle public static Map<String, List<SequenceI>> getSequencesByName(
271    AlignmentI al)
272    {
273  1 Map<String, List<SequenceI>> theMap = new LinkedHashMap<>();
274  1 for (SequenceI seq : al.getSequences())
275    {
276  3 String name = seq.getName();
277  3 if (name != null)
278    {
279  3 List<SequenceI> seqs = theMap.get(name);
280  3 if (seqs == null)
281    {
282  2 seqs = new ArrayList<>();
283  2 theMap.put(name, seqs);
284    }
285  3 seqs.add(seq);
286    }
287    }
288  1 return theMap;
289    }
290   
291    /**
292    * Build mapping of protein to cDNA alignment. Mappings are made between
293    * sequences where the cDNA translates to the protein sequence. Any new
294    * mappings are added to the protein alignment. Returns true if any mappings
295    * either already exist or were added, else false.
296    *
297    * @param proteinAlignment
298    * @param cdnaAlignment
299    * @return
300    */
 
301  4 toggle public static boolean mapProteinAlignmentToCdna(
302    final AlignmentI proteinAlignment, final AlignmentI cdnaAlignment)
303    {
304  4 if (proteinAlignment == null || cdnaAlignment == null)
305    {
306  0 return false;
307    }
308   
309  4 Set<SequenceI> mappedDna = new HashSet<>();
310  4 Set<SequenceI> mappedProtein = new HashSet<>();
311   
312    /*
313    * First pass - map sequences where cross-references exist. This include
314    * 1-to-many mappings to support, for example, variant cDNA.
315    */
316  4 boolean mappingPerformed = mapProteinToCdna(proteinAlignment,
317    cdnaAlignment, mappedDna, mappedProtein, true);
318   
319    /*
320    * Second pass - map sequences where no cross-references exist. This only
321    * does 1-to-1 mappings and assumes corresponding sequences are in the same
322    * order in the alignments.
323    */
324  4 mappingPerformed |= mapProteinToCdna(proteinAlignment, cdnaAlignment,
325    mappedDna, mappedProtein, false);
326  4 return mappingPerformed;
327    }
328   
329    /**
330    * Make mappings between compatible sequences (where the cDNA translation
331    * matches the protein).
332    *
333    * @param proteinAlignment
334    * @param cdnaAlignment
335    * @param mappedDna
336    * a set of mapped DNA sequences (to add to)
337    * @param mappedProtein
338    * a set of mapped Protein sequences (to add to)
339    * @param xrefsOnly
340    * if true, only map sequences where xrefs exist
341    * @return
342    */
 
343  8 toggle protected static boolean mapProteinToCdna(
344    final AlignmentI proteinAlignment, final AlignmentI cdnaAlignment,
345    Set<SequenceI> mappedDna, Set<SequenceI> mappedProtein,
346    boolean xrefsOnly)
347    {
348  8 boolean mappingExistsOrAdded = false;
349  8 List<SequenceI> thisSeqs = proteinAlignment.getSequences();
350  8 for (SequenceI aaSeq : thisSeqs)
351    {
352  22 boolean proteinMapped = false;
353  22 AlignedCodonFrame acf = new AlignedCodonFrame();
354   
355  22 for (SequenceI cdnaSeq : cdnaAlignment.getSequences())
356    {
357    /*
358    * Always try to map if sequences have xref to each other; this supports
359    * variant cDNA or alternative splicing for a protein sequence.
360    *
361    * If no xrefs, try to map progressively, assuming that alignments have
362    * mappable sequences in corresponding order. These are not
363    * many-to-many, as that would risk mixing species with similar cDNA
364    * sequences.
365    */
366  86 if (xrefsOnly && !AlignmentUtils.haveCrossRef(aaSeq, cdnaSeq))
367    {
368  39 continue;
369    }
370   
371    /*
372    * Don't map non-xrefd sequences more than once each. This heuristic
373    * allows us to pair up similar sequences in ordered alignments.
374    */
375  47 if (!xrefsOnly && (mappedProtein.contains(aaSeq)
376    || mappedDna.contains(cdnaSeq)))
377    {
378  29 continue;
379    }
380  18 if (mappingExists(proteinAlignment.getCodonFrames(),
381    aaSeq.getDatasetSequence(), cdnaSeq.getDatasetSequence()))
382    {
383  0 mappingExistsOrAdded = true;
384    }
385    else
386    {
387  18 MapList map = mapCdnaToProtein(aaSeq, cdnaSeq);
388  18 if (map != null)
389    {
390  12 acf.addMap(cdnaSeq, aaSeq, map);
391  12 mappingExistsOrAdded = true;
392  12 proteinMapped = true;
393  12 mappedDna.add(cdnaSeq);
394  12 mappedProtein.add(aaSeq);
395    }
396    }
397    }
398  22 if (proteinMapped)
399    {
400  11 proteinAlignment.addCodonFrame(acf);
401    }
402    }
403  8 return mappingExistsOrAdded;
404    }
405   
406    /**
407    * Answers true if the mappings include one between the given (dataset)
408    * sequences.
409    */
 
410  30 toggle protected static boolean mappingExists(List<AlignedCodonFrame> mappings,
411    SequenceI aaSeq, SequenceI cdnaSeq)
412    {
413  30 if (mappings != null)
414    {
415  30 for (AlignedCodonFrame acf : mappings)
416    {
417  20 if (cdnaSeq == acf.getDnaForAaSeq(aaSeq))
418    {
419  0 return true;
420    }
421    }
422    }
423  30 return false;
424    }
425   
426    /**
427    * Builds a mapping (if possible) of a cDNA to a protein sequence.
428    * <ul>
429    * <li>first checks if the cdna translates exactly to the protein
430    * sequence</li>
431    * <li>else checks for translation after removing a STOP codon</li>
432    * <li>else checks for translation after removing a START codon</li>
433    * <li>if that fails, inspect CDS features on the cDNA sequence</li>
434    * </ul>
435    * Returns null if no mapping is determined.
436    *
437    * @param proteinSeq
438    * the aligned protein sequence
439    * @param cdnaSeq
440    * the aligned cdna sequence
441    * @return
442    */
 
443  32 toggle public static MapList mapCdnaToProtein(SequenceI proteinSeq,
444    SequenceI cdnaSeq)
445    {
446    /*
447    * Here we handle either dataset sequence set (desktop) or absent (applet).
448    * Use only the char[] form of the sequence to avoid creating possibly large
449    * String objects.
450    */
451  32 final SequenceI proteinDataset = proteinSeq.getDatasetSequence();
452  32 char[] aaSeqChars = proteinDataset != null
453    ? proteinDataset.getSequence()
454    : proteinSeq.getSequence();
455  32 final SequenceI cdnaDataset = cdnaSeq.getDatasetSequence();
456  32 char[] cdnaSeqChars = cdnaDataset != null ? cdnaDataset.getSequence()
457    : cdnaSeq.getSequence();
458  32 if (aaSeqChars == null || cdnaSeqChars == null)
459    {
460  0 return null;
461    }
462   
463    /*
464    * cdnaStart/End, proteinStartEnd are base 1 (for dataset sequence mapping)
465    */
466  32 final int mappedLength = CODON_LENGTH * aaSeqChars.length;
467  32 int cdnaLength = cdnaSeqChars.length;
468  32 int cdnaStart = cdnaSeq.getStart();
469  32 int cdnaEnd = cdnaSeq.getEnd();
470  32 final int proteinStart = proteinSeq.getStart();
471  32 final int proteinEnd = proteinSeq.getEnd();
472   
473    /*
474    * If lengths don't match, try ignoring stop codon (if present)
475    */
476  32 if (cdnaLength != mappedLength && cdnaLength > 2)
477    {
478  16 String lastCodon = String.valueOf(cdnaSeqChars,
479    cdnaLength - CODON_LENGTH, CODON_LENGTH)
480    .toUpperCase(Locale.ROOT);
481  16 for (String stop : ResidueProperties.STOP_CODONS)
482    {
483  47 if (lastCodon.equals(stop))
484    {
485  2 cdnaEnd -= CODON_LENGTH;
486  2 cdnaLength -= CODON_LENGTH;
487  2 break;
488    }
489    }
490    }
491   
492    /*
493    * If lengths still don't match, try ignoring start codon.
494    */
495  32 int startOffset = 0;
496  32 if (cdnaLength != mappedLength && cdnaLength > 2
497    && String.valueOf(cdnaSeqChars, 0, CODON_LENGTH)
498    .toUpperCase(Locale.ROOT)
499    .equals(ResidueProperties.START))
500    {
501  5 startOffset += CODON_LENGTH;
502  5 cdnaStart += CODON_LENGTH;
503  5 cdnaLength -= CODON_LENGTH;
504    }
505   
506  32 if (translatesAs(cdnaSeqChars, startOffset, aaSeqChars))
507    {
508    /*
509    * protein is translation of dna (+/- start/stop codons)
510    */
511  15 MapList map = new MapList(new int[] { cdnaStart, cdnaEnd },
512    new int[]
513    { proteinStart, proteinEnd }, CODON_LENGTH, 1);
514  15 return map;
515    }
516   
517    /*
518    * translation failed - try mapping CDS annotated regions of dna
519    */
520  17 return mapCdsToProtein(cdnaSeq, proteinSeq);
521    }
522   
523    /**
524    * Test whether the given cdna sequence, starting at the given offset,
525    * translates to the given amino acid sequence, using the standard translation
526    * table. Designed to fail fast i.e. as soon as a mismatch position is found.
527    *
528    * @param cdnaSeqChars
529    * @param cdnaStart
530    * @param aaSeqChars
531    * @return
532    */
 
533  52 toggle protected static boolean translatesAs(char[] cdnaSeqChars, int cdnaStart,
534    char[] aaSeqChars)
535    {
536  52 if (cdnaSeqChars == null || aaSeqChars == null)
537    {
538  3 return false;
539    }
540   
541  49 int aaPos = 0;
542  49 int dnaPos = cdnaStart;
543  162 for (; dnaPos < cdnaSeqChars.length - 2
544    && aaPos < aaSeqChars.length; dnaPos += CODON_LENGTH, aaPos++)
545    {
546  131 String codon = String.valueOf(cdnaSeqChars, dnaPos, CODON_LENGTH);
547  131 final String translated = ResidueProperties.codonTranslate(codon);
548   
549    /*
550    * allow * in protein to match untranslatable in dna
551    */
552  131 final char aaRes = aaSeqChars[aaPos];
553  131 if ((translated == null || ResidueProperties.STOP.equals(translated))
554    && aaRes == '*')
555    {
556  4 continue;
557    }
558  127 if (translated == null || !(aaRes == translated.charAt(0)))
559    {
560    // debug
561    // jalview.bin.Console.outPrintln(("Mismatch at " + i + "/" + aaResidue
562    // + ": "
563    // + codon + "(" + translated + ") != " + aaRes));
564  18 return false;
565    }
566    }
567   
568    /*
569    * check we matched all of the protein sequence
570    */
571  31 if (aaPos != aaSeqChars.length)
572    {
573  2 return false;
574    }
575   
576    /*
577    * check we matched all of the dna except
578    * for optional trailing STOP codon
579    */
580  29 if (dnaPos == cdnaSeqChars.length)
581    {
582  17 return true;
583    }
584  12 if (dnaPos == cdnaSeqChars.length - CODON_LENGTH)
585    {
586  11 String codon = String.valueOf(cdnaSeqChars, dnaPos, CODON_LENGTH);
587  11 if (ResidueProperties.STOP
588    .equals(ResidueProperties.codonTranslate(codon)))
589    {
590  9 return true;
591    }
592    }
593  3 return false;
594    }
595   
596    /**
597    * Align sequence 'seq' to match the alignment of a mapped sequence. Note this
598    * currently assumes that we are aligning cDNA to match protein.
599    *
600    * @param seq
601    * the sequence to be realigned
602    * @param al
603    * the alignment whose sequence alignment is to be 'copied'
604    * @param gap
605    * character string represent a gap in the realigned sequence
606    * @param preserveUnmappedGaps
607    * @param preserveMappedGaps
608    * @return true if the sequence was realigned, false if it could not be
609    */
 
610  8 toggle public static boolean alignSequenceAs(SequenceI seq, AlignmentI al,
611    String gap, boolean preserveMappedGaps,
612    boolean preserveUnmappedGaps)
613    {
614    /*
615    * Get any mappings from the source alignment to the target (dataset)
616    * sequence.
617    */
618    // TODO there may be one AlignedCodonFrame per dataset sequence, or one with
619    // all mappings. Would it help to constrain this?
620  8 List<AlignedCodonFrame> mappings = al.getCodonFrame(seq);
621  8 if (mappings == null || mappings.isEmpty())
622    {
623  0 return false;
624    }
625   
626    /*
627    * Locate the aligned source sequence whose dataset sequence is mapped. We
628    * just take the first match here (as we can't align like more than one
629    * sequence).
630    */
631  8 SequenceI alignFrom = null;
632  8 AlignedCodonFrame mapping = null;
633  8 for (AlignedCodonFrame mp : mappings)
634    {
635  8 alignFrom = mp.findAlignedSequence(seq, al);
636  8 if (alignFrom != null)
637    {
638  4 mapping = mp;
639  4 break;
640    }
641    }
642   
643  8 if (alignFrom == null)
644    {
645  4 return false;
646    }
647  4 alignSequenceAs(seq, alignFrom, mapping, gap, al.getGapCharacter(),
648    preserveMappedGaps, preserveUnmappedGaps);
649  4 return true;
650    }
651   
652    /**
653    * Align sequence 'alignTo' the same way as 'alignFrom', using the mapping to
654    * match residues and codons. Flags control whether existing gaps in unmapped
655    * (intron) and mapped (exon) regions are preserved or not. Gaps between
656    * intron and exon are only retained if both flags are set.
657    *
658    * @param alignTo
659    * @param alignFrom
660    * @param mapping
661    * @param myGap
662    * @param sourceGap
663    * @param preserveUnmappedGaps
664    * @param preserveMappedGaps
665    */
 
666  19 toggle public static void alignSequenceAs(SequenceI alignTo, SequenceI alignFrom,
667    AlignedCodonFrame mapping, String myGap, char sourceGap,
668    boolean preserveMappedGaps, boolean preserveUnmappedGaps)
669    {
670    // TODO generalise to work for Protein-Protein, dna-dna, dna-protein
671   
672    // aligned and dataset sequence positions, all base zero
673  19 int thisSeqPos = 0;
674  19 int sourceDsPos = 0;
675   
676  19 int basesWritten = 0;
677  19 char myGapChar = myGap.charAt(0);
678  19 int ratio = myGap.length();
679   
680  19 int fromOffset = alignFrom.getStart() - 1;
681  19 int toOffset = alignTo.getStart() - 1;
682  19 int sourceGapMappedLength = 0;
683  19 boolean inExon = false;
684  19 final int toLength = alignTo.getLength();
685  19 final int fromLength = alignFrom.getLength();
686  19 StringBuilder thisAligned = new StringBuilder(2 * toLength);
687   
688    /*
689    * Traverse the 'model' aligned sequence
690    */
691  205 for (int i = 0; i < fromLength; i++)
692    {
693  186 char sourceChar = alignFrom.getCharAt(i);
694  186 if (sourceChar == sourceGap)
695    {
696  44 sourceGapMappedLength += ratio;
697  44 continue;
698    }
699   
700    /*
701    * Found a non-gap character. Locate its mapped region if any.
702    */
703  142 sourceDsPos++;
704    // Note mapping positions are base 1, our sequence positions base 0
705  142 int[] mappedPos = mapping.getMappedRegion(alignTo, alignFrom,
706    sourceDsPos + fromOffset);
707  142 if (mappedPos == null)
708    {
709    /*
710    * unmapped position; treat like a gap
711    */
712  94 sourceGapMappedLength += ratio;
713    // jalview.bin.Console.errPrintln("Can't align: no codon mapping to
714    // residue "
715    // + sourceDsPos + "(" + sourceChar + ")");
716    // return;
717  94 continue;
718    }
719   
720  48 int mappedCodonStart = mappedPos[0]; // position (1...) of codon start
721  48 int mappedCodonEnd = mappedPos[mappedPos.length - 1]; // codon end pos
722  48 StringBuilder trailingCopiedGap = new StringBuilder();
723   
724    /*
725    * Copy dna sequence up to and including this codon. Optionally, include
726    * gaps before the codon starts (in introns) and/or after the codon starts
727    * (in exons).
728    *
729    * Note this only works for 'linear' splicing, not reverse or interleaved.
730    * But then 'align dna as protein' doesn't make much sense otherwise.
731    */
732  48 int intronLength = 0;
733  294 while (basesWritten + toOffset < mappedCodonEnd
734    && thisSeqPos < toLength)
735    {
736  246 final char c = alignTo.getCharAt(thisSeqPos++);
737  246 if (c != myGapChar)
738    {
739  146 basesWritten++;
740  146 int sourcePosition = basesWritten + toOffset;
741  146 if (sourcePosition < mappedCodonStart)
742    {
743    /*
744    * Found an unmapped (intron) base. First add in any preceding gaps
745    * (if wanted).
746    */
747  48 if (preserveUnmappedGaps && trailingCopiedGap.length() > 0)
748    {
749  17 thisAligned.append(trailingCopiedGap.toString());
750  17 intronLength += trailingCopiedGap.length();
751  17 trailingCopiedGap = new StringBuilder();
752    }
753  48 intronLength++;
754  48 inExon = false;
755    }
756    else
757    {
758  98 final boolean startOfCodon = sourcePosition == mappedCodonStart;
759  98 int gapsToAdd = calculateGapsToInsert(preserveMappedGaps,
760    preserveUnmappedGaps, sourceGapMappedLength, inExon,
761    trailingCopiedGap.length(), intronLength, startOfCodon);
762  215 for (int k = 0; k < gapsToAdd; k++)
763    {
764  117 thisAligned.append(myGapChar);
765    }
766  98 sourceGapMappedLength = 0;
767  98 inExon = true;
768    }
769  146 thisAligned.append(c);
770  146 trailingCopiedGap = new StringBuilder();
771    }
772    else
773    {
774  100 if (inExon && preserveMappedGaps)
775    {
776  32 trailingCopiedGap.append(myGapChar);
777    }
778  68 else if (!inExon && preserveUnmappedGaps)
779    {
780  27 trailingCopiedGap.append(myGapChar);
781    }
782    }
783    }
784    }
785   
786    /*
787    * At end of model aligned sequence. Copy any remaining target sequence, optionally
788    * including (intron) gaps.
789    */
790  129 while (thisSeqPos < toLength)
791    {
792  110 final char c = alignTo.getCharAt(thisSeqPos++);
793  110 if (c != myGapChar || preserveUnmappedGaps)
794    {
795  102 thisAligned.append(c);
796    }
797  110 sourceGapMappedLength--;
798    }
799   
800    /*
801    * finally add gaps to pad for any trailing source gaps or
802    * unmapped characters
803    */
804  19 if (preserveUnmappedGaps)
805    {
806  24 while (sourceGapMappedLength > 0)
807    {
808  12 thisAligned.append(myGapChar);
809  12 sourceGapMappedLength--;
810    }
811    }
812   
813    /*
814    * All done aligning, set the aligned sequence.
815    */
816  19 alignTo.setSequence(new String(thisAligned));
817    }
818   
819    /**
820    * Helper method to work out how many gaps to insert when realigning.
821    *
822    * @param preserveMappedGaps
823    * @param preserveUnmappedGaps
824    * @param sourceGapMappedLength
825    * @param inExon
826    * @param trailingCopiedGap
827    * @param intronLength
828    * @param startOfCodon
829    * @return
830    */
 
831  98 toggle protected static int calculateGapsToInsert(boolean preserveMappedGaps,
832    boolean preserveUnmappedGaps, int sourceGapMappedLength,
833    boolean inExon, int trailingGapLength, int intronLength,
834    final boolean startOfCodon)
835    {
836  98 int gapsToAdd = 0;
837  98 if (startOfCodon)
838    {
839    /*
840    * Reached start of codon. Ignore trailing gaps in intron unless we are
841    * preserving gaps in both exon and intron. Ignore them anyway if the
842    * protein alignment introduces a gap at least as large as the intronic
843    * region.
844    */
845  40 if (inExon && !preserveMappedGaps)
846    {
847  4 trailingGapLength = 0;
848    }
849  40 if (!inExon && !(preserveMappedGaps && preserveUnmappedGaps))
850    {
851  19 trailingGapLength = 0;
852    }
853  40 if (inExon)
854    {
855  14 gapsToAdd = Math.max(sourceGapMappedLength, trailingGapLength);
856    }
857    else
858    {
859  26 if (intronLength + trailingGapLength <= sourceGapMappedLength)
860    {
861  20 gapsToAdd = sourceGapMappedLength - intronLength;
862    }
863    else
864    {
865  6 gapsToAdd = Math.min(
866    intronLength + trailingGapLength - sourceGapMappedLength,
867    trailingGapLength);
868    }
869    }
870    }
871    else
872    {
873    /*
874    * second or third base of codon; check for any gaps in dna
875    */
876  58 if (!preserveMappedGaps)
877    {
878  32 trailingGapLength = 0;
879    }
880  58 gapsToAdd = Math.max(sourceGapMappedLength, trailingGapLength);
881    }
882  98 return gapsToAdd;
883    }
884   
885    /**
886    * Realigns the given protein to match the alignment of the dna, using codon
887    * mappings to translate aligned codon positions to protein residues.
888    *
889    * @param protein
890    * the alignment whose sequences are realigned by this method
891    * @param dna
892    * the dna alignment whose alignment we are 'copying'
893    * @return the number of sequences that were realigned
894    */
 
895  5 toggle public static int alignProteinAsDna(AlignmentI protein, AlignmentI dna)
896    {
897  5 if (protein.isNucleotide() || !dna.isNucleotide())
898    {
899  0 jalview.bin.Console
900    .errPrintln("Wrong alignment type in alignProteinAsDna");
901  0 return 0;
902    }
903  5 List<SequenceI> unmappedProtein = new ArrayList<>();
904  5 Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons = buildCodonColumnsMap(
905    protein, dna, unmappedProtein);
906  5 return alignProteinAs(protein, alignedCodons, unmappedProtein);
907    }
908   
909    /**
910    * Realigns the given dna to match the alignment of the protein, using codon
911    * mappings to translate aligned peptide positions to codons.
912    *
913    * Always produces a padded CDS alignment.
914    *
915    * @param dna
916    * the alignment whose sequences are realigned by this method
917    * @param protein
918    * the protein alignment whose alignment we are 'copying'
919    * @return the number of sequences that were realigned
920    */
 
921  4 toggle public static int alignCdsAsProtein(AlignmentI dna, AlignmentI protein)
922    {
923  4 if (protein.isNucleotide() || !dna.isNucleotide())
924    {
925  0 jalview.bin.Console
926    .errPrintln("Wrong alignment type in alignProteinAsDna");
927  0 return 0;
928    }
929    // todo: implement this
930  4 List<AlignedCodonFrame> mappings = protein.getCodonFrames();
931  4 int alignedCount = 0;
932  4 int width = 0; // alignment width for padding CDS
933  4 for (SequenceI dnaSeq : dna.getSequences())
934    {
935  5 if (alignCdsSequenceAsProtein(dnaSeq, protein, mappings,
936    dna.getGapCharacter()))
937    {
938  5 alignedCount++;
939    }
940  5 width = Math.max(dnaSeq.getLength(), width);
941    }
942  4 int oldwidth;
943  4 int diff;
944  4 for (SequenceI dnaSeq : dna.getSequences())
945    {
946  5 oldwidth = dnaSeq.getLength();
947  5 diff = width - oldwidth;
948  5 if (diff > 0)
949    {
950  1 dnaSeq.insertCharAt(oldwidth, diff, dna.getGapCharacter());
951    }
952    }
953  4 return alignedCount;
954    }
955   
956    /**
957    * Helper method to align (if possible) the dna sequence to match the
958    * alignment of a mapped protein sequence. This is currently limited to
959    * handling coding sequence only.
960    *
961    * @param cdsSeq
962    * @param protein
963    * @param mappings
964    * @param gapChar
965    * @return
966    */
 
967  5 toggle static boolean alignCdsSequenceAsProtein(SequenceI cdsSeq,
968    AlignmentI protein, List<AlignedCodonFrame> mappings,
969    char gapChar)
970    {
971  5 SequenceI cdsDss = cdsSeq.getDatasetSequence();
972  5 if (cdsDss == null)
973    {
974  0 System.err
975    .println("alignCdsSequenceAsProtein needs aligned sequence!");
976  0 return false;
977    }
978   
979  5 List<AlignedCodonFrame> dnaMappings = MappingUtils
980    .findMappingsForSequence(cdsSeq, mappings);
981  5 for (AlignedCodonFrame mapping : dnaMappings)
982    {
983  5 SequenceI peptide = mapping.findAlignedSequence(cdsSeq, protein);
984  5 if (peptide != null)
985    {
986  5 final int peptideLength = peptide.getLength();
987  5 Mapping map = mapping.getMappingBetween(cdsSeq, peptide);
988  5 if (map != null)
989    {
990  5 MapList mapList = map.getMap();
991  5 if (map.getTo() == peptide.getDatasetSequence())
992    {
993  5 mapList = mapList.getInverse();
994    }
995  5 final int cdsLength = cdsDss.getLength();
996  5 int mappedFromLength = MappingUtils
997    .getLength(mapList.getFromRanges());
998  5 int mappedToLength = MappingUtils
999    .getLength(mapList.getToRanges());
1000  5 boolean addStopCodon = (cdsLength == mappedFromLength
1001    * CODON_LENGTH + CODON_LENGTH)
1002    || (peptide.getDatasetSequence()
1003    .getLength() == mappedFromLength - 1);
1004  5 if (cdsLength != mappedToLength && !addStopCodon)
1005    {
1006  0 jalview.bin.Console.errPrintln(String.format(
1007    "Can't align cds as protein (length mismatch %d/%d): %s",
1008    cdsLength, mappedToLength, cdsSeq.getName()));
1009    }
1010   
1011    /*
1012    * pre-fill the aligned cds sequence with gaps
1013    */
1014  5 char[] alignedCds = new char[peptideLength * CODON_LENGTH
1015  5 + (addStopCodon ? CODON_LENGTH : 0)];
1016  5 Arrays.fill(alignedCds, gapChar);
1017   
1018    /*
1019    * walk over the aligned peptide sequence and insert mapped
1020    * codons for residues in the aligned cds sequence
1021    */
1022  5 int copiedBases = 0;
1023  5 int cdsStart = cdsDss.getStart();
1024  5 int proteinPos = peptide.getStart() - 1;
1025  5 int cdsCol = 0;
1026   
1027  33 for (int col = 0; col < peptideLength; col++)
1028    {
1029  28 char residue = peptide.getCharAt(col);
1030   
1031  28 if (Comparison.isGap(residue))
1032    {
1033  13 cdsCol += CODON_LENGTH;
1034    }
1035    else
1036    {
1037  15 proteinPos++;
1038  15 int[] codon = mapList.locateInTo(proteinPos, proteinPos);
1039  15 if (codon == null)
1040    {
1041    // e.g. incomplete start codon, X in peptide
1042  0 cdsCol += CODON_LENGTH;
1043    }
1044    else
1045    {
1046  60 for (int j = codon[0]; j <= codon[1]; j++)
1047    {
1048  45 char mappedBase = cdsDss.getCharAt(j - cdsStart);
1049  45 alignedCds[cdsCol++] = mappedBase;
1050  45 copiedBases++;
1051    }
1052    }
1053    }
1054    }
1055   
1056    /*
1057    * append stop codon if not mapped from protein,
1058    * closing it up to the end of the mapped sequence
1059    */
1060  5 if (copiedBases == cdsLength - CODON_LENGTH)
1061    {
1062  0 for (int i = alignedCds.length - 1; i >= 0; i--)
1063    {
1064  0 if (!Comparison.isGap(alignedCds[i]))
1065    {
1066  0 cdsCol = i + 1; // gap just after end of sequence
1067  0 break;
1068    }
1069    }
1070  0 for (int i = cdsLength - CODON_LENGTH; i < cdsLength; i++)
1071    {
1072  0 alignedCds[cdsCol++] = cdsDss.getCharAt(i);
1073    }
1074    }
1075  5 cdsSeq.setSequence(new String(alignedCds));
1076  5 return true;
1077    }
1078    }
1079    }
1080  0 return false;
1081    }
1082   
1083    /**
1084    * Builds a map whose key is an aligned codon position (3 alignment column
1085    * numbers base 0), and whose value is a map from protein sequence to each
1086    * protein's peptide residue for that codon. The map generates an ordering of
1087    * the codons, and allows us to read off the peptides at each position in
1088    * order to assemble 'aligned' protein sequences.
1089    *
1090    * @param protein
1091    * the protein alignment
1092    * @param dna
1093    * the coding dna alignment
1094    * @param unmappedProtein
1095    * any unmapped proteins are added to this list
1096    * @return
1097    */
 
1098  5 toggle protected static Map<AlignedCodon, Map<SequenceI, AlignedCodon>> buildCodonColumnsMap(
1099    AlignmentI protein, AlignmentI dna,
1100    List<SequenceI> unmappedProtein)
1101    {
1102    /*
1103    * maintain a list of any proteins with no mappings - these will be
1104    * rendered 'as is' in the protein alignment as we can't align them
1105    */
1106  5 unmappedProtein.addAll(protein.getSequences());
1107   
1108  5 List<AlignedCodonFrame> mappings = protein.getCodonFrames();
1109   
1110    /*
1111    * Map will hold, for each aligned codon position e.g. [3, 5, 6], a map of
1112    * {dnaSequence, {proteinSequence, codonProduct}} at that position. The
1113    * comparator keeps the codon positions ordered.
1114    */
1115  5 Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons = new TreeMap<>(
1116    new CodonComparator());
1117   
1118  5 for (SequenceI dnaSeq : dna.getSequences())
1119    {
1120  30 for (AlignedCodonFrame mapping : mappings)
1121    {
1122  516 SequenceI prot = mapping.findAlignedSequence(dnaSeq, protein);
1123  516 if (prot != null)
1124    {
1125  32 Mapping seqMap = mapping.getMappingForSequence(dnaSeq);
1126  32 addCodonPositions(dnaSeq, prot, protein.getGapCharacter(), seqMap,
1127    alignedCodons);
1128  32 unmappedProtein.remove(prot);
1129    }
1130    }
1131    }
1132   
1133    /*
1134    * Finally add any unmapped peptide start residues (e.g. for incomplete
1135    * codons) as if at the codon position before the second residue
1136    */
1137    // TODO resolve JAL-2022 so this fudge can be removed
1138  5 int mappedSequenceCount = protein.getHeight() - unmappedProtein.size();
1139  5 addUnmappedPeptideStarts(alignedCodons, mappedSequenceCount);
1140   
1141  5 return alignedCodons;
1142    }
1143   
1144    /**
1145    * Scans for any protein mapped from position 2 (meaning unmapped start
1146    * position e.g. an incomplete codon), and synthesizes a 'codon' for it at the
1147    * preceding position in the alignment
1148    *
1149    * @param alignedCodons
1150    * the codon-to-peptide map
1151    * @param mappedSequenceCount
1152    * the number of distinct sequences in the map
1153    */
 
1154  5 toggle protected static void addUnmappedPeptideStarts(
1155    Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons,
1156    int mappedSequenceCount)
1157    {
1158    // TODO delete this ugly hack once JAL-2022 is resolved
1159    // i.e. we can model startPhase > 0 (incomplete start codon)
1160   
1161  5 List<SequenceI> sequencesChecked = new ArrayList<>();
1162  5 AlignedCodon lastCodon = null;
1163  5 Map<SequenceI, AlignedCodon> toAdd = new HashMap<>();
1164   
1165  5 for (Entry<AlignedCodon, Map<SequenceI, AlignedCodon>> entry : alignedCodons
1166    .entrySet())
1167    {
1168  1913 for (Entry<SequenceI, AlignedCodon> sequenceCodon : entry.getValue()
1169    .entrySet())
1170    {
1171  10670 SequenceI seq = sequenceCodon.getKey();
1172  10670 if (sequencesChecked.contains(seq))
1173    {
1174  10640 continue;
1175    }
1176  30 sequencesChecked.add(seq);
1177  30 AlignedCodon codon = sequenceCodon.getValue();
1178  30 if (codon.peptideCol > 1)
1179    {
1180  0 jalview.bin.Console.errPrintln(
1181    "Problem mapping protein with >1 unmapped start positions: "
1182    + seq.getName());
1183    }
1184  30 else if (codon.peptideCol == 1)
1185    {
1186    /*
1187    * first position (peptideCol == 0) was unmapped - add it
1188    */
1189  6 if (lastCodon != null)
1190    {
1191  5 AlignedCodon firstPeptide = new AlignedCodon(lastCodon.pos1,
1192    lastCodon.pos2, lastCodon.pos3,
1193    String.valueOf(seq.getCharAt(0)), 0);
1194  5 toAdd.put(seq, firstPeptide);
1195    }
1196    else
1197    {
1198    /*
1199    * unmapped residue at start of alignment (no prior column) -
1200    * 'insert' at nominal codon [0, 0, 0]
1201    */
1202  1 AlignedCodon firstPeptide = new AlignedCodon(0, 0, 0,
1203    String.valueOf(seq.getCharAt(0)), 0);
1204  1 toAdd.put(seq, firstPeptide);
1205    }
1206    }
1207  30 if (sequencesChecked.size() == mappedSequenceCount)
1208    {
1209    // no need to check past first mapped position in all sequences
1210  5 break;
1211    }
1212    }
1213  1913 lastCodon = entry.getKey();
1214    }
1215   
1216    /*
1217    * add any new codons safely after iterating over the map
1218    */
1219  5 for (Entry<SequenceI, AlignedCodon> startCodon : toAdd.entrySet())
1220    {
1221  6 addCodonToMap(alignedCodons, startCodon.getValue(),
1222    startCodon.getKey());
1223    }
1224    }
1225   
1226    /**
1227    * Update the aligned protein sequences to match the codon alignments given in
1228    * the map.
1229    *
1230    * @param protein
1231    * @param alignedCodons
1232    * an ordered map of codon positions (columns), with sequence/peptide
1233    * values present in each column
1234    * @param unmappedProtein
1235    * @return
1236    */
 
1237  5 toggle protected static int alignProteinAs(AlignmentI protein,
1238    Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons,
1239    List<SequenceI> unmappedProtein)
1240    {
1241    /*
1242    * prefill peptide sequences with gaps
1243    */
1244  5 int alignedWidth = alignedCodons.size();
1245  5 char[] gaps = new char[alignedWidth];
1246  5 Arrays.fill(gaps, protein.getGapCharacter());
1247  5 Map<SequenceI, char[]> peptides = new HashMap<>();
1248  5 for (SequenceI seq : protein.getSequences())
1249    {
1250  31 if (!unmappedProtein.contains(seq))
1251    {
1252  30 peptides.put(seq, Arrays.copyOf(gaps, gaps.length));
1253    }
1254    }
1255   
1256    /*
1257    * Traverse the codons left to right (as defined by CodonComparator)
1258    * and insert peptides in each column where the sequence is mapped.
1259    * This gives a peptide 'alignment' where residues are aligned if their
1260    * corresponding codons occupy the same columns in the cdna alignment.
1261    */
1262  5 int column = 0;
1263  5 for (AlignedCodon codon : alignedCodons.keySet())
1264    {
1265  1914 final Map<SequenceI, AlignedCodon> columnResidues = alignedCodons
1266    .get(codon);
1267  1914 for (Entry<SequenceI, AlignedCodon> entry : columnResidues.entrySet())
1268    {
1269  10682 char residue = entry.getValue().product.charAt(0);
1270  10682 peptides.get(entry.getKey())[column] = residue;
1271    }
1272  1914 column++;
1273    }
1274   
1275    /*
1276    * and finally set the constructed sequences
1277    */
1278  5 for (Entry<SequenceI, char[]> entry : peptides.entrySet())
1279    {
1280  30 entry.getKey().setSequence(new String(entry.getValue()));
1281    }
1282   
1283  5 return 0;
1284    }
1285   
1286    /**
1287    * Populate the map of aligned codons by traversing the given sequence
1288    * mapping, locating the aligned positions of mapped codons, and adding those
1289    * positions and their translation products to the map.
1290    *
1291    * @param dna
1292    * the aligned sequence we are mapping from
1293    * @param protein
1294    * the sequence to be aligned to the codons
1295    * @param gapChar
1296    * the gap character in the dna sequence
1297    * @param seqMap
1298    * a mapping to a sequence translation
1299    * @param alignedCodons
1300    * the map we are building up
1301    */
 
1302  32 toggle static void addCodonPositions(SequenceI dna, SequenceI protein,
1303    char gapChar, Mapping seqMap,
1304    Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons)
1305    {
1306  32 Iterator<AlignedCodon> codons = seqMap.getCodonIterator(dna, gapChar);
1307   
1308    /*
1309    * add codon positions, and their peptide translations, to the alignment
1310    * map, while remembering the first codon mapped
1311    */
1312  10716 while (codons.hasNext())
1313    {
1314  10684 try
1315    {
1316  10684 AlignedCodon codon = codons.next();
1317  10684 addCodonToMap(alignedCodons, codon, protein);
1318    } catch (IncompleteCodonException e)
1319    {
1320    // possible incomplete trailing codon - ignore
1321    } catch (NoSuchElementException e)
1322    {
1323    // possibly peptide lacking STOP
1324    }
1325    }
1326    }
1327   
1328    /**
1329    * Helper method to add a codon-to-peptide entry to the aligned codons map
1330    *
1331    * @param alignedCodons
1332    * @param codon
1333    * @param protein
1334    */
 
1335  10690 toggle protected static void addCodonToMap(
1336    Map<AlignedCodon, Map<SequenceI, AlignedCodon>> alignedCodons,
1337    AlignedCodon codon, SequenceI protein)
1338    {
1339  10690 Map<SequenceI, AlignedCodon> seqProduct = alignedCodons.get(codon);
1340  10690 if (seqProduct == null)
1341    {
1342  1914 seqProduct = new HashMap<>();
1343  1914 alignedCodons.put(codon, seqProduct);
1344    }
1345  10690 seqProduct.put(protein, codon);
1346    }
1347   
1348    /**
1349    * Returns true if a cDNA/Protein mapping either exists, or could be made,
1350    * between at least one pair of sequences in the two alignments. Currently,
1351    * the logic is:
1352    * <ul>
1353    * <li>One alignment must be nucleotide, and the other protein</li>
1354    * <li>At least one pair of sequences must be already mapped, or mappable</li>
1355    * <li>Mappable means the nucleotide translation matches the protein
1356    * sequence</li>
1357    * <li>The translation may ignore start and stop codons if present in the
1358    * nucleotide</li>
1359    * </ul>
1360    *
1361    * @param al1
1362    * @param al2
1363    * @return
1364    */
 
1365  31 toggle public static boolean isMappable(AlignmentI al1, AlignmentI al2)
1366    {
1367  31 if (al1 == null || al2 == null || al1 == al2)
1368    {
1369  5 return false;
1370    }
1371   
1372    /*
1373    * Require one nucleotide and one protein
1374    */
1375  26 if (al1.isNucleotide() == al2.isNucleotide())
1376    {
1377    // or if protein - check if alternate coding
1378  14 if (al1.isNucleotide())
1379    {
1380  7 return false;
1381    }
1382  7 return check3diPeptideMapping(al1,al2);
1383    }
1384  12 AlignmentI dna = al1.isNucleotide() ? al1 : al2;
1385  12 AlignmentI protein = dna == al1 ? al2 : al1;
1386  12 List<AlignedCodonFrame> mappings = protein.getCodonFrames();
1387  12 for (SequenceI dnaSeq : dna.getSequences())
1388    {
1389  13 for (SequenceI proteinSeq : protein.getSequences())
1390    {
1391  13 if (isMappable(dnaSeq, proteinSeq, mappings))
1392    {
1393  2 return true;
1394    }
1395    }
1396    }
1397  10 return false;
1398    }
 
1399  7 toggle public static boolean check3diPeptideMapping(AlignmentI al1, AlignmentI al2)
1400    {
1401  7 if (al1.getHeight()!=al2.getHeight())
1402  1 { return false;
1403    }
1404  6 int mappable=0;
1405  6 for (SequenceI al1seq: al1.getSequences())
1406    {
1407  34 boolean foundMappable = false;
1408  34 for (SequenceI al2seq:al2.getSequences())
1409    {
1410  454 if (canBuild3diMapping(al1seq,al2seq))
1411    {
1412  2 foundMappable = true;
1413  2 break;
1414    }
1415    }
1416  34 if (foundMappable)
1417    {
1418  2 mappable++;
1419    }
1420    }
1421  6 if (mappable>0 && mappable >= (al1.getHeight()*9/10))
1422    {
1423    // TODO allow optional if mappable > a few
1424  2 return true;
1425    }
1426  4 return false;
1427    }
1428   
1429    /**
1430    * exact name, start-end, and identical length non-gap sequences
1431    * @param al1seq
1432    * @param al2seq
1433    * @return
1434    */
 
1435  466 toggle public static boolean canBuild3diMapping(SequenceI al1seq,
1436    SequenceI al2seq)
1437    {
1438  466 if (!al1seq.getDisplayId(true)
1439    .equalsIgnoreCase(al2seq.getDisplayId(true))) {
1440  458 return false;
1441    }
1442  8 String s1 = AlignSeq
1443    .extractGaps(jalview.util.Comparison.GapChars,
1444    al1seq.getSequenceAsString());
1445  8 String s2 = AlignSeq.extractGaps(
1446    jalview.util.Comparison.GapChars,
1447    al2seq.getSequenceAsString());
1448  8 return s1.length()==s2.length();
1449    }
1450   
 
1451  3 toggle public static boolean map3diPeptideToProteinAligment(
1452    AlignmentI proteinAlignment, AlignmentI tdiAlignment)
1453    {
1454  3 if (proteinAlignment==null || tdiAlignment==null)
1455    {
1456  0 return false;
1457    }
1458  3 Set<SequenceI> mappedDna = new HashSet<>();
1459  3 Set<SequenceI> mappedProtein = new HashSet<>();
1460   
1461    /*
1462    * First pass - map sequences where cross-references exist. This include
1463    * 1-to-many mappings to support, for example, variant cDNA.
1464    */
1465  3 boolean mappingPerformed = mapProteinToTdiAlignment(proteinAlignment,
1466    tdiAlignment, mappedDna, mappedProtein);
1467  3 return mappingPerformed;
1468   
1469   
1470    }
1471   
1472   
1473    /**
1474    * Make mappings between compatible sequences (ids are identical, length of seqs are identical).
1475    *
1476    * @param proteinAlignment
1477    * @param tdiAlignment
1478    * @param mappedTdiSeq
1479    * a set of mapped DNA sequences (to add to)
1480    * @param mappedProtein
1481    * a set of mapped Protein sequences (to add to)
1482    * @param xrefsOnly
1483    * if true, only map sequences where xrefs exist
1484    * @return
1485    */
 
1486  3 toggle protected static boolean mapProteinToTdiAlignment(
1487    final AlignmentI proteinAlignment, final AlignmentI tdiAlignment,
1488    Set<SequenceI> mappedTdiSeq, Set<SequenceI> mappedProtein)
1489    {
1490  3 boolean mappingExistsOrAdded = false;
1491  3 List<SequenceI> thisSeqs = proteinAlignment.getSequences();
1492  3 for (SequenceI aaSeq : thisSeqs)
1493    {
1494  6 aaSeq = aaSeq.getDatasetSequence()!=null ? aaSeq.getDatasetSequence():null;
1495  6 boolean proteinMapped = false;
1496  6 AlignedCodonFrame acf = new AlignedCodonFrame();
1497   
1498  6 for (SequenceI cdnaSeq : tdiAlignment.getSequences())
1499    {
1500    // resolve dataset sequences
1501  12 cdnaSeq = cdnaSeq.getDatasetSequence()!=null ? cdnaSeq.getDatasetSequence():null;
1502   
1503  12 if (mappingExists(proteinAlignment.getCodonFrames(),
1504    aaSeq, cdnaSeq))
1505    {
1506  0 mappingExistsOrAdded = true;
1507    }
1508    else
1509    {
1510   
1511  12 if (canBuild3diMapping(aaSeq, cdnaSeq))
1512    {
1513  6 MapList map = new MapList(new int[] { aaSeq.getStart(),aaSeq.getEnd()},new int[] { cdnaSeq.getStart(),cdnaSeq.getEnd()},1,1);
1514  6 acf.addMap(cdnaSeq, aaSeq, map);
1515  6 mappingExistsOrAdded = true;
1516  6 proteinMapped = true;
1517  6 mappedTdiSeq.add(cdnaSeq);
1518  6 mappedProtein.add(aaSeq);
1519    }
1520    }
1521    }
1522  6 if (proteinMapped)
1523    {
1524  6 proteinAlignment.addCodonFrame(acf);
1525    }
1526    }
1527  3 return mappingExistsOrAdded;
1528    }
1529   
1530   
1531    /**
1532    * Returns true if the dna sequence is mapped, or could be mapped, to the
1533    * protein sequence.
1534    *
1535    * @param dnaSeq
1536    * @param proteinSeq
1537    * @param mappings
1538    * @return
1539    */
 
1540  13 toggle protected static boolean isMappable(SequenceI dnaSeq,
1541    SequenceI proteinSeq, List<AlignedCodonFrame> mappings)
1542    {
1543  13 if (dnaSeq == null || proteinSeq == null)
1544    {
1545  0 return false;
1546    }
1547   
1548  13 SequenceI dnaDs = dnaSeq.getDatasetSequence() == null ? dnaSeq
1549    : dnaSeq.getDatasetSequence();
1550  13 SequenceI proteinDs = proteinSeq.getDatasetSequence() == null
1551    ? proteinSeq
1552    : proteinSeq.getDatasetSequence();
1553   
1554  13 for (AlignedCodonFrame mapping : mappings)
1555    {
1556  0 if (proteinDs == mapping.getAaForDnaSeq(dnaDs))
1557    {
1558    /*
1559    * already mapped
1560    */
1561  0 return true;
1562    }
1563    }
1564   
1565    /*
1566    * Just try to make a mapping (it is not yet stored), test whether
1567    * successful.
1568    */
1569  13 return mapCdnaToProtein(proteinDs, dnaDs) != null;
1570    }
1571   
1572    /**
1573    * Finds any reference annotations associated with the sequences in
1574    * sequenceScope, that are not already added to the alignment, and adds them
1575    * to the 'candidates' map. Also populates a lookup table of annotation
1576    * labels, keyed by calcId, for use in constructing tooltips or the like.
1577    *
1578    * @param sequenceScope
1579    * the sequences to scan for reference annotations
1580    * @param labelForCalcId
1581    * (optional) map to populate with label for calcId
1582    * @param candidates
1583    * map to populate with annotations for sequence
1584    * @param al
1585    * the alignment to check for presence of annotations
1586    */
 
1587  84 toggle public static void findAddableReferenceAnnotations(
1588    List<SequenceI> sequenceScope, Map<String, String> labelForCalcId,
1589    final Map<SequenceI, List<AlignmentAnnotation>> candidates,
1590    AlignmentI al)
1591    {
1592  84 if (sequenceScope == null)
1593    {
1594  1 return;
1595    }
1596   
1597    /*
1598    * For each sequence in scope, make a list of any annotations on the
1599    * underlying dataset sequence which are not already on the alignment.
1600    *
1601    * Add to a map of { alignmentSequence, <List of annotations to add> }
1602    */
1603  83 for (SequenceI seq : sequenceScope)
1604    {
1605  80 SequenceI dataset = seq.getDatasetSequence();
1606  80 if (dataset == null)
1607    {
1608  0 continue;
1609    }
1610  80 AlignmentAnnotation[] datasetAnnotations = dataset.getAnnotation();
1611  80 if (datasetAnnotations == null)
1612    {
1613  35 continue;
1614    }
1615  45 final List<AlignmentAnnotation> result = new ArrayList<>();
1616  45 for (AlignmentAnnotation dsann : datasetAnnotations)
1617    {
1618  156 if (dsann.annotations == null) {
1619  0 continue;// ignore non-positional annotation
1620    }
1621    /*
1622    * Find matching annotations on the alignment. If none is found, then
1623    * add this annotation to the list of 'addable' annotations for this
1624    * sequence.
1625    */
1626  156 final Iterable<AlignmentAnnotation> matchedAlignmentAnnotations = al
1627    .findAnnotations(seq, dsann.getCalcId(), dsann.label);
1628  156 boolean found = false;
1629  156 if (matchedAlignmentAnnotations != null)
1630    {
1631  153 for (AlignmentAnnotation matched : matchedAlignmentAnnotations)
1632    {
1633  127 if (dsann.description.equals(matched.description))
1634    {
1635  49 found = true;
1636  49 break;
1637    }
1638    }
1639    }
1640  156 if (!found) {
1641  107 result.add(dsann);
1642  107 if (labelForCalcId != null)
1643    {
1644  10 labelForCalcId.put(dsann.getCalcId(), dsann.label);
1645    }
1646    }
1647    }
1648    /*
1649    * Save any addable annotations for this sequence
1650    */
1651  45 if (!result.isEmpty())
1652    {
1653  43 candidates.put(seq, result);
1654    }
1655    }
1656    }
1657   
1658    /**
1659    * Adds annotations to the top of the alignment annotations, in the same order
1660    * as their related sequences. If you already have an annotation and want to
1661    * add it to a sequence in an alignment use {@code addReferenceAnnotationTo}
1662    *
1663    * @param annotations
1664    * the annotations to add
1665    * @param alignment
1666    * the alignment to add them to
1667    * @param selectionGroup
1668    * current selection group - may be null, if provided then any added
1669    * annotation will be trimmed to just those columns in the selection
1670    * group
1671    */
 
1672  38 toggle public static void addReferenceAnnotations(
1673    Map<SequenceI, List<AlignmentAnnotation>> annotations,
1674    final AlignmentI alignment, final SequenceGroup selectionGroup)
1675    {
1676  38 for (SequenceI seq : annotations.keySet())
1677    {
1678  39 for (AlignmentAnnotation ann : annotations.get(seq))
1679    {
1680  101 addReferenceAnnotationTo(alignment, seq, ann, selectionGroup);
1681    }
1682    }
1683    }
1684   
 
1685  4 toggle public static boolean isSSAnnotationPresent(
1686    Map<SequenceI, List<AlignmentAnnotation>> annotations)
1687    {
1688   
1689  4 for (SequenceI seq : annotations.keySet())
1690    {
1691  3 if (isSecondaryStructurePresent(
1692    annotations.get(seq).toArray(new AlignmentAnnotation[0])))
1693    {
1694  2 return true;
1695    }
1696    }
1697  2 return false;
1698    }
1699   
1700    /**
1701    * Make a copy of a reference annotation {@code ann} and add it to an
1702    * alignment sequence {@code seq} in {@code alignment}, optionally limited to
1703    * the extent of {@code selectionGroup}
1704    *
1705    * @param alignment
1706    * @param seq
1707    * @param ann
1708    * @param selectionGroup
1709    * current selection group - may be null, if provided then any added
1710    * annotation will be trimmed to just those columns in the selection
1711    * group
1712    * @return annotation added to {@code seq and {@code alignment}
1713    */
 
1714  105 toggle public static AlignmentAnnotation addReferenceAnnotationTo(
1715    final AlignmentI alignment, final SequenceI seq,
1716    final AlignmentAnnotation ann, final SequenceGroup selectionGroup)
1717    {
1718  105 AlignmentAnnotation copyAnn = new AlignmentAnnotation(ann);
1719  105 int startRes = 0;
1720  105 int endRes = ann.annotations.length;
1721  105 if (selectionGroup != null)
1722    {
1723  2 startRes = -1 + Math.min(seq.getEnd(), Math.max(seq.getStart(),
1724    seq.findPosition(selectionGroup.getStartRes())));
1725  2 endRes = -1 + Math.min(seq.getEnd(),
1726    seq.findPosition(selectionGroup.getEndRes()));
1727   
1728    }
1729  105 copyAnn.restrict(startRes, endRes + 0);
1730   
1731    /*
1732    * Add to the sequence (sets copyAnn.datasetSequence), unless the
1733    * original annotation is already on the sequence.
1734    */
1735  105 if (!seq.hasAnnotation(ann))
1736    {
1737  105 ContactMatrixI cm = seq.getDatasetSequence().getContactMatrixFor(ann);
1738  105 if (cm != null)
1739    {
1740  29 seq.addContactListFor(copyAnn, cm);
1741    }
1742  105 seq.addAlignmentAnnotation(copyAnn);
1743    }
1744    // adjust for gaps
1745  105 copyAnn.adjustForAlignment();
1746    // add to the alignment and set visible
1747  105 alignment.addAnnotation(copyAnn);
1748  105 copyAnn.visible = true;
1749   
1750  105 return copyAnn;
1751    }
1752   
1753    /**
1754    * Set visibility of alignment annotations of specified types (labels), for
1755    * specified sequences. This supports controls like "Show all secondary
1756    * structure", "Hide all Temp factor", etc.
1757    *
1758    * @al the alignment to scan for annotations
1759    * @param types
1760    * the types (labels) of annotations to be updated
1761    * @param forSequences
1762    * if not null, only annotations linked to one of these sequences are
1763    * in scope for update; if null, acts on all sequence annotations
1764    * @param anyType
1765    * if this flag is true, 'types' is ignored (label not checked)
1766    * @param doShow
1767    * if true, set visibility on, else set off
1768    */
 
1769  5 toggle public static void showOrHideSequenceAnnotations(AlignmentI al,
1770    Collection<String> types, List<SequenceI> forSequences,
1771    boolean anyType, boolean doShow)
1772    {
1773  5 AlignmentAnnotation[] anns = al.getAlignmentAnnotation();
1774  5 if (anns != null)
1775    {
1776  5 for (AlignmentAnnotation aa : anns)
1777    {
1778  30 if (anyType || types.contains(aa.label))
1779    {
1780  21 if ((aa.sequenceRef != null) && (forSequences == null
1781    || forSequences.contains(aa.sequenceRef)))
1782    {
1783  11 aa.visible = doShow;
1784    }
1785    }
1786    }
1787    }
1788    }
1789   
1790    /**
1791    * Shows or hides auto calculated annotations for a sequence group.
1792    *
1793    * @param al
1794    * The alignment object with the annotations.
1795    * @param type
1796    * The type of annotation to show or hide.
1797    * @param selectedGroup
1798    * The sequence group for which the annotations should be shown or
1799    * hidden.
1800    * @param anyType
1801    * If true, all types of annotations will be shown/hidden.
1802    * @param doShow
1803    * If true, the annotations will be shown; if false, annotations will
1804    * be hidden.
1805    */
 
1806  0 toggle public static void showOrHideAutoCalculatedAnnotationsForGroup(
1807    AlignmentI al, String type, SequenceGroup selectedGroup,
1808    boolean anyType, boolean doShow)
1809    {
1810    // Get all alignment annotations
1811  0 AlignmentAnnotation[] anns = al.getAlignmentAnnotation();
1812   
1813  0 if (anns != null)
1814    {
1815  0 for (AlignmentAnnotation aa : anns)
1816    {
1817    // Check if anyType is true or if the annotation's label contains the
1818    // specified type (currently for secondary structure consensus)
1819  0 if ((anyType && aa.label
1820    .startsWith(Constants.SECONDARY_STRUCTURE_CONSENSUS_LABEL))
1821    || aa.label.startsWith(type))
1822    {
1823    // If the annotation's group reference is not null and matches the
1824    // selected group, update its visibility.
1825  0 if (aa.groupRef != null && selectedGroup == aa.groupRef)
1826    {
1827  0 aa.visible = doShow;
1828    }
1829    }
1830    }
1831    }
1832    }
1833   
 
1834  0 toggle public static AlignmentAnnotation getFirstSequenceAnnotationOfType(
1835    AlignmentI al, int graphType)
1836    {
1837  0 AlignmentAnnotation[] anns = al.getAlignmentAnnotation();
1838  0 if (anns != null)
1839    {
1840  0 for (AlignmentAnnotation aa : anns)
1841    {
1842  0 if (aa.sequenceRef != null && aa.graph == graphType)
1843  0 return aa;
1844    }
1845    }
1846  0 return null;
1847    }
1848   
1849    /**
1850    * Returns true if either sequence has a cross-reference to the other
1851    *
1852    * @param seq1
1853    * @param seq2
1854    * @return
1855    */
 
1856  52 toggle public static boolean haveCrossRef(SequenceI seq1, SequenceI seq2)
1857    {
1858    // Note: moved here from class CrossRef as the latter class has dependencies
1859    // not availability to the applet's classpath
1860  52 return hasCrossRef(seq1, seq2) || hasCrossRef(seq2, seq1);
1861    }
1862   
1863    /**
1864    * Returns true if seq1 has a cross-reference to seq2. Currently this assumes
1865    * that sequence name is structured as Source|AccessionId.
1866    *
1867    * @param seq1
1868    * @param seq2
1869    * @return
1870    */
 
1871  108 toggle public static boolean hasCrossRef(SequenceI seq1, SequenceI seq2)
1872    {
1873  108 if (seq1 == null || seq2 == null)
1874    {
1875  8 return false;
1876    }
1877  100 String name = seq2.getName();
1878  100 final List<DBRefEntry> xrefs = seq1.getDBRefs();
1879  100 if (xrefs != null)
1880    {
1881  34 for (int ix = 0, nx = xrefs.size(); ix < nx; ix++)
1882    {
1883  24 DBRefEntry xref = xrefs.get(ix);
1884  24 String xrefName = xref.getSource() + "|" + xref.getAccessionId();
1885    // case-insensitive test, consistent with DBRefEntry.equalRef()
1886  24 if (xrefName.equalsIgnoreCase(name))
1887    {
1888  12 return true;
1889    }
1890    }
1891    }
1892  88 return false;
1893    }
1894   
1895    /**
1896    * Constructs an alignment consisting of the mapped (CDS) regions in the given
1897    * nucleotide sequences, and updates mappings to match. The CDS sequences are
1898    * added to the original alignment's dataset, which is shared by the new
1899    * alignment. Mappings from nucleotide to CDS, and from CDS to protein, are
1900    * added to the alignment dataset.
1901    *
1902    * @param dna
1903    * aligned nucleotide (dna or cds) sequences
1904    * @param dataset
1905    * the alignment dataset the sequences belong to
1906    * @param products
1907    * (optional) to restrict results to CDS that map to specified
1908    * protein products
1909    * @return an alignment whose sequences are the cds-only parts of the dna
1910    * sequences (or null if no mappings are found)
1911    */
 
1912  6 toggle public static AlignmentI makeCdsAlignment(SequenceI[] dna,
1913    AlignmentI dataset, SequenceI[] products)
1914    {
1915  6 if (dataset == null || dataset.getDataset() != null)
1916    {
1917  0 throw new IllegalArgumentException(
1918    "IMPLEMENTATION ERROR: dataset.getDataset() must be null!");
1919    }
1920  6 List<SequenceI> foundSeqs = new ArrayList<>();
1921  6 List<SequenceI> cdsSeqs = new ArrayList<>();
1922  6 List<AlignedCodonFrame> mappings = dataset.getCodonFrames();
1923  6 HashSet<SequenceI> productSeqs = null;
1924  6 if (products != null)
1925    {
1926  3 productSeqs = new HashSet<>();
1927  3 for (SequenceI seq : products)
1928    {
1929  24 productSeqs.add(seq.getDatasetSequence() == null ? seq
1930    : seq.getDatasetSequence());
1931    }
1932    }
1933   
1934    /*
1935    * Construct CDS sequences from mappings on the alignment dataset.
1936    * The logic is:
1937    * - find the protein product(s) mapped to from each dna sequence
1938    * - if the mapping covers the whole dna sequence (give or take start/stop
1939    * codon), take the dna as the CDS sequence
1940    * - else search dataset mappings for a suitable dna sequence, i.e. one
1941    * whose whole sequence is mapped to the protein
1942    * - if no sequence found, construct one from the dna sequence and mapping
1943    * (and add it to dataset so it is found if this is repeated)
1944    */
1945  6 for (SequenceI dnaSeq : dna)
1946    {
1947  52 SequenceI dnaDss = dnaSeq.getDatasetSequence() == null ? dnaSeq
1948    : dnaSeq.getDatasetSequence();
1949   
1950  52 List<AlignedCodonFrame> seqMappings = MappingUtils
1951    .findMappingsForSequence(dnaSeq, mappings);
1952  52 for (AlignedCodonFrame mapping : seqMappings)
1953    {
1954  42 List<Mapping> mappingsFromSequence = mapping
1955    .getMappingsFromSequence(dnaSeq);
1956   
1957  42 for (Mapping aMapping : mappingsFromSequence)
1958    {
1959  44 MapList mapList = aMapping.getMap();
1960  44 if (mapList.getFromRatio() == 1)
1961    {
1962    /*
1963    * not a dna-to-protein mapping (likely dna-to-cds)
1964    */
1965  11 continue;
1966    }
1967   
1968    /*
1969    * skip if mapping is not to one of the target set of proteins
1970    */
1971  33 SequenceI proteinProduct = aMapping.getTo();
1972  33 if (productSeqs != null && !productSeqs.contains(proteinProduct))
1973    {
1974  2 continue;
1975    }
1976   
1977    /*
1978    * try to locate the CDS from the dataset mappings;
1979    * guard against duplicate results (for the case that protein has
1980    * dbrefs to both dna and cds sequences)
1981    */
1982  31 SequenceI cdsSeq = findCdsForProtein(mappings, dnaSeq,
1983    seqMappings, aMapping);
1984  31 if (cdsSeq != null)
1985    {
1986  11 if (!foundSeqs.contains(cdsSeq))
1987    {
1988  11 foundSeqs.add(cdsSeq);
1989  11 SequenceI derivedSequence = cdsSeq.deriveSequence();
1990  11 cdsSeqs.add(derivedSequence);
1991  11 if (!dataset.getSequences().contains(cdsSeq))
1992    {
1993  0 dataset.addSequence(cdsSeq);
1994    }
1995    }
1996  11 continue;
1997    }
1998   
1999    /*
2000    * didn't find mapped CDS sequence - construct it and add
2001    * its dataset sequence to the dataset
2002    */
2003  20 cdsSeq = makeCdsSequence(dnaSeq.getDatasetSequence(), aMapping,
2004    dataset).deriveSequence();
2005    // cdsSeq has a name constructed as CDS|<dbref>
2006    // <dbref> will be either the accession for the coding sequence,
2007    // marked in the /via/ dbref to the protein product accession
2008    // or it will be the original nucleotide accession.
2009  20 SequenceI cdsSeqDss = cdsSeq.getDatasetSequence();
2010   
2011  20 cdsSeqs.add(cdsSeq);
2012   
2013    /*
2014    * build the mapping from CDS to protein
2015    */
2016  20 List<int[]> cdsRange = Collections
2017    .singletonList(new int[]
2018    { cdsSeq.getStart(),
2019    cdsSeq.getLength() + cdsSeq.getStart() - 1 });
2020  20 MapList cdsToProteinMap = new MapList(cdsRange,
2021    mapList.getToRanges(), mapList.getFromRatio(),
2022    mapList.getToRatio());
2023   
2024  20 if (!dataset.getSequences().contains(cdsSeqDss))
2025    {
2026    /*
2027    * if this sequence is a newly created one, add it to the dataset
2028    * and made a CDS to protein mapping (if sequence already exists,
2029    * CDS-to-protein mapping _is_ the transcript-to-protein mapping)
2030    */
2031  20 dataset.addSequence(cdsSeqDss);
2032  20 AlignedCodonFrame cdsToProteinMapping = new AlignedCodonFrame();
2033  20 cdsToProteinMapping.addMap(cdsSeqDss, proteinProduct,
2034    cdsToProteinMap);
2035   
2036    /*
2037    * guard against duplicating the mapping if repeating this action
2038    */
2039  20 if (!mappings.contains(cdsToProteinMapping))
2040    {
2041  20 mappings.add(cdsToProteinMapping);
2042    }
2043    }
2044   
2045  20 propagateDBRefsToCDS(cdsSeqDss, dnaSeq.getDatasetSequence(),
2046    proteinProduct, aMapping);
2047    /*
2048    * add another mapping from original 'from' range to CDS
2049    */
2050  20 AlignedCodonFrame dnaToCdsMapping = new AlignedCodonFrame();
2051  20 final MapList dnaToCdsMap = new MapList(mapList.getFromRanges(),
2052    cdsRange, 1, 1);
2053  20 dnaToCdsMapping.addMap(dnaSeq.getDatasetSequence(), cdsSeqDss,
2054    dnaToCdsMap);
2055  20 if (!mappings.contains(dnaToCdsMapping))
2056    {
2057  20 mappings.add(dnaToCdsMapping);
2058    }
2059   
2060    /*
2061    * transfer dna chromosomal loci (if known) to the CDS
2062    * sequence (via the mapping)
2063    */
2064  20 final MapList cdsToDnaMap = dnaToCdsMap.getInverse();
2065  20 transferGeneLoci(dnaSeq, cdsToDnaMap, cdsSeq);
2066   
2067    /*
2068    * add DBRef with mapping from protein to CDS
2069    * (this enables Get Cross-References from protein alignment)
2070    * This is tricky because we can't have two DBRefs with the
2071    * same source and accession, so need a different accession for
2072    * the CDS from the dna sequence
2073    */
2074   
2075    // specific use case:
2076    // Genomic contig ENSCHR:1, contains coding regions for ENSG01,
2077    // ENSG02, ENSG03, with transcripts and products similarly named.
2078    // cannot add distinct dbrefs mapping location on ENSCHR:1 to ENSG01
2079   
2080    // JBPNote: ?? can't actually create an example that demonstrates we
2081    // need to
2082    // synthesize an xref.
2083   
2084  20 List<DBRefEntry> primrefs = dnaDss.getPrimaryDBRefs();
2085  33 for (int ip = 0, np = primrefs.size(); ip < np; ip++)
2086    {
2087  13 DBRefEntry primRef = primrefs.get(ip);
2088    /*
2089    * create a cross-reference from CDS to the source sequence's
2090    * primary reference and vice versa
2091    */
2092  13 String source = primRef.getSource();
2093  13 String version = primRef.getVersion();
2094  13 DBRefEntry cdsCrossRef = new DBRefEntry(source,
2095    source + ":" + version, primRef.getAccessionId());
2096  13 cdsCrossRef
2097    .setMap(new Mapping(dnaDss, new MapList(cdsToDnaMap)));
2098  13 cdsSeqDss.addDBRef(cdsCrossRef);
2099   
2100  13 dnaSeq.addDBRef(new DBRefEntry(source, version,
2101    cdsSeq.getName(), new Mapping(cdsSeqDss, dnaToCdsMap)));
2102    // problem here is that the cross-reference is synthesized -
2103    // cdsSeq.getName() may be like 'CDS|dnaaccession' or
2104    // 'CDS|emblcdsacc'
2105    // assuming cds version same as dna ?!?
2106   
2107  13 DBRefEntry proteinToCdsRef = new DBRefEntry(source, version,
2108    cdsSeq.getName());
2109    //
2110  13 proteinToCdsRef.setMap(
2111    new Mapping(cdsSeqDss, cdsToProteinMap.getInverse()));
2112  13 proteinProduct.addDBRef(proteinToCdsRef);
2113    }
2114    /*
2115    * transfer any features on dna that overlap the CDS
2116    */
2117  20 transferFeatures(dnaSeq, cdsSeq, dnaToCdsMap, null,
2118    SequenceOntologyI.CDS);
2119    }
2120    }
2121    }
2122   
2123  6 AlignmentI cds = new Alignment(
2124    cdsSeqs.toArray(new SequenceI[cdsSeqs.size()]));
2125  6 cds.setDataset(dataset);
2126   
2127  6 return cds;
2128    }
2129   
2130    /**
2131    * Tries to transfer gene loci (dbref to chromosome positions) from fromSeq to
2132    * toSeq, mediated by the given mapping between the sequences
2133    *
2134    * @param fromSeq
2135    * @param targetToFrom
2136    * Map
2137    * @param targetSeq
2138    */
 
2139  23 toggle protected static void transferGeneLoci(SequenceI fromSeq,
2140    MapList targetToFrom, SequenceI targetSeq)
2141    {
2142  23 if (targetSeq.getGeneLoci() != null)
2143    {
2144    // already have - don't override
2145  1 return;
2146    }
2147  22 GeneLociI fromLoci = fromSeq.getGeneLoci();
2148  22 if (fromLoci == null)
2149    {
2150  10 return;
2151    }
2152   
2153  12 MapList newMap = targetToFrom.traverse(fromLoci.getMapping());
2154   
2155  12 if (newMap != null)
2156    {
2157  12 targetSeq.setGeneLoci(fromLoci.getSpeciesId(),
2158    fromLoci.getAssemblyId(), fromLoci.getChromosomeId(), newMap);
2159    }
2160    }
2161   
2162    /**
2163    * A helper method that finds a CDS sequence in the alignment dataset that is
2164    * mapped to the given protein sequence, and either is, or has a mapping from,
2165    * the given dna sequence.
2166    *
2167    * @param mappings
2168    * set of all mappings on the dataset
2169    * @param dnaSeq
2170    * a dna (or cds) sequence we are searching from
2171    * @param seqMappings
2172    * the set of mappings involving dnaSeq
2173    * @param aMapping
2174    * a transcript-to-peptide mapping
2175    * @return
2176    */
 
2177  38 toggle static SequenceI findCdsForProtein(List<AlignedCodonFrame> mappings,
2178    SequenceI dnaSeq, List<AlignedCodonFrame> seqMappings,
2179    Mapping aMapping)
2180    {
2181    /*
2182    * TODO a better dna-cds-protein mapping data representation to allow easy
2183    * navigation; until then this clunky looping around lists of mappings
2184    */
2185  38 SequenceI seqDss = dnaSeq.getDatasetSequence() == null ? dnaSeq
2186    : dnaSeq.getDatasetSequence();
2187  38 SequenceI proteinProduct = aMapping.getTo();
2188   
2189    /*
2190    * is this mapping from the whole dna sequence (i.e. CDS)?
2191    * allowing for possible stop codon on dna but not peptide
2192    */
2193  38 int mappedFromLength = MappingUtils
2194    .getLength(aMapping.getMap().getFromRanges());
2195  38 int dnaLength = seqDss.getLength();
2196  38 if (mappedFromLength == dnaLength
2197    || mappedFromLength == dnaLength - CODON_LENGTH)
2198    {
2199    /*
2200    * if sequence has CDS features, this is a transcript with no UTR
2201    * - do not take this as the CDS sequence! (JAL-2789)
2202    */
2203  4 if (seqDss.getFeatures().getFeaturesByOntology(SequenceOntologyI.CDS)
2204    .isEmpty())
2205    {
2206  1 return seqDss;
2207    }
2208    }
2209   
2210    /*
2211    * looks like we found the dna-to-protein mapping; search for the
2212    * corresponding cds-to-protein mapping
2213    */
2214  37 List<AlignedCodonFrame> mappingsToPeptide = MappingUtils
2215    .findMappingsForSequence(proteinProduct, mappings);
2216  37 for (AlignedCodonFrame acf : mappingsToPeptide)
2217    {
2218  52 for (SequenceToSequenceMapping map : acf.getMappings())
2219    {
2220  276 Mapping mapping = map.getMapping();
2221  276 if (mapping != aMapping
2222    && mapping.getMap().getFromRatio() == CODON_LENGTH
2223    && proteinProduct == mapping.getTo()
2224    && seqDss != map.getFromSeq())
2225    {
2226  15 mappedFromLength = MappingUtils
2227    .getLength(mapping.getMap().getFromRanges());
2228  15 if (mappedFromLength == map.getFromSeq().getLength())
2229    {
2230    /*
2231    * found a 3:1 mapping to the protein product which covers
2232    * the whole dna sequence i.e. is from CDS; finally check the CDS
2233    * is mapped from the given dna start sequence
2234    */
2235  15 SequenceI cdsSeq = map.getFromSeq();
2236    // todo this test is weak if seqMappings contains multiple mappings;
2237    // we get away with it if transcript:cds relationship is 1:1
2238  15 List<AlignedCodonFrame> dnaToCdsMaps = MappingUtils
2239    .findMappingsForSequence(cdsSeq, seqMappings);
2240  15 if (!dnaToCdsMaps.isEmpty())
2241    {
2242  13 return cdsSeq;
2243    }
2244    }
2245    }
2246    }
2247    }
2248  24 return null;
2249    }
2250   
2251    /**
2252    * Helper method that makes a CDS sequence as defined by the mappings from the
2253    * given sequence i.e. extracts the 'mapped from' ranges (which may be on
2254    * forward or reverse strand).
2255    *
2256    * @param seq
2257    * @param mapping
2258    * @param dataset
2259    * - existing dataset. We check for sequences that look like the CDS
2260    * we are about to construct, if one exists already, then we will
2261    * just return that one.
2262    * @return CDS sequence (as a dataset sequence)
2263    */
 
2264  20 toggle static SequenceI makeCdsSequence(SequenceI seq, Mapping mapping,
2265    AlignmentI dataset)
2266    {
2267    /*
2268    * construct CDS sequence name as "CDS|" with 'from id' held in the mapping
2269    * if set (e.g. EMBL protein_id), else sequence name appended
2270    */
2271  20 String mapFromId = mapping.getMappedFromId();
2272  20 final String seqId = "CDS|"
2273  20 + (mapFromId != null ? mapFromId : seq.getName());
2274   
2275  20 SequenceI newSeq = null;
2276   
2277    /*
2278    * construct CDS sequence by splicing mapped from ranges
2279    */
2280  20 char[] seqChars = seq.getSequence();
2281  20 List<int[]> fromRanges = mapping.getMap().getFromRanges();
2282  20 int cdsWidth = MappingUtils.getLength(fromRanges);
2283  20 char[] newSeqChars = new char[cdsWidth];
2284   
2285  20 int newPos = 0;
2286  20 for (int[] range : fromRanges)
2287    {
2288  32 if (range[0] <= range[1])
2289    {
2290    // forward strand mapping - just copy the range
2291  32 int length = range[1] - range[0] + 1;
2292  32 System.arraycopy(seqChars, range[0] - 1, newSeqChars, newPos,
2293    length);
2294  32 newPos += length;
2295    }
2296    else
2297    {
2298    // reverse strand mapping - copy and complement one by one
2299  0 for (int i = range[0]; i >= range[1]; i--)
2300    {
2301  0 newSeqChars[newPos++] = Dna.getComplement(seqChars[i - 1]);
2302    }
2303    }
2304   
2305  32 newSeq = new Sequence(seqId, newSeqChars, 1, newPos);
2306    }
2307   
2308  20 if (dataset != null)
2309    {
2310  20 SequenceI[] matches = dataset.findSequenceMatch(newSeq.getName());
2311  20 if (matches != null)
2312    {
2313  20 boolean matched = false;
2314  20 for (SequenceI mtch : matches)
2315    {
2316  3 if (mtch.getStart() != newSeq.getStart())
2317    {
2318  0 continue;
2319    }
2320  3 if (mtch.getEnd() != newSeq.getEnd())
2321    {
2322  0 continue;
2323    }
2324  3 if (!Arrays.equals(mtch.getSequence(), newSeq.getSequence()))
2325    {
2326  3 continue;
2327    }
2328  0 if (!matched)
2329    {
2330  0 matched = true;
2331  0 newSeq = mtch;
2332    }
2333    else
2334    {
2335  0 Console.error(
2336    "JAL-2154 regression: warning - found (and ignored) a duplicate CDS sequence:"
2337    + mtch.toString());
2338    }
2339    }
2340    }
2341    }
2342    // newSeq.setDescription(mapFromId);
2343   
2344  20 return newSeq;
2345    }
2346   
2347    /**
2348    * Adds any DBRefEntrys to cdsSeq from contig that have a Mapping congruent to
2349    * the given mapping.
2350    *
2351    * @param cdsSeq
2352    * @param contig
2353    * @param proteinProduct
2354    * @param mapping
2355    * @return list of DBRefEntrys added
2356    */
 
2357  20 toggle protected static List<DBRefEntry> propagateDBRefsToCDS(SequenceI cdsSeq,
2358    SequenceI contig, SequenceI proteinProduct, Mapping mapping)
2359    {
2360   
2361    // gather direct refs from contig congruent with mapping
2362  20 List<DBRefEntry> direct = new ArrayList<>();
2363  20 HashSet<String> directSources = new HashSet<>();
2364   
2365  20 List<DBRefEntry> refs = contig.getDBRefs();
2366  20 if (refs != null)
2367    {
2368  292 for (int ib = 0, nb = refs.size(); ib < nb; ib++)
2369    {
2370  279 DBRefEntry dbr = refs.get(ib);
2371  279 MapList map;
2372  ? if (dbr.hasMap() && (map = dbr.getMap().getMap()).isTripletMap())
2373    {
2374    // check if map is the CDS mapping
2375  24 if (mapping.getMap().equals(map))
2376    {
2377  24 direct.add(dbr);
2378  24 directSources.add(dbr.getSource());
2379    }
2380    }
2381    }
2382    }
2383  20 List<DBRefEntry> onSource = DBRefUtils.selectRefs(
2384    proteinProduct.getDBRefs(),
2385    directSources.toArray(new String[0]));
2386  20 List<DBRefEntry> propagated = new ArrayList<>();
2387   
2388    // and generate appropriate mappings
2389  44 for (int ic = 0, nc = direct.size(); ic < nc; ic++)
2390    {
2391  24 DBRefEntry cdsref = direct.get(ic);
2392  24 Mapping m = cdsref.getMap();
2393    // clone maplist and mapping
2394  24 MapList cdsposmap = new MapList(
2395    Arrays.asList(new int[][]
2396    { new int[] { cdsSeq.getStart(), cdsSeq.getEnd() } }),
2397    m.getMap().getToRanges(), 3, 1);
2398  24 Mapping cdsmap = new Mapping(m.getTo(), m.getMap());
2399   
2400    // create dbref
2401  24 DBRefEntry newref = new DBRefEntry(cdsref.getSource(),
2402    cdsref.getVersion(), cdsref.getAccessionId(),
2403    new Mapping(cdsmap.getTo(), cdsposmap));
2404   
2405    // and see if we can map to the protein product for this mapping.
2406    // onSource is the filtered set of accessions on protein that we are
2407    // tranferring, so we assume accession is the same.
2408  24 if (cdsmap.getTo() == null && onSource != null)
2409    {
2410  2 List<DBRefEntry> sourceRefs = DBRefUtils.searchRefs(onSource,
2411    cdsref.getAccessionId());
2412  2 if (sourceRefs != null)
2413    {
2414  2 for (DBRefEntry srcref : sourceRefs)
2415    {
2416  2 if (srcref.getSource().equalsIgnoreCase(cdsref.getSource()))
2417    {
2418    // we have found a complementary dbref on the protein product, so
2419    // update mapping's getTo
2420  2 newref.getMap().setTo(proteinProduct);
2421    }
2422    }
2423    }
2424    }
2425  24 cdsSeq.addDBRef(newref);
2426  24 propagated.add(newref);
2427    }
2428  20 return propagated;
2429    }
2430   
2431    /**
2432    * Transfers co-located features on 'fromSeq' to 'toSeq', adjusting the
2433    * feature start/end ranges, optionally omitting specified feature types.
2434    * Returns the number of features copied.
2435    *
2436    * @param fromSeq
2437    * @param toSeq
2438    * @param mapping
2439    * the mapping from 'fromSeq' to 'toSeq'
2440    * @param select
2441    * if not null, only features of this type are copied (including
2442    * subtypes in the Sequence Ontology)
2443    * @param omitting
2444    */
 
2445  23 toggle protected static int transferFeatures(SequenceI fromSeq, SequenceI toSeq,
2446    MapList mapping, String select, String... omitting)
2447    {
2448  23 SequenceI copyTo = toSeq;
2449  43 while (copyTo.getDatasetSequence() != null)
2450    {
2451  20 copyTo = copyTo.getDatasetSequence();
2452    }
2453  23 if (fromSeq == copyTo || fromSeq.getDatasetSequence() == copyTo)
2454    {
2455  0 return 0; // shared dataset sequence
2456    }
2457   
2458    /*
2459    * get features, optionally restricted by an ontology term
2460    */
2461  23 List<SequenceFeature> sfs = select == null
2462    ? fromSeq.getFeatures().getPositionalFeatures()
2463    : fromSeq.getFeatures().getFeaturesByOntology(select);
2464   
2465  23 int count = 0;
2466  23 for (SequenceFeature sf : sfs)
2467    {
2468  9610 String type = sf.getType();
2469  9610 boolean omit = false;
2470  9610 for (String toOmit : omitting)
2471    {
2472  9603 if (type.equals(toOmit))
2473    {
2474  134 omit = true;
2475    }
2476    }
2477  9610 if (omit)
2478    {
2479  134 continue;
2480    }
2481   
2482    /*
2483    * locate the mapped range - null if either start or end is
2484    * not mapped (no partial overlaps are calculated)
2485    */
2486  9476 int start = sf.getBegin();
2487  9476 int end = sf.getEnd();
2488  9476 int[] mappedTo = mapping.locateInTo(start, end);
2489    /*
2490    * if whole exon range doesn't map, try interpreting it
2491    * as 5' or 3' exon overlapping the CDS range
2492    */
2493  9476 if (mappedTo == null)
2494    {
2495  4447 mappedTo = mapping.locateInTo(end, end);
2496  4447 if (mappedTo != null)
2497    {
2498    /*
2499    * end of exon is in CDS range - 5' overlap
2500    * to a range from the start of the peptide
2501    */
2502  0 mappedTo[0] = 1;
2503    }
2504    }
2505  9476 if (mappedTo == null)
2506    {
2507  4447 mappedTo = mapping.locateInTo(start, start);
2508  4447 if (mappedTo != null)
2509    {
2510    /*
2511    * start of exon is in CDS range - 3' overlap
2512    * to a range up to the end of the peptide
2513    */
2514  0 mappedTo[1] = toSeq.getLength();
2515    }
2516    }
2517  9476 if (mappedTo != null)
2518    {
2519  5029 int newBegin = Math.min(mappedTo[0], mappedTo[1]);
2520  5029 int newEnd = Math.max(mappedTo[0], mappedTo[1]);
2521  5029 SequenceFeature copy = new SequenceFeature(sf, newBegin, newEnd,
2522    sf.getFeatureGroup(), sf.getScore());
2523  5029 copyTo.addSequenceFeature(copy);
2524  5029 count++;
2525    }
2526    }
2527  23 return count;
2528    }
2529   
2530    /**
2531    * Returns a mapping from dna to protein by inspecting sequence features of
2532    * type "CDS" on the dna. A mapping is constructed if the total CDS feature
2533    * length is 3 times the peptide length (optionally after dropping a trailing
2534    * stop codon). This method does not check whether the CDS nucleotide sequence
2535    * translates to the peptide sequence.
2536    *
2537    * @param dnaSeq
2538    * @param proteinSeq
2539    * @return
2540    */
 
2541  23 toggle public static MapList mapCdsToProtein(SequenceI dnaSeq,
2542    SequenceI proteinSeq)
2543    {
2544  23 List<int[]> ranges = findCdsPositions(dnaSeq);
2545  23 int mappedDnaLength = MappingUtils.getLength(ranges);
2546   
2547    /*
2548    * if not a whole number of codons, truncate mapping
2549    */
2550  23 int codonRemainder = mappedDnaLength % CODON_LENGTH;
2551  23 if (codonRemainder > 0)
2552    {
2553  2 mappedDnaLength -= codonRemainder;
2554  2 MappingUtils.removeEndPositions(codonRemainder, ranges);
2555    }
2556   
2557  23 int proteinLength = proteinSeq.getLength();
2558  23 int proteinStart = proteinSeq.getStart();
2559  23 int proteinEnd = proteinSeq.getEnd();
2560   
2561    /*
2562    * incomplete start codon may mean X at start of peptide
2563    * we ignore both for mapping purposes
2564    */
2565  23 if (proteinSeq.getCharAt(0) == 'X')
2566    {
2567    // todo JAL-2022 support startPhase > 0
2568  1 proteinStart++;
2569  1 proteinLength--;
2570    }
2571  23 List<int[]> proteinRange = new ArrayList<>();
2572   
2573    /*
2574    * dna length should map to protein (or protein plus stop codon)
2575    */
2576  23 int codesForResidues = mappedDnaLength / CODON_LENGTH;
2577  23 if (codesForResidues == (proteinLength + 1))
2578    {
2579    // assuming extra codon is for STOP and not in peptide
2580    // todo: check trailing codon is indeed a STOP codon
2581  2 codesForResidues--;
2582  2 mappedDnaLength -= CODON_LENGTH;
2583  2 MappingUtils.removeEndPositions(CODON_LENGTH, ranges);
2584    }
2585   
2586  23 if (codesForResidues == proteinLength)
2587    {
2588  4 proteinRange.add(new int[] { proteinStart, proteinEnd });
2589  4 return new MapList(ranges, proteinRange, CODON_LENGTH, 1);
2590    }
2591  19 return null;
2592    }
2593   
2594    /**
2595    * Returns a list of CDS ranges found (as sequence positions base 1), i.e. of
2596    * [start, end] positions of sequence features of type "CDS" (or a sub-type of
2597    * CDS in the Sequence Ontology). The ranges are sorted into ascending start
2598    * position order, so this method is only valid for linear CDS in the same
2599    * sense as the protein product.
2600    *
2601    * @param dnaSeq
2602    * @return
2603    */
 
2604  25 toggle protected static List<int[]> findCdsPositions(SequenceI dnaSeq)
2605    {
2606  25 List<int[]> result = new ArrayList<>();
2607   
2608  25 List<SequenceFeature> sfs = dnaSeq.getFeatures()
2609    .getFeaturesByOntology(SequenceOntologyI.CDS);
2610  25 if (sfs.isEmpty())
2611    {
2612  17 return result;
2613    }
2614  8 SequenceFeatures.sortFeatures(sfs, true);
2615   
2616  8 for (SequenceFeature sf : sfs)
2617    {
2618  16 int phase = 0;
2619  16 try
2620    {
2621  16 String s = sf.getPhase();
2622  16 if (s != null)
2623    {
2624  2 phase = Integer.parseInt(s);
2625    }
2626    } catch (NumberFormatException e)
2627    {
2628    // leave as zero
2629    }
2630    /*
2631    * phase > 0 on first codon means 5' incomplete - skip to the start
2632    * of the next codon; example ENST00000496384
2633    */
2634  16 int begin = sf.getBegin();
2635  16 int end = sf.getEnd();
2636  16 if (result.isEmpty() && phase > 0)
2637    {
2638  2 begin += phase;
2639  2 if (begin > end)
2640    {
2641    // shouldn't happen!
2642  0 System.err
2643    .println("Error: start phase extends beyond start CDS in "
2644    + dnaSeq.getName());
2645    }
2646    }
2647  16 result.add(new int[] { begin, end });
2648    }
2649   
2650    /*
2651    * Finally sort ranges by start position. This avoids a dependency on
2652    * keeping features in order on the sequence (if they are in order anyway,
2653    * the sort will have almost no work to do). The implicit assumption is CDS
2654    * ranges are assembled in order. Other cases should not use this method,
2655    * but instead construct an explicit mapping for CDS (e.g. EMBL parsing).
2656    */
2657  8 Collections.sort(result, IntRangeComparator.ASCENDING);
2658  8 return result;
2659    }
2660   
2661    /**
2662    * Makes an alignment with a copy of the given sequences, adding in any
2663    * non-redundant sequences which are mapped to by the cross-referenced
2664    * sequences.
2665    *
2666    * @param seqs
2667    * @param xrefs
2668    * @param dataset
2669    * the alignment dataset shared by the new copy
2670    * @return
2671    */
 
2672  0 toggle public static AlignmentI makeCopyAlignment(SequenceI[] seqs,
2673    SequenceI[] xrefs, AlignmentI dataset)
2674    {
2675  0 AlignmentI copy = new Alignment(new Alignment(seqs));
2676  0 copy.setDataset(dataset);
2677  0 boolean isProtein = !copy.isNucleotide();
2678  0 SequenceIdMatcher matcher = new SequenceIdMatcher(seqs);
2679  0 if (xrefs != null)
2680    {
2681    // BH 2019.01.25 recoded to remove iterators
2682   
2683  0 for (int ix = 0, nx = xrefs.length; ix < nx; ix++)
2684    {
2685  0 SequenceI xref = xrefs[ix];
2686  0 List<DBRefEntry> dbrefs = xref.getDBRefs();
2687  0 if (dbrefs != null)
2688    {
2689  0 for (int ir = 0, nir = dbrefs.size(); ir < nir; ir++)
2690    {
2691  0 DBRefEntry dbref = dbrefs.get(ir);
2692  0 Mapping map = dbref.getMap();
2693  0 SequenceI mto;
2694  0 if (map == null || (mto = map.getTo()) == null
2695    || mto.isProtein() != isProtein)
2696    {
2697  0 continue;
2698    }
2699  0 SequenceI mappedTo = mto;
2700  0 SequenceI match = matcher.findIdMatch(mappedTo);
2701  0 if (match == null)
2702    {
2703  0 matcher.add(mappedTo);
2704  0 copy.addSequence(mappedTo);
2705    }
2706    }
2707    }
2708    }
2709    }
2710  0 return copy;
2711    }
2712   
2713    /**
2714    * Try to align sequences in 'unaligned' to match the alignment of their
2715    * mapped regions in 'aligned'. For example, could use this to align CDS
2716    * sequences which are mapped to their parent cDNA sequences.
2717    *
2718    * This method handles 1:1 mappings (dna-to-dna or protein-to-protein). For
2719    * dna-to-protein or protein-to-dna use alternative methods.
2720    *
2721    * @param unaligned
2722    * sequences to be aligned
2723    * @param aligned
2724    * holds aligned sequences and their mappings
2725    * @return
2726    */
 
2727  6 toggle public static int alignAs(AlignmentI unaligned, AlignmentI aligned)
2728    {
2729    /*
2730    * easy case - aligning a copy of aligned sequences
2731    */
2732  6 if (alignAsSameSequences(unaligned, aligned))
2733    {
2734  0 return unaligned.getHeight();
2735    }
2736   
2737    /*
2738    * fancy case - aligning via mappings between sequences
2739    */
2740  6 List<SequenceI> unmapped = new ArrayList<>();
2741  6 Map<Integer, Map<SequenceI, Character>> columnMap = buildMappedColumnsMap(
2742    unaligned, aligned, unmapped);
2743  6 int width = columnMap.size();
2744  6 char gap = unaligned.getGapCharacter();
2745  6 int realignedCount = 0;
2746    // TODO: verify this loop scales sensibly for very wide/high alignments
2747   
2748  6 for (SequenceI seq : unaligned.getSequences())
2749    {
2750  30 if (!unmapped.contains(seq))
2751    {
2752  30 char[] newSeq = new char[width];
2753  30 Arrays.fill(newSeq, gap); // JBPComment - doubt this is faster than the
2754    // Integer iteration below
2755  30 int newCol = 0;
2756  30 int lastCol = 0;
2757   
2758    /*
2759    * traverse the map to find columns populated
2760    * by our sequence
2761    */
2762  30 for (Integer column : columnMap.keySet())
2763    {
2764  59000 Character c = columnMap.get(column).get(seq);
2765  59000 if (c != null)
2766    {
2767    /*
2768    * sequence has a character at this position
2769    *
2770    */
2771  32002 newSeq[newCol] = c;
2772  32002 lastCol = newCol;
2773    }
2774  59000 newCol++;
2775    }
2776   
2777    /*
2778    * trim trailing gaps
2779    */
2780  30 if (lastCol < width)
2781    {
2782  30 char[] tmp = new char[lastCol + 1];
2783  30 System.arraycopy(newSeq, 0, tmp, 0, lastCol + 1);
2784  30 newSeq = tmp;
2785    }
2786    // TODO: optimise SequenceI to avoid char[]->String->char[]
2787  30 seq.setSequence(String.valueOf(newSeq));
2788  30 realignedCount++;
2789    }
2790    }
2791  6 return realignedCount;
2792    }
2793   
2794    /**
2795    * If unaligned and aligned sequences share the same dataset sequences, then
2796    * simply copies the aligned sequences to the unaligned sequences and returns
2797    * true; else returns false
2798    *
2799    * @param unaligned
2800    * - sequences to be aligned based on aligned
2801    * @param aligned
2802    * - 'guide' alignment containing sequences derived from same dataset
2803    * as unaligned
2804    * @return
2805    */
 
2806  10 toggle static boolean alignAsSameSequences(AlignmentI unaligned,
2807    AlignmentI aligned)
2808    {
2809  10 if (aligned.getDataset() == null || unaligned.getDataset() == null)
2810    {
2811  0 return false; // should only pass alignments with datasets here
2812    }
2813   
2814    // map from dataset sequence to alignment sequence(s)
2815  10 Map<SequenceI, List<SequenceI>> alignedDatasets = new HashMap<>();
2816  10 for (SequenceI seq : aligned.getSequences())
2817    {
2818  63 SequenceI ds = seq.getDatasetSequence();
2819  63 if (alignedDatasets.get(ds) == null)
2820    {
2821  62 alignedDatasets.put(ds, new ArrayList<SequenceI>());
2822    }
2823  63 alignedDatasets.get(ds).add(seq);
2824    }
2825   
2826    /*
2827    * first pass - check whether all sequences to be aligned share a
2828    * dataset sequence with an aligned sequence; also note the leftmost
2829    * ungapped column from which to copy
2830    */
2831  10 int leftmost = Integer.MAX_VALUE;
2832  10 for (SequenceI seq : unaligned.getSequences())
2833    {
2834  16 final SequenceI ds = seq.getDatasetSequence();
2835  16 if (!alignedDatasets.containsKey(ds))
2836    {
2837  7 return false;
2838    }
2839  9 SequenceI alignedSeq = alignedDatasets.get(ds).get(0);
2840  9 int startCol = alignedSeq.findIndex(seq.getStart()); // 1..
2841  9 leftmost = Math.min(leftmost, startCol);
2842    }
2843   
2844    /*
2845    * second pass - copy aligned sequences;
2846    * heuristic rule: pair off sequences in order for the case where
2847    * more than one shares the same dataset sequence
2848    */
2849  3 final char gapCharacter = aligned.getGapCharacter();
2850  3 for (SequenceI seq : unaligned.getSequences())
2851    {
2852  7 List<SequenceI> alignedSequences = alignedDatasets
2853    .get(seq.getDatasetSequence());
2854  7 if (alignedSequences.isEmpty())
2855    {
2856    /*
2857    * defensive check - shouldn't happen! (JAL-3536)
2858    */
2859  0 continue;
2860    }
2861  7 SequenceI alignedSeq = alignedSequences.get(0);
2862   
2863    /*
2864    * gap fill for leading (5') UTR if any
2865    */
2866    // TODO this copies intron columns - wrong!
2867  7 int startCol = alignedSeq.findIndex(seq.getStart()); // 1..
2868  7 int endCol = alignedSeq.findIndex(seq.getEnd());
2869  7 char[] seqchars = new char[endCol - leftmost + 1];
2870  7 Arrays.fill(seqchars, gapCharacter);
2871  7 char[] toCopy = alignedSeq.getSequence(startCol - 1, endCol);
2872  7 System.arraycopy(toCopy, 0, seqchars, startCol - leftmost,
2873    toCopy.length);
2874  7 seq.setSequence(String.valueOf(seqchars));
2875  7 if (alignedSequences.size() > 0)
2876    {
2877    // pop off aligned sequences (except the last one)
2878  7 alignedSequences.remove(0);
2879    }
2880    }
2881   
2882    /*
2883    * finally remove gapped columns (e.g. introns)
2884    */
2885  3 new RemoveGapColCommand("", unaligned.getSequencesArray(), 0,
2886    unaligned.getWidth() - 1, unaligned);
2887   
2888  3 return true;
2889    }
2890   
2891    /**
2892    * Returns a map whose key is alignment column number (base 1), and whose
2893    * values are a map of sequence characters in that column.
2894    *
2895    * @param unaligned
2896    * @param aligned
2897    * @param unmapped
2898    * @return
2899    */
 
2900  6 toggle static SortedMap<Integer, Map<SequenceI, Character>> buildMappedColumnsMap(
2901    AlignmentI unaligned, AlignmentI aligned,
2902    List<SequenceI> unmapped)
2903    {
2904    /*
2905    * Map will hold, for each aligned column position, a map of
2906    * {unalignedSequence, characterPerSequence} at that position.
2907    * TreeMap keeps the entries in ascending column order.
2908    */
2909  6 SortedMap<Integer, Map<SequenceI, Character>> map = new TreeMap<>();
2910   
2911    /*
2912    * record any sequences that have no mapping so can't be realigned
2913    */
2914  6 unmapped.addAll(unaligned.getSequences());
2915   
2916  6 List<AlignedCodonFrame> mappings = aligned.getCodonFrames();
2917   
2918  6 for (SequenceI seq : unaligned.getSequences())
2919    {
2920  30 for (AlignedCodonFrame mapping : mappings)
2921    {
2922  518 SequenceI fromSeq = mapping.findAlignedSequence(seq, aligned);
2923  518 if (fromSeq != null)
2924    {
2925  30 Mapping seqMap = mapping.getMappingBetween(fromSeq, seq);
2926  30 if (addMappedPositions(seq, fromSeq, seqMap, map))
2927    {
2928  30 unmapped.remove(seq);
2929    }
2930    }
2931    }
2932    }
2933  6 return map;
2934    }
2935   
2936    /**
2937    * Helper method that adds to a map the mapped column positions of a sequence.
2938    * <br>
2939    * For example if aaTT-Tg-gAAA is mapped to TTTAAA then the map should record
2940    * that columns 3,4,6,10,11,12 map to characters T,T,T,A,A,A of the mapped to
2941    * sequence.
2942    *
2943    * @param seq
2944    * the sequence whose column positions we are recording
2945    * @param fromSeq
2946    * a sequence that is mapped to the first sequence
2947    * @param seqMap
2948    * the mapping from 'fromSeq' to 'seq'
2949    * @param map
2950    * a map to add the column positions (in fromSeq) of the mapped
2951    * positions of seq
2952    * @return
2953    */
 
2954  32 toggle static boolean addMappedPositions(SequenceI seq, SequenceI fromSeq,
2955    Mapping seqMap, Map<Integer, Map<SequenceI, Character>> map)
2956    {
2957  32 if (seqMap == null)
2958    {
2959  0 return false;
2960    }
2961   
2962    /*
2963    * invert mapping if it is from unaligned to aligned sequence
2964    */
2965  32 if (seqMap.getTo() == fromSeq.getDatasetSequence())
2966    {
2967  2 seqMap = new Mapping(seq.getDatasetSequence(),
2968    seqMap.getMap().getInverse());
2969    }
2970   
2971  32 int toStart = seq.getStart();
2972   
2973    /*
2974    * traverse [start, end, start, end...] ranges in fromSeq
2975    */
2976  32 for (int[] fromRange : seqMap.getMap().getFromRanges())
2977    {
2978  70 for (int i = 0; i < fromRange.length - 1; i += 2)
2979    {
2980  35 boolean forward = fromRange[i + 1] >= fromRange[i];
2981   
2982    /*
2983    * find the range mapped to (sequence positions base 1)
2984    */
2985  35 int[] range = seqMap.locateMappedRange(fromRange[i],
2986    fromRange[i + 1]);
2987  35 if (range == null)
2988    {
2989  0 jalview.bin.Console.errPrintln("Error in mapping " + seqMap
2990    + " from " + fromSeq.getName());
2991  0 return false;
2992    }
2993  35 int fromCol = fromSeq.findIndex(fromRange[i]);
2994  35 int mappedCharPos = range[0];
2995   
2996    /*
2997    * walk over the 'from' aligned sequence in forward or reverse
2998    * direction; when a non-gap is found, record the column position
2999    * of the next character of the mapped-to sequence; stop when all
3000    * the characters of the range have been counted
3001    */
3002  2794300 while (mappedCharPos <= range[1] && fromCol <= fromSeq.getLength()
3003    && fromCol >= 0)
3004    {
3005  2794265 if (!Comparison.isGap(fromSeq.getCharAt(fromCol - 1)))
3006    {
3007    /*
3008    * mapped from sequence has a character in this column
3009    * record the column position for the mapped to character
3010    */
3011  32014 Map<SequenceI, Character> seqsMap = map.get(fromCol);
3012  32014 if (seqsMap == null)
3013    {
3014  5410 seqsMap = new HashMap<>();
3015  5410 map.put(fromCol, seqsMap);
3016    }
3017  32014 seqsMap.put(seq, seq.getCharAt(mappedCharPos - toStart));
3018  32014 mappedCharPos++;
3019    }
3020  2794265 fromCol += (forward ? 1 : -1);
3021    }
3022    }
3023    }
3024  32 return true;
3025    }
3026   
3027    // strictly temporary hack until proper criteria for aligning protein to cds
3028    // are in place; this is so Ensembl -> fetch xrefs Uniprot aligns the Uniprot
 
3029  4 toggle public static boolean looksLikeEnsembl(AlignmentI alignment)
3030    {
3031  4 for (SequenceI seq : alignment.getSequences())
3032    {
3033  88 String name = seq.getName();
3034  88 if (!name.startsWith("ENSG") && !name.startsWith("ENST"))
3035    {
3036  0 return false;
3037    }
3038    }
3039  4 return true;
3040    }
3041   
3042    /**
3043    * Creates a deep copy of an alignment along with a dataset alignment,
3044    * alignment annotations, representative sequence and hidden columns.
3045    */
 
3046  0 toggle public static AlignmentI deepCopyAlignment(AlignmentI alignment)
3047    {
3048  0 var alnCpy = new Alignment(alignment);
3049  0 alnCpy.setGapCharacter(alignment.getGapCharacter());
3050  0 alnCpy.setDataset(alignment.getDataset());
3051  0 for (AlignmentAnnotation annotation : alignment.getAlignmentAnnotation())
3052  0 alnCpy.addAnnotation(new AlignmentAnnotation(annotation));
3053  0 if (alignment.hasSeqrep())
3054    {
3055  0 int idx = alignment.findIndex(alignment.getSeqrep());
3056  0 if (idx >= 0)
3057  0 alnCpy.setSeqrep(alnCpy.getSequenceAt(idx));
3058    }
3059  0 if (alignment.getHiddenColumns() != null)
3060  0 alnCpy.setHiddenColumns(new HiddenColumns(alignment.getHiddenColumns()));
3061  0 return alnCpy;
3062    }
3063   
3064    /**
3065    * This method filters and returns the secondary structure annotations
3066    * in an array of annotations
3067    *
3068    * @param annotations
3069    * @return array of secondary structure annotations
3070    */
 
3071  0 toggle public static List<AlignmentAnnotation> getSecondaryStructureAnnots(
3072    AlignmentAnnotation[] annotations)
3073    {
3074  0 List<AlignmentAnnotation> ssAnnotations = new ArrayList<>();
3075  0 if (annotations == null || annotations.length == 0)
3076    {
3077  0 return null;
3078    }
3079   
3080  0 synchronized (annotations)
3081    {
3082  0 for (AlignmentAnnotation aa : annotations)
3083    {
3084  0 if (aa == null)
3085    {
3086  0 continue;
3087    }
3088   
3089  0 if (aa.label != null && Constants.SECONDARY_STRUCTURE_LABELS
3090    .containsKey(aa.label))
3091    {
3092  0 ssAnnotations.add(aa);
3093    }
3094    }
3095    }
3096  0 return ssAnnotations;
3097   
3098    }
3099   
 
3100  19 toggle public static boolean isSecondaryStructurePresent(
3101    AlignmentAnnotation[] annotations)
3102    {
3103  19 boolean ssPresent = false;
3104  19 if (annotations == null || annotations.length == 0)
3105    {
3106  8 return false;
3107    }
3108   
3109  11 synchronized (annotations)
3110    {
3111  11 for (AlignmentAnnotation aa : annotations)
3112    {
3113  12 if (aa == null)
3114    {
3115  0 continue;
3116    }
3117    // TODO: @RENIA decide what exactly we should be testing for - not
3118    // strings/labels
3119  12 if (// aa.isValidStruc() || aa.hasIcons ||
3120  12 (aa.label != null && Constants.SECONDARY_STRUCTURE_LABELS
3121    .containsKey(aa.label)))
3122    {
3123  9 ssPresent = true;
3124  9 break;
3125    }
3126    }
3127    }
3128  11 return ssPresent;
3129   
3130    }
3131   
 
3132  4 toggle public static Color getSecondaryStructureAnnotationColour(char symbol)
3133    {
3134   
3135  4 if (symbol == Constants.COIL)
3136    {
3137  1 return Color.gray;
3138    }
3139  3 if (symbol == Constants.SHEET)
3140    {
3141  1 return Color.green;
3142    }
3143  2 if (symbol == Constants.HELIX)
3144    {
3145  1 return Color.red;
3146    }
3147   
3148  1 return Color.white;
3149    }
3150   
 
3151  111449 toggle public static char findSSAnnotationForGivenSeqposition(
3152    AlignmentAnnotation aa, int seqPosition)
3153    {
3154  111446 char ss = '*';
3155   
3156  111434 if (aa != null)
3157    {
3158  111455 if (aa.getAnnotationForPosition(seqPosition) != null)
3159    {
3160  57623 Annotation a = aa.getAnnotationForPosition(seqPosition);
3161  57619 ss = a.secondaryStructure;
3162   
3163    // There is no representation for coil and it can be either ' ' or null.
3164  57638 if (ss == ' ' || ss == '-')
3165    {
3166  14767 ss = Constants.COIL;
3167    }
3168    }
3169    else
3170    {
3171  53832 ss = Constants.COIL;
3172    }
3173    }
3174   
3175  111420 return ss;
3176    }
3177   
 
3178  3872 toggle public static List<String> extractSSSourceInAlignmentAnnotation(
3179    AlignmentAnnotation[] annotations)
3180    {
3181   
3182  3872 List<String> ssSources = new ArrayList<>();
3183  3872 Set<String> addedSources = new HashSet<>(); // to keep track of added
3184    // sources
3185   
3186  3872 if (annotations == null)
3187    {
3188  12 return ssSources;
3189    }
3190   
3191  3860 for (AlignmentAnnotation aa : annotations)
3192    {
3193   
3194  20440 String ssSource = AlignmentAnnotationUtils.extractSSSourceFromAnnotationDescription(aa);
3195   
3196  20440 if (ssSource != null && !addedSources.contains(ssSource))
3197    {
3198  298 ssSources.add(ssSource);
3199  298 addedSources.add(ssSource);
3200    }
3201   
3202    }
3203  3860 Collections.sort(ssSources);
3204   
3205  3860 return ssSources;
3206   
3207    }
3208   
3209   
3210    /**
3211    * Returns secondary structure annotations from the given sequence for a specified source.
3212    * @param seq the sequence
3213    * @param ssSource the source label ("PDB", "JPred")
3214    * @return list of matching annotations
3215    */
 
3216  6317931 toggle public static List<AlignmentAnnotation> getAlignmentAnnotationForSource(
3217    SequenceI seq, String ssSource)
3218    {
3219   
3220  6317993 List<AlignmentAnnotation> ssAnnots = new ArrayList<AlignmentAnnotation>();
3221   
3222    // Iterate through all known secondary structure labels
3223  6318066 for (String ssLabel : Constants.SECONDARY_STRUCTURE_LABELS.keySet())
3224    {
3225   
3226  12633741 AlignmentAnnotation[] aa = seq.getAnnotation(ssLabel);
3227  12634062 if (aa != null)
3228    {
3229    // If source is "ALL", include all annotations for this label
3230  189210 if (Constants.SS_ALL_PROVIDERS.equals(ssSource))
3231    {
3232  72020 ssAnnots.addAll(Arrays.asList(aa));
3233  72020 continue;
3234    }
3235   
3236  117190 for (AlignmentAnnotation annot : aa)
3237    {
3238  138419 String ssSourceForAnnot = AlignmentAnnotationUtils.extractSSSourceFromAnnotationDescription(
3239    annot);
3240    // Add annotations from the matching source
3241  138419 if (ssSourceForAnnot != null && ssSource.equals(ssSourceForAnnot)
3242    && annot.isForDisplay())
3243    {
3244  47893 ssAnnots.add(annot);
3245    }
3246    }
3247    }
3248    }
3249  6319126 if (ssAnnots.size() > 0)
3250    {
3251  102148 return ssAnnots;
3252    }
3253   
3254  6217122 return null;
3255   
3256    }
3257   
 
3258  48 toggle public static Map<SequenceI, ArrayList<AlignmentAnnotation>> getSequenceAssociatedAlignmentAnnotations(
3259    AlignmentAnnotation[] alignAnnotList, String selectedSSSource)
3260    {
3261   
3262  48 Map<SequenceI, ArrayList<AlignmentAnnotation>> ssAlignmentAnnotationForSequences = new HashMap<SequenceI, ArrayList<AlignmentAnnotation>>();
3263  48 if (alignAnnotList == null || alignAnnotList.length == 0)
3264    {
3265  0 return ssAlignmentAnnotationForSequences;
3266    }
3267   
3268  48 for (AlignmentAnnotation aa : alignAnnotList)
3269    {
3270  251 if (aa.sequenceRef == null)
3271    {
3272  32 continue;
3273    }
3274   
3275  219 if (isSecondaryStructureFrom(selectedSSSource, aa))
3276    {
3277  78 ssAlignmentAnnotationForSequences
3278    .computeIfAbsent(aa.sequenceRef.getDatasetSequence(),
3279    k -> new ArrayList<>())
3280    .add(aa);
3281    }
3282    }
3283   
3284  48 return ssAlignmentAnnotationForSequences;
3285   
3286    }
3287   
3288    /**
3289    * param alignAnnotation
3290    * @param selectedSSSource
3291    * @return any alignment annotation that matches the given source
3292    */
 
3293  0 toggle public static List<AlignmentAnnotation> getSecondaryStructureAnnotionFor(List<AlignmentAnnotation> alignAnnotation, String selectedSSSource)
3294    {
3295  0 ArrayList<AlignmentAnnotation> annForSource = new ArrayList();
3296  0 for (AlignmentAnnotation alan:alignAnnotation)
3297    {
3298  0 if (isSecondaryStructureFrom(selectedSSSource, alan)) {
3299  0 annForSource.add(alan);
3300    }
3301    }
3302  0 return annForSource;
3303    }
3304   
3305    /**
3306    *
3307    * @param selectedSSSource
3308    * @param aa
3309    * @return true if aa is from a provider or all providers as specified by
3310    * selectedSSSource
3311    */
 
3312  219 toggle public static boolean isSecondaryStructureFrom(String selectedSSSource,
3313    AlignmentAnnotation aa)
3314    {
3315   
3316  219 for (String label : Constants.SECONDARY_STRUCTURE_LABELS.keySet())
3317    {
3318   
3319  376 if (label.equals(aa.label))
3320    {
3321   
3322  170 if (selectedSSSource.equals(Constants.SS_ALL_PROVIDERS))
3323    {
3324  38 return true;
3325    }
3326  132 String ssSource = AlignmentAnnotationUtils
3327    .extractSSSourceFromAnnotationDescription(aa);
3328  132 if (ssSource != null && ssSource.equals(selectedSSSource))
3329    {
3330  40 return true;
3331    }
3332    }
3333    }
3334  141 return false;
3335    }
3336   
3337    // Method to get the key for a given provider value
 
3338  0 toggle public static String getSecondaryStructureProviderKey(
3339    String providerValue)
3340    {
3341  0 for (Map.Entry<String, String> entry : Constants.STRUCTURE_PROVIDERS
3342    .entrySet())
3343    {
3344  0 if (entry.getValue().equals(providerValue))
3345    {
3346  0 return entry.getKey(); // Return the key (abbreviation) for the matching
3347    // provider value
3348    }
3349    }
3350  0 return null; // Return null if no match is found
3351    }
3352   
 
3353  0 toggle public static String reduceLabelLength(String label)
3354    {
3355    // Split the input by " | "
3356  0 StringBuilder reducedLabel = new StringBuilder();
3357  0 for (String part:label.split(" \\| ")) {
3358  0 String nextLabel=part;
3359  0 for (Entry<String,String> provider:Constants.STRUCTURE_PROVIDERS.entrySet()) {
3360  0 if (part.equals(provider.getValue())) {
3361  0 nextLabel = provider.getKey();
3362    }
3363    }
3364  0 if (reducedLabel.length()>0)
3365    {
3366  0 reducedLabel.append(" | ");
3367    }
3368  0 reducedLabel.append(nextLabel);
3369    }
3370  0 return reducedLabel.toString();
3371    }
3372   
 
3373  596 toggle public static HashMap<String, Color> assignColorsForSecondaryStructureProviders(
3374    List<String> labels)
3375    {
3376    // Loop through each unique label and add it to the map with a colour.
3377    // for (String label : uniqueLabels)
3378   
3379  596 HashMap<String, Color> secondaryStructureProviderColorMap = new HashMap<String, Color>();
3380  596 for (String label : labels)
3381    {
3382    // Generate or retrieve a colour for the label.
3383  544 String name = label.toUpperCase(Locale.ROOT).trim();
3384  544 secondaryStructureProviderColorMap.put(name,
3385    ColorUtils.getColourFromNameAndScheme(name, "NONE"));
3386    }
3387   
3388  596 return secondaryStructureProviderColorMap;
3389    }
3390   
 
3391  6 toggle public static int computeMaxShifts(SequenceI[] seqs, ShiftList inserts)
3392    {
3393   
3394  6 int oldwidth = 0;
3395  6 int p = 0, lastP = 0, lastIns = 0;
3396  6 for (SequenceI seq : seqs)
3397    {
3398  140 char[] sqs = seq.getSequence();
3399    // System.out.println(seq.getSequenceAsString());
3400  140 if (oldwidth < sqs.length)
3401    {
3402  13 oldwidth = sqs.length;
3403    }
3404  140 p = 0;
3405  140 lastP = 0;
3406  140 lastIns = 0;
3407  140 int modelOffset = 0; // shift between index and insertions encountered
3408  140 do
3409    {
3410  53877 if (sqs[p] >= 'a' && sqs[p] <= 'z')
3411    {
3412  2015 if (lastIns == 0)
3413    {
3414  589 lastP = (modelOffset + p) * 2;
3415    }
3416  2015 lastIns++;
3417    }
3418    else
3419    {
3420  51862 if (lastIns > 0)
3421    {
3422  586 inserts.extendShift(lastP, lastIns);
3423  586 modelOffset -= lastIns;
3424    }
3425  51862 lastIns = 0;
3426    }
3427  53877 } while (++p < sqs.length);
3428  140 if (lastIns > 0)
3429    {
3430  3 inserts.extendShift(lastP, lastIns);
3431    }
3432    }
3433  6 return oldwidth;
3434    }
3435   
3436    /**
3437    * quick and dirty lowercase - to - insert expansion
3438    *
3439    * @param seqs
3440    * - these will be direcly modified
3441    * @return message indicating how many gaps were inserted.
3442    */
 
3443  4 toggle public static String a3mToMSA(SequenceI[] seqs)
3444    {
3445  4 String response = "";
3446  4 ShiftList inserts = new ShiftList();
3447  4 int oldwidth = computeMaxShifts(seqs, inserts);
3448   
3449  4 int newwidth = insertShifts(seqs,inserts);
3450  4 if (oldwidth != newwidth)
3451    {
3452  4 response = "Added " + (newwidth - oldwidth) + " inserts.";
3453    }
3454  4 return response;
3455    }
3456   
 
3457  5 toggle public static int insertShifts(SequenceI[] seqs, ShiftList inserts)
3458    {
3459  5 int newwidth=0;
3460  5 for (SequenceI seq : seqs)
3461    {
3462  80 boolean changed=false;
3463  80 int posShift = 0, totalShift = 0;
3464  80 for (int[] shift : inserts.getShifts())
3465    {
3466    // locate this shift in the totalShift'ed sequence
3467  5912 posShift = (shift[0] / 2);
3468  5912 int len = shift[1];
3469   
3470    // account for any insertions in this sequence
3471  5912 int p = posShift + totalShift;
3472  7021 while (len > 0 && p < seq.getLength()
3473    && (seq.getCharAt(p) >= 'a' && seq.getCharAt(p) <= 'z'))
3474    {
3475  1109 p++;
3476  1109 len--;
3477    }
3478    // and mark any remaining after the insert
3479  5912 if (len > 0)
3480    {
3481  5670 changed=true;
3482  5670 seq.doInsert(p, len, '-');
3483    }
3484  5912 totalShift += shift[1]; // update shifted positions in this sequence
3485    }
3486  80 if (changed)
3487    {
3488  78 seq.sequenceChanged();
3489    }
3490  80 newwidth = (seq.getLength() > newwidth) ? seq.getLength() : newwidth;
3491    }
3492  5 return newwidth;
3493    }
3494    }