Clover icon

Coverage Report

  1. Project Clover database Wed Sep 17 2025 10:52:37 BST
  2. Package jalview.analysis

File AAFrequency.java

 

Coverage histogram

../../img/srcFileCovDistChart9.png
13% of files have more coverage

Code metrics

162
332
19
1
1,020
652
121
0.36
17.47
19
6.37

Classes

Class Line # Actions
AAFrequency 60 332 121
0.86939573 86.9%
 

Contributing tests

This file is covered by 235 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.analysis;
22   
23    import jalview.datamodel.AlignedCodonFrame;
24    import jalview.datamodel.AlignmentAnnotation;
25    import jalview.datamodel.AlignmentI;
26    import jalview.datamodel.AnnotatedCollectionI;
27    import jalview.datamodel.Annotation;
28    import jalview.datamodel.Profile;
29    import jalview.datamodel.ProfileI;
30    import jalview.datamodel.Profiles;
31    import jalview.datamodel.ProfilesI;
32    import jalview.datamodel.ResidueCount;
33    import jalview.datamodel.ResidueCount.SymbolCounts;
34    import jalview.datamodel.SecondaryStructureCount;
35    import jalview.datamodel.SequenceGroup;
36    import jalview.datamodel.SequenceI;
37    import jalview.ext.android.SparseIntArray;
38    import jalview.util.Comparison;
39    import jalview.util.Constants;
40    import jalview.util.Format;
41    import jalview.util.MappingUtils;
42    import jalview.util.QuickSort;
43   
44    import java.awt.Color;
45    import java.util.ArrayList;
46    import java.util.Arrays;
47    import java.util.Hashtable;
48    import java.util.List;
49    import java.util.Map;
50   
/**
 * Static utilities that calculate per-column consensus profiles over a set of
 * aligned sequences (residue, secondary structure and cDNA codon consensus),
 * and derive the annotation rows used to display them.
 * This class is used extensively in calculating alignment colourschemes that
 * depend on the amount of conservation in each alignment column.
 *
 * @author $author$
 * @version $Revision$
 */
 
60    public class AAFrequency
61    {
/**
 * Key under which the codon count array is stored in each per-column
 * Hashtable of cDNA consensus data.
 */
public static final String PROFILE = "P";

/*
 * Look-up table holding the one-character String for each of 'A' to 'Z',
 * indexed by (letter - 'A')
 */
private static final String[] CHARS = new String['Z' - 'A' + 1];

static
{
  for (int offset = 0; offset < CHARS.length; offset++)
  {
    CHARS[offset] = String.valueOf((char) ('A' + offset));
  }
}
76   
 
77  3 toggle public static final ProfilesI calculate(List<SequenceI> list, int start,
78    int end)
79    {
80  3 return calculate(list, start, end, false);
81    }
82   
 
83  462 toggle public static final ProfilesI calculate(List<SequenceI> sequences,
84    int start, int end, boolean profile)
85    {
86  462 SequenceI[] seqs = new SequenceI[sequences.size()];
87  462 int width = 0;
88  462 synchronized (sequences)
89    {
90  4036 for (int i = 0; i < sequences.size(); i++)
91    {
92  3574 seqs[i] = sequences.get(i);
93  3574 int length = seqs[i].getLength();
94  3574 if (length > width)
95    {
96  461 width = length;
97    }
98    }
99   
100  462 if (end >= width)
101    {
102  255 end = width;
103    }
104   
105  462 ProfilesI reply = calculate(seqs, width, start, end, profile);
106  462 return reply;
107    }
108    }
109   
110    /**
111    * Calculate the consensus symbol(s) for each column in the given range.
112    *
113    * @param sequences
114    * @param width
115    * the full width of the alignment
116    * @param start
117    * start column (inclusive, base zero)
118    * @param end
119    * end column (exclusive)
120    * @param saveFullProfile
121    * if true, store all symbol counts
122    */
 
123  1796 toggle public static final ProfilesI calculate(final SequenceI[] sequences,
124    int width, int start, int end, boolean saveFullProfile)
125    {
126    // long now = System.currentTimeMillis();
127  1796 int seqCount = sequences.length;
128  1796 boolean nucleotide = false;
129  1796 int nucleotideCount = 0;
130  1796 int peptideCount = 0;
131   
132  1796 ProfileI[] result = new ProfileI[width];
133   
134  709624 for (int column = start; column < end; column++)
135    {
136    /*
137    * Apply a heuristic to detect nucleotide data (which can
138    * be counted in more compact arrays); here we test for
139    * more than 90% nucleotide; recheck every 10 columns in case
140    * of misleading data e.g. highly conserved Alanine in peptide!
141    * Mistakenly guessing nucleotide has a small performance cost,
142    * as it will result in counting in sparse arrays.
143    * Mistakenly guessing peptide has a small space cost,
144    * as it will use a larger than necessary array to hold counts.
145    */
146  706971 if (nucleotideCount > 100 && column % 10 == 0)
147    {
148  62927 nucleotide = (9 * peptideCount < nucleotideCount);
149    }
150  708214 ResidueCount residueCounts = new ResidueCount(nucleotide);
151   
152  11841214 for (int row = 0; row < seqCount; row++)
153    {
154  11137649 if (sequences[row] == null)
155    {
156  0 jalview.bin.Console.errPrintln(
157    "WARNING: Consensus skipping null sequence - possible race condition.");
158  0 continue;
159    }
160  11063340 if (sequences[row].getLength() > column)
161    {
162  11047149 char c = sequences[row].getCharAt(column);
163  10979052 residueCounts.add(c);
164  11164111 if (Comparison.isNucleotide(c))
165    {
166  1143569 nucleotideCount++;
167    }
168  10016744 else if (!Comparison.isGap(c))
169    {
170  1179785 peptideCount++;
171    }
172    }
173    else
174    {
175    /*
176    * count a gap if the sequence doesn't reach this column
177    */
178  42501 residueCounts.addGap();
179    }
180    }
181   
182  707722 int maxCount = residueCounts.getModalCount();
183  707645 String maxResidue = residueCounts.getResiduesForCount(maxCount);
184  707816 int gapCount = residueCounts.getGapCount();
185  708000 ProfileI profile = new Profile(seqCount, gapCount, maxCount,
186    maxResidue);
187   
188  708524 if (saveFullProfile)
189    {
190  684647 profile.setCounts(residueCounts);
191    }
192   
193  708261 result[column] = profile;
194    }
195  1796 return new Profiles(seqCount, result);
196    // long elapsed = System.currentTimeMillis() - now;
197    // jalview.bin.Console.outPrintln(elapsed);
198    }
199   
 
200  0 toggle public static final ProfilesI calculateSS(List<SequenceI> list, int start,
201    int end, String source)
202    {
203  0 return calculateSS(list, start, end, false, source);
204    }
 
205  0 toggle public static final ProfilesI calculateSS(List<SequenceI> sequences,
206    int start, int end, boolean profile, String source)
207    {
208  0 return calculateSS(sequences, start, end, profile, source,null);
209    }
 
210  499 toggle public static final ProfilesI calculateSS(List<SequenceI> sequences,
211    int start, int end, boolean profile, String source,
212    SequenceGroup sequenceGroup)
213    {
214  499 SequenceI[] seqs = new SequenceI[sequences.size()];
215  499 int width = 0;
216  499 synchronized (sequences)
217    {
218  4259 for (int i = 0; i < sequences.size(); i++)
219    {
220  3760 seqs[i] = sequences.get(i);
221  3760 int length = seqs[i].getLength();
222  3760 if (length > width)
223    {
224  498 width = length;
225    }
226    }
227   
228  499 if (end >= width)
229    {
230  295 end = width;
231    }
232   
233  499 ProfilesI reply = calculateSS(seqs, width, start, end, profile,
234    source,sequenceGroup);
235  499 return reply;
236    }
237    }
238   
239   
240    /**
241    * TODO - REFACTOR TO ANNOTATEDCOLLECTION!
242    * @param sequences
243    * @param width
244    * @param start
245    * @param end
246    * @param saveFullProfile
247    * @param source - if null, will be treated as 'All'
248    * @param sequenceGroup - if null all secondary structure annotations matching source on sequence will be considered
249    * @return
250    */
 
251  1971 toggle public static final ProfilesI calculateSS(final SequenceI[] sequences,
252    int width, int start, int end, boolean saveFullProfile,
253    String source, SequenceGroup sequenceGroup)
254    {
255   
256   
257  1971 int seqCount = sequences.length;
258   
259  1971 ProfileI[] result = new ProfileI[width];
260  1971 int maxSSannotcount=0,maxSeqWithSScount=0;
261  1971 if (source==null || "".equals(source)) {
262  2 source = Constants.SS_ALL_PROVIDERS;
263    }
264  1971 Map<SequenceI, ArrayList<AlignmentAnnotation>> sq_group_by_source = null;
265  1971 if (sequenceGroup!=null && sequenceGroup.getAnnotationsFromTree().size()>0 && source!=null)
266    {
267  40 sq_group_by_source = AlignmentUtils.getSequenceAssociatedAlignmentAnnotations(sequenceGroup.getAnnotationsFromTree().toArray(new AlignmentAnnotation[0]), source);
268    }
269  755945 for (int column = start; column < end; column++)
270    {
271   
272  754049 int seqWithSSCount = 0;
273  754083 int ssCount = 0;
274   
275  754123 SecondaryStructureCount ssCounts = new SecondaryStructureCount();
276   
277  12124763 for (int row = 0; row < seqCount; row++)
278    {
279  11611184 if (sequences[row] == null)
280    {
281  0 jalview.bin.Console.errPrintln(
282    "WARNING: Consensus skipping null sequence - possible race condition.");
283  0 continue;
284    }
285   
286  11641876 char c = sequences[row].getCharAt(column);
287   
288  11512798 List<AlignmentAnnotation> annots;
289   
290  11541128 if (sq_group_by_source==null) {
291  11542941 annots = AlignmentUtils.getAlignmentAnnotationForSource(sequences[row], source);
292    } else {
293  16018 annots = sq_group_by_source.get(sequences[row]);
294  16018 if (annots==null)
295    {
296  16018 annots = sq_group_by_source.get(sequences[row].getDatasetSequence());
297    }
298    }
299   
300  11614135 if(annots!=null) {
301  191161 if (annots.size()>0) {
302  191153 seqWithSSCount++;
303    }
304  191147 for (AlignmentAnnotation aa : annots)
305    {
306  230974 if (aa != null)
307    {
308  230983 ssCount++;
309    }
310   
311  230988 if (sequences[row].getLength() > column && !Comparison.isGap(c)
312    && aa != null)
313    {
314   
315  171060 int seqPosition = sequences[row].findPosition(column);
316   
317  171197 char ss = AlignmentUtils
318    .findSSAnnotationForGivenSeqposition(aa, seqPosition);
319  171164 if (ss == '*')
320    {
321  0 continue;
322    }
323  171149 ssCounts.add(ss);
324    }
325  59917 else if (Comparison.isGap(c) && aa != null)
326    {
327  59917 ssCounts.addGap();
328    }
329    }
330    }
331    }
332   
333  753867 int maxSSCount = ssCounts.getModalCount();
334  753747 String maxSS = ssCounts.getSSForCount(maxSSCount);
335  753754 int gapCount = ssCounts.getGapCount();
336  753737 ProfileI profile = new Profile(maxSS, ssCount, gapCount, maxSSCount,
337    seqWithSSCount);
338  753837 maxSeqWithSScount=Math.max(maxSeqWithSScount, seqWithSSCount);
339  753973 if (saveFullProfile)
340    {
341  730062 profile.setSSCounts(ssCounts);
342    }
343   
344  753819 result[column] = profile;
345  754111 maxSSannotcount=Math.max(maxSSannotcount, ssCount);
346    }
347  1971 return new Profiles(maxSSannotcount,result);
348    }
349   
350    /**
351    * Make an estimate of the profile size we are going to compute i.e. how many
352    * different characters may be present in it. Overestimating has a cost of
353    * using more memory than necessary. Underestimating has a cost of needing to
354    * extend the SparseIntArray holding the profile counts.
355    *
356    * @param profileSizes
357    * counts of sizes of profiles so far encountered
358    * @return
359    */
 
360  0 toggle static int estimateProfileSize(SparseIntArray profileSizes)
361    {
362  0 if (profileSizes.size() == 0)
363    {
364  0 return 4;
365    }
366   
367    /*
368    * could do a statistical heuristic here e.g. 75%ile
369    * for now just return the largest value
370    */
371  0 return profileSizes.keyAt(profileSizes.size() - 1);
372    }
373   
374    /**
375    * Derive the consensus annotations to be added to the alignment for display.
376    * This does not recompute the raw data, but may be called on a change in
377    * display options, such as 'ignore gaps', which may in turn result in a
378    * change in the derived values.
379    *
380    * @param consensus
381    * the annotation row to add annotations to
382    * @param profiles
383    * the source consensus data
384    * @param startCol
385    * start column (inclusive)
386    * @param endCol
387    * end column (exclusive)
388    * @param ignoreGaps
389    * if true, normalise residue percentages ignoring gaps
390    * @param showSequenceLogo
391    * if true include all consensus symbols, else just show modal
392    * residue
393    * @param nseq
394    * number of sequences
395    */
 
396  1467 toggle public static void completeConsensus(AlignmentAnnotation consensus,
397    ProfilesI profiles, int startCol, int endCol, boolean ignoreGaps,
398    boolean showSequenceLogo, long nseq)
399    {
400    // long now = System.currentTimeMillis();
401  1467 if (consensus == null || consensus.annotations == null
402    || consensus.annotations.length < endCol)
403    {
404    /*
405    * called with a bad alignment annotation row
406    * wait for it to be initialised properly
407    */
408  0 return;
409    }
410   
411  478755 for (int i = startCol; i < endCol; i++)
412    {
413  477254 ProfileI profile = profiles.get(i);
414  477258 if (profile == null)
415    {
416    /*
417    * happens if sequences calculated over were
418    * shorter than alignment width
419    */
420  0 consensus.annotations[i] = null;
421  0 return;
422    }
423   
424  477251 final int dp = getPercentageDp(nseq);
425   
426  477252 float value = profile.getPercentageIdentity(ignoreGaps);
427   
428  477296 String description = getTooltip(profile, value, showSequenceLogo,
429    ignoreGaps, dp);
430   
431  477206 String modalResidue = profile.getModalResidue();
432  477225 if ("".equals(modalResidue))
433    {
434  9674 modalResidue = "-";
435    }
436  467538 else if (modalResidue.length() > 1)
437    {
438  9800 modalResidue = "+";
439    }
440  477200 consensus.annotations[i] = new Annotation(modalResidue, description,
441    ' ', value);
442    }
443    // long elapsed = System.currentTimeMillis() - now;
444    // jalview.bin.Console.outPrintln(-elapsed);
445    }
446   
 
447  1382 toggle public static void completeSSConsensus(AlignmentAnnotation ssConsensus,
448    ProfilesI profiles, int startCol, int endCol, boolean ignoreGaps,
449    boolean showSequenceLogo, long nseq)
450    {
451    // long now = System.currentTimeMillis();
452  1382 if (ssConsensus == null || ssConsensus.annotations == null
453    || ssConsensus.annotations.length < endCol)
454    {
455    /*
456    * called with a bad alignment annotation row
457    * wait for it to be initialised properly
458    */
459  4 return;
460    }
461   
462  640302 for (int i = startCol; i < endCol; i++)
463    {
464  638924 ProfileI profile = profiles.get(i);
465  638923 if (profile == null)
466    {
467    /*
468    * happens if sequences calculated over were
469    * shorter than alignment width
470    */
471  0 ssConsensus.annotations[i] = null;
472  0 return;
473    }
474   
475  638923 if (ssConsensus.getNoOfSequencesIncluded() < 0)
476    {
477  501 ssConsensus.setNoOfSequencesIncluded(profile.getSeqWithSSCount());
478  501 ssConsensus.setNoOfTracksIncluded(profiles.getCount());
479    }
480   
481  638924 final int dp = getPercentageDp(nseq);
482   
483  638924 float value = profile.getSSPercentageIdentity(ignoreGaps);
484   
485  638923 String description = getSSTooltip(profile, value, showSequenceLogo,
486    ignoreGaps, dp);
487   
488  638924 String modalSS = profile.getModalSS();
489  638924 if ("".equals(modalSS))
490    {
491  594724 modalSS = "-";
492    }
493  44200 else if (modalSS.length() > 1)
494    {
495  1683 modalSS = "+";
496    }
497  638922 ssConsensus.annotations[i] = new Annotation(modalSS, description,
498    ' ', value);
499    }
500   
501    //Hide consensus with no data to display
502  1378 if(ssConsensus.getNoOfSequencesIncluded()<1)
503  1206 ssConsensus.visible = false;
504   
505    // long elapsed = System.currentTimeMillis() - now;
506    // jalview.bin.Console.outPrintln(-elapsed);
507    }
508   
509    /**
510    * Derive the gap count annotation row.
511    *
512    * @param gaprow
513    * the annotation row to add annotations to
514    * @param profiles
515    * the source consensus data
516    * @param startCol
517    * start column (inclusive)
518    * @param endCol
519    * end column (exclusive)
520    */
 
521  1333 toggle public static void completeGapAnnot(AlignmentAnnotation gaprow,
522    ProfilesI profiles, int startCol, int endCol, long nseq)
523    {
524  1333 if (gaprow == null || gaprow.annotations == null
525    || gaprow.annotations.length < endCol)
526    {
527    /*
528    * called with a bad alignment annotation row
529    * wait for it to be initialised properly
530    */
531  0 return;
532    }
533    // always set ranges again
534  1333 gaprow.graphMax = nseq;
535  1333 gaprow.graphMin = 0;
536  1333 double scale = 0.8 / nseq;
537  201502 for (int i = startCol; i < endCol; i++)
538    {
539  200169 ProfileI profile = profiles.get(i);
540  200169 if (profile == null)
541    {
542    /*
543    * happens if sequences calculated over were
544    * shorter than alignment width
545    */
546  0 gaprow.annotations[i] = null;
547  0 return;
548    }
549   
550  200169 final int gapped = profile.getNonGapped();
551   
552  200169 String description = "" + gapped;
553   
554  200169 gaprow.annotations[i] = new Annotation("", description, '\0', gapped,
555    jalview.util.ColorUtils.bleachColour(Color.DARK_GRAY,
556    (float) scale * gapped));
557    }
558    }
559   
560    /**
561    * Returns a tooltip showing either
562    * <ul>
563    * <li>the full profile (percentages of all residues present), if
564    * showSequenceLogo is true, or</li>
565    * <li>just the modal (most common) residue(s), if showSequenceLogo is
566    * false</li>
567    * </ul>
568    * Percentages are as a fraction of all sequence, or only ungapped sequences
569    * if ignoreGaps is true.
570    *
571    * @param profile
572    * @param pid
573    * @param showSequenceLogo
574    * @param ignoreGaps
575    * @param dp
576    * the number of decimal places to format percentages to
577    * @return
578    */
 
579  477280 toggle static String getTooltip(ProfileI profile, float pid,
580    boolean showSequenceLogo, boolean ignoreGaps, int dp)
581    {
582  477278 ResidueCount counts = profile.getCounts();
583   
584  477289 String description = null;
585  477280 if (counts != null && showSequenceLogo)
586    {
587  79276 int normaliseBy = ignoreGaps ? profile.getNonGapped()
588    : profile.getHeight();
589  79274 description = counts.getTooltip(normaliseBy, dp);
590    }
591    else
592    {
593  397992 StringBuilder sb = new StringBuilder(64);
594  397973 String maxRes = profile.getModalResidue();
595  397943 if (maxRes.length() > 1)
596    {
597  4236 sb.append("[").append(maxRes).append("]");
598    }
599    else
600    {
601  393677 sb.append(maxRes);
602    }
603  397906 if (maxRes.length() > 0)
604    {
605  392378 sb.append(" ");
606  392400 Format.appendPercentage(sb, pid, dp);
607  392534 sb.append("%");
608    }
609  398023 description = sb.toString();
610    }
611  477241 return description;
612    }
613   
 
614  638923 toggle static String getSSTooltip(ProfileI profile, float pid,
615    boolean showSequenceLogo, boolean ignoreGaps, int dp)
616    {
617  638924 SecondaryStructureCount counts = profile.getSSCounts();
618   
619  638924 String description = null;
620  638924 if (counts != null && showSequenceLogo)
621    {
622  74781 int normaliseBy = ignoreGaps ? profile.getNonGapped()
623    : profile.getHeight();
624  74779 description = counts.getTooltip(normaliseBy, dp);
625    }
626    else
627    {
628  564143 StringBuilder sb = new StringBuilder(64);
629  564143 String maxSS = profile.getModalSS();
630  564143 if (maxSS.length() > 1)
631    {
632  1053 sb.append("[").append(maxSS).append("]");
633    }
634    else
635    {
636  563090 sb.append(maxSS);
637    }
638  564143 if (maxSS.length() > 0)
639    {
640  27398 sb.append(" ");
641  27398 Format.appendPercentage(sb, pid, dp);
642  27398 sb.append("%");
643    }
644  564143 description = sb.toString();
645    }
646  638924 return description;
647    }
648   
649    /**
650    * Returns the sorted profile for the given consensus data. The returned array
651    * contains
652    *
653    * <pre>
654    * [profileType, numberOfValues, totalPercent, charValue1, percentage1, charValue2, percentage2, ...]
655    * in descending order of percentage value
656    * </pre>
657    *
658    * @param profile
659    * the data object from which to extract and sort values
660    * @param ignoreGaps
661    * if true, only non-gapped values are included in percentage
662    * calculations
663    * @return
664    */
 
665  86766 toggle public static int[] extractProfile(ProfileI profile, boolean ignoreGaps)
666    {
667  86766 char[] symbols;
668  86766 int[] values;
669   
670  86766 if (profile.getCounts() != null)
671    {
672  86766 ResidueCount counts = profile.getCounts();
673  86766 SymbolCounts symbolCounts = counts.getSymbolCounts();
674  86766 symbols = symbolCounts.symbols;
675  86766 values = symbolCounts.values;
676   
677    }
678  0 else if (profile.getSSCounts() != null)
679    {
680  0 SecondaryStructureCount counts = profile.getSSCounts();
681    // to do
682  0 SecondaryStructureCount.SymbolCounts symbolCounts = counts
683    .getSymbolCounts();
684  0 symbols = symbolCounts.symbols;
685  0 values = symbolCounts.values;
686    }
687    else
688    {
689  0 return null;
690    }
691   
692  86766 QuickSort.sort(values, symbols);
693  86766 int totalPercentage = 0;
694  86766 final int divisor = ignoreGaps ? profile.getNonGapped()
695    : profile.getHeight();
696   
697    /*
698    * traverse the arrays in reverse order (highest counts first)
699    */
700  86766 int[] result = new int[3 + 2 * symbols.length];
701  86766 int nextArrayPos = 3;
702  86766 int nonZeroCount = 0;
703   
704  257198 for (int i = symbols.length - 1; i >= 0; i--)
705    {
706  170434 int theChar = symbols[i];
707  170434 int charCount = values[i];
708  170434 final int percentage = (charCount * 100) / divisor;
709  170434 if (percentage == 0)
710    {
711    /*
712    * this count (and any remaining) round down to 0% - discard
713    */
714  2 break;
715    }
716  170432 nonZeroCount++;
717  170432 result[nextArrayPos++] = theChar;
718  170432 result[nextArrayPos++] = percentage;
719  170432 totalPercentage += percentage;
720    }
721   
722    /*
723    * truncate array if any zero values were discarded
724    */
725  86766 if (nonZeroCount < symbols.length)
726    {
727  2 int[] tmp = new int[3 + 2 * nonZeroCount];
728  2 System.arraycopy(result, 0, tmp, 0, tmp.length);
729  2 result = tmp;
730    }
731   
732    /*
733    * fill in 'header' values
734    */
735  86766 result[0] = AlignmentAnnotation.SEQUENCE_PROFILE;
736  86766 result[1] = nonZeroCount;
737  86766 result[2] = totalPercentage;
738   
739  86766 return result;
740    }
741   
742    /**
743    * Extract a sorted extract of cDNA codon profile data. The returned array
744    * contains
745    *
746    * <pre>
747    * [profileType, numberOfValues, totalPercentage, charValue1, percentage1, charValue2, percentage2, ...]
748    * in descending order of percentage value, where the character values encode codon triplets
749    * </pre>
750    *
751    * @param hashtable
752    * @return
753    */
 
754  2 toggle public static int[] extractCdnaProfile(
755    Hashtable<String, Object> hashtable, boolean ignoreGaps)
756    {
757    // this holds #seqs, #ungapped, and then codon count, indexed by encoded
758    // codon triplet
759  2 int[] codonCounts = (int[]) hashtable.get(PROFILE);
760  2 int[] sortedCounts = new int[codonCounts.length - 2];
761  2 System.arraycopy(codonCounts, 2, sortedCounts, 0,
762    codonCounts.length - 2);
763   
764  2 int[] result = new int[3 + 2 * sortedCounts.length];
765    // first value is just the type of profile data
766  2 result[0] = AlignmentAnnotation.CDNA_PROFILE;
767   
768  2 char[] codons = new char[sortedCounts.length];
769  130 for (int i = 0; i < codons.length; i++)
770    {
771  128 codons[i] = (char) i;
772    }
773  2 QuickSort.sort(sortedCounts, codons);
774  2 int totalPercentage = 0;
775  2 int distinctValuesCount = 0;
776  2 int j = 3;
777  2 int divisor = ignoreGaps ? codonCounts[1] : codonCounts[0];
778  8 for (int i = codons.length - 1; i >= 0; i--)
779    {
780  8 final int codonCount = sortedCounts[i];
781  8 if (codonCount == 0)
782    {
783  0 break; // nothing else of interest here
784    }
785  8 final int percentage = codonCount * 100 / divisor;
786  8 if (percentage == 0)
787    {
788    /*
789    * this (and any remaining) values rounded down to 0 - discard
790    */
791  2 break;
792    }
793  6 distinctValuesCount++;
794  6 result[j++] = codons[i];
795  6 result[j++] = percentage;
796  6 totalPercentage += percentage;
797    }
798  2 result[2] = totalPercentage;
799   
800    /*
801    * Just return the non-zero values
802    */
803    // todo next value is redundant if we limit the array to non-zero counts
804  2 result[1] = distinctValuesCount;
805  2 return Arrays.copyOfRange(result, 0, j);
806    }
807   
808    /**
809    * Compute a consensus for the cDNA coding for a protein alignment.
810    *
811    * @param alignment
812    * the protein alignment (which should hold mappings to cDNA
813    * sequences)
814    * @param hconsensus
815    * the consensus data stores to be populated (one per column)
816    */
 
817  5 toggle public static void calculateCdna(AlignmentI alignment,
818    Hashtable<String, Object>[] hconsensus)
819    {
820  5 final char gapCharacter = alignment.getGapCharacter();
821  5 List<AlignedCodonFrame> mappings = alignment.getCodonFrames();
822  5 if (mappings == null || mappings.isEmpty())
823    {
824  0 return;
825    }
826   
827  5 int cols = alignment.getWidth();
828  1955 for (int col = 0; col < cols; col++)
829    {
830    // todo would prefer a Java bean for consensus data
831  1950 Hashtable<String, Object> columnHash = new Hashtable<>();
832    // #seqs, #ungapped seqs, counts indexed by (codon encoded + 1)
833  1950 int[] codonCounts = new int[66];
834  1950 codonCounts[0] = alignment.getSequences().size();
835  1950 int ungappedCount = 0;
836  1950 for (SequenceI seq : alignment.getSequences())
837    {
838  20922 if (seq.getCharAt(col) == gapCharacter)
839    {
840  10174 continue;
841    }
842  10748 List<char[]> codons = MappingUtils.findCodonsFor(seq, col,
843    mappings);
844  10748 for (char[] codon : codons)
845    {
846  10658 int codonEncoded = CodingUtils.encodeCodon(codon);
847  10658 if (codonEncoded >= 0)
848    {
849  10658 codonCounts[codonEncoded + 2]++;
850  10658 ungappedCount++;
851  10658 break;
852    }
853    }
854    }
855  1950 codonCounts[1] = ungappedCount;
856    // todo: sort values here, save counts and codons?
857  1950 columnHash.put(PROFILE, codonCounts);
858  1950 hconsensus[col] = columnHash;
859    }
860    }
861   
862    /**
863    * Derive displayable cDNA consensus annotation from computed consensus data.
864    *
865    * @param consensusAnnotation
866    * the annotation row to be populated for display
867    * @param consensusData
868    * the computed consensus data
869    * @param showProfileLogo
870    * if true show all symbols present at each position, else only the
871    * modal value
872    * @param nseqs
873    * the number of sequences in the alignment
874    */
 
875  5 toggle public static void completeCdnaConsensus(
876    AlignmentAnnotation consensusAnnotation,
877    Hashtable<String, Object>[] consensusData,
878    boolean showProfileLogo, int nseqs)
879    {
880  5 if (consensusAnnotation == null
881    || consensusAnnotation.annotations == null
882    || consensusAnnotation.annotations.length < consensusData.length)
883    {
884    // called with a bad alignment annotation row - wait for it to be
885    // initialised properly
886  0 return;
887    }
888   
889    // ensure codon triplet scales with font size
890  5 consensusAnnotation.scaleColLabel = true;
891  1955 for (int col = 0; col < consensusData.length; col++)
892    {
893  1950 Hashtable<String, Object> hci = consensusData[col];
894  1950 if (hci == null)
895    {
896    // gapped protein column?
897  0 continue;
898    }
899    // array holds #seqs, #ungapped, then codon counts indexed by codon
900  1950 final int[] codonCounts = (int[]) hci.get(PROFILE);
901  1950 int totalCount = 0;
902   
903    /*
904    * First pass - get total count and find the highest
905    */
906  1950 final char[] codons = new char[codonCounts.length - 2];
907  126750 for (int j = 2; j < codonCounts.length; j++)
908    {
909  124800 final int codonCount = codonCounts[j];
910  124800 codons[j - 2] = (char) (j - 2);
911  124800 totalCount += codonCount;
912    }
913   
914    /*
915    * Sort array of encoded codons by count ascending - so the modal value
916    * goes to the end; start by copying the count (dropping the first value)
917    */
918  1950 int[] sortedCodonCounts = new int[codonCounts.length - 2];
919  1950 System.arraycopy(codonCounts, 2, sortedCodonCounts, 0,
920    codonCounts.length - 2);
921  1950 QuickSort.sort(sortedCodonCounts, codons);
922   
923  1950 int modalCodonEncoded = codons[codons.length - 1];
924  1950 int modalCodonCount = sortedCodonCounts[codons.length - 1];
925  1950 String modalCodon = String
926    .valueOf(CodingUtils.decodeCodon(modalCodonEncoded));
927  1950 if (sortedCodonCounts.length > 1 && sortedCodonCounts[codons.length
928    - 2] == sortedCodonCounts[codons.length - 1])
929    {
930    /*
931    * two or more codons share the modal count
932    */
933  50 modalCodon = "+";
934    }
935  1950 float pid = sortedCodonCounts[sortedCodonCounts.length - 1] * 100
936    / (float) totalCount;
937   
938    /*
939    * todo ? Replace consensus hashtable with sorted arrays of codons and
940    * counts (non-zero only). Include total count in count array [0].
941    */
942   
943    /*
944    * Scan sorted array backwards for most frequent values first. Show
945    * repeated values compactly.
946    */
947  1950 StringBuilder mouseOver = new StringBuilder(32);
948  1950 StringBuilder samePercent = new StringBuilder();
949  1950 String percent = null;
950  1950 String lastPercent = null;
951  1950 int percentDecPl = getPercentageDp(nseqs);
952   
953  3850 for (int j = codons.length - 1; j >= 0; j--)
954    {
955  3850 int codonCount = sortedCodonCounts[j];
956  3850 if (codonCount == 0)
957    {
958    /*
959    * remaining codons are 0% - ignore, but finish off the last one if
960    * necessary
961    */
962  1950 if (samePercent.length() > 0)
963    {
964  1900 mouseOver.append(samePercent).append(": ").append(percent)
965    .append("% ");
966    }
967  1950 break;
968    }
969  1900 int codonEncoded = codons[j];
970  1900 final int pct = codonCount * 100 / totalCount;
971  1900 String codon = String
972    .valueOf(CodingUtils.decodeCodon(codonEncoded));
973  1900 StringBuilder sb = new StringBuilder();
974  1900 Format.appendPercentage(sb, pct, percentDecPl);
975  1900 percent = sb.toString();
976  1900 if (showProfileLogo || codonCount == modalCodonCount)
977    {
978  1900 if (percent.equals(lastPercent) && j > 0)
979    {
980  0 samePercent.append(samePercent.length() == 0 ? "" : ", ");
981  0 samePercent.append(codon);
982    }
983    else
984    {
985  1900 if (samePercent.length() > 0)
986    {
987  0 mouseOver.append(samePercent).append(": ").append(lastPercent)
988    .append("% ");
989    }
990  1900 samePercent.setLength(0);
991  1900 samePercent.append(codon);
992    }
993  1900 lastPercent = percent;
994    }
995    }
996   
997  1950 consensusAnnotation.annotations[col] = new Annotation(modalCodon,
998    mouseOver.toString(), ' ', pid);
999    }
1000    }
1001   
1002    /**
1003    * Returns the number of decimal places to show for profile percentages. For
1004    * less than 100 sequences, returns zero (the integer percentage value will be
1005    * displayed). For 100-999 sequences, returns 1, for 1000-9999 returns 2, etc.
1006    *
1007    * @param nseq
1008    * @return
1009    */
 
1010  1118009 toggle protected static int getPercentageDp(long nseq)
1011    {
1012  1118030 int scale = 0;
1013  1118021 while (nseq >= 100)
1014    {
1015  0 scale++;
1016  0 nseq /= 10;
1017    }
1018  1118033 return scale;
1019    }
1020    }