Clover icon

Coverage Report

  1. Project Clover database Mon Sep 2 2024 17:57:51 BST
  2. Package jalview.analysis

File AAFrequency.java

 

Coverage histogram

../../img/srcFileCovDistChart9.png
13% of files have more coverage

Code metrics

146
308
18
1
956
605
109
0.35
17.11
18
6.06

Classes

Class Line # Actions
AAFrequency 57 308 109
0.86864406 86.9%
 

Contributing tests

This file is covered by 195 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.analysis;
22   
23    import jalview.datamodel.AlignedCodonFrame;
24    import jalview.datamodel.AlignmentAnnotation;
25    import jalview.datamodel.AlignmentI;
26    import jalview.datamodel.Annotation;
27    import jalview.datamodel.Profile;
28    import jalview.datamodel.ProfileI;
29    import jalview.datamodel.Profiles;
30    import jalview.datamodel.ProfilesI;
31    import jalview.datamodel.ResidueCount;
32    import jalview.datamodel.ResidueCount.SymbolCounts;
33    import jalview.datamodel.SecondaryStructureCount;
34    import jalview.datamodel.SeqCigar;
35    import jalview.datamodel.SequenceI;
36    import jalview.ext.android.SparseIntArray;
37    import jalview.util.Comparison;
38    import jalview.util.Constants;
39    import jalview.util.Format;
40    import jalview.util.MappingUtils;
41    import jalview.util.QuickSort;
42   
43    import java.awt.Color;
44    import java.util.Arrays;
45    import java.util.Hashtable;
46    import java.util.List;
47   
48    /**
49    * Static methods that compute per-column consensus profiles for a set of
50    * aligned sequences (residues, secondary structure and cDNA codons), and
51    * derive the annotation rows and tooltips used to display them. This class
52    * is used extensively in calculating alignment colourschemes that depend on
53    * the amount of conservation in each alignment column.
54    * @author $author$
55    * @version $Revision$
56    */
 
57    public class AAFrequency
58    {
/** Key under which a column's codon count array is stored in its Hashtable */
public static final String PROFILE = "P";

/*
 * Look-up table giving the single-character String for each letter
 * 'A' to 'Z', indexed by the letter's offset from 'A'
 */
private static final String[] CHARS = new String['Z' - 'A' + 1];

static
{
  for (int offset = 0; offset < CHARS.length; offset++)
  {
    CHARS[offset] = String.valueOf((char) ('A' + offset));
  }
}
73   
 
74  3 toggle public static final ProfilesI calculate(List<SequenceI> list, int start,
75    int end)
76    {
77  3 return calculate(list, start, end, false);
78    }
79   
 
80  384 toggle public static final ProfilesI calculate(List<SequenceI> sequences,
81    int start, int end, boolean profile)
82    {
83  384 SequenceI[] seqs = new SequenceI[sequences.size()];
84  384 int width = 0;
85  384 synchronized (sequences)
86    {
87  3233 for (int i = 0; i < sequences.size(); i++)
88    {
89  2849 seqs[i] = sequences.get(i);
90  2849 int length = seqs[i].getLength();
91  2849 if (length > width)
92    {
93  383 width = length;
94    }
95    }
96   
97  384 if (end >= width)
98    {
99  213 end = width;
100    }
101   
102  384 ProfilesI reply = calculate(seqs, width, start, end, profile);
103  384 return reply;
104    }
105    }
106   
107    /**
108    * Calculate the consensus symbol(s) for each column in the given range.
109    *
110    * @param sequences
111    * @param width
112    * the full width of the alignment
113    * @param start
114    * start column (inclusive, base zero)
115    * @param end
116    * end column (exclusive)
117    * @param saveFullProfile
118    * if true, store all symbol counts
119    */
 
120  1346 toggle public static final ProfilesI calculate(final SequenceI[] sequences,
121    int width, int start, int end, boolean saveFullProfile)
122    {
123    // long now = System.currentTimeMillis();
124  1346 int seqCount = sequences.length;
125  1346 boolean nucleotide = false;
126  1346 int nucleotideCount = 0;
127  1346 int peptideCount = 0;
128   
129  1346 ProfileI[] result = new ProfileI[width];
130   
131  609993 for (int column = start; column < end; column++)
132    {
133    /*
134    * Apply a heuristic to detect nucleotide data (which can
135    * be counted in more compact arrays); here we test for
136    * more than 90% nucleotide; recheck every 10 columns in case
137    * of misleading data e.g. highly conserved Alanine in peptide!
138    * Mistakenly guessing nucleotide has a small performance cost,
139    * as it will result in counting in sparse arrays.
140    * Mistakenly guessing peptide has a small space cost,
141    * as it will use a larger than necessary array to hold counts.
142    */
143  608652 if (nucleotideCount > 100 && column % 10 == 0)
144    {
145  54710 nucleotide = (9 * peptideCount < nucleotideCount);
146    }
147  608657 ResidueCount residueCounts = new ResidueCount(nucleotide);
148   
149  11295475 for (int row = 0; row < seqCount; row++)
150    {
151  10692217 if (sequences[row] == null)
152    {
153  0 jalview.bin.Console.errPrintln(
154    "WARNING: Consensus skipping null sequence - possible race condition.");
155  0 continue;
156    }
157  10679543 if (sequences[row].getLength() > column)
158    {
159  10657494 char c = sequences[row].getCharAt(column);
160  10656238 residueCounts.add(c);
161  10644026 if (Comparison.isNucleotide(c))
162    {
163  959741 nucleotideCount++;
164    }
165  9703678 else if (!Comparison.isGap(c))
166    {
167  857178 peptideCount++;
168    }
169    }
170    else
171    {
172    /*
173    * count a gap if the sequence doesn't reach this column
174    */
175  30133 residueCounts.addGap();
176    }
177    }
178   
179  608643 int maxCount = residueCounts.getModalCount();
180  608634 String maxResidue = residueCounts.getResiduesForCount(maxCount);
181  608645 int gapCount = residueCounts.getGapCount();
182  608641 ProfileI profile = new Profile(seqCount, gapCount, maxCount,
183    maxResidue);
184   
185  608654 if (saveFullProfile)
186    {
187  590280 profile.setCounts(residueCounts);
188    }
189   
190  608627 result[column] = profile;
191    }
192  1346 return new Profiles(result);
193    // long elapsed = System.currentTimeMillis() - now;
194    // jalview.bin.Console.outPrintln(elapsed);
195    }
196   
 
197  0 toggle public static final ProfilesI calculateSS(List<SequenceI> list, int start,
198    int end)
199    {
200  0 return calculateSS(list, start, end, false);
201    }
202   
 
203  381 toggle public static final ProfilesI calculateSS(List<SequenceI> sequences,
204    int start, int end, boolean profile)
205    {
206  381 SequenceI[] seqs = new SequenceI[sequences.size()];
207  381 int width = 0;
208  381 synchronized (sequences)
209    {
210  3227 for (int i = 0; i < sequences.size(); i++)
211    {
212  2846 seqs[i] = sequences.get(i);
213  2846 int length = seqs[i].getLength();
214  2846 if (length > width)
215    {
216  380 width = length;
217    }
218    }
219   
220  381 if (end >= width)
221    {
222  213 end = width;
223    }
224   
225  381 ProfilesI reply = calculateSS(seqs, width, start, end, profile);
226  381 return reply;
227    }
228    }
229   
 
230  1351 toggle public static final ProfilesI calculateSS(final SequenceI[] sequences,
231    int width, int start, int end, boolean saveFullProfile)
232    {
233   
234  1351 int seqCount = sequences.length;
235   
236  1351 ProfileI[] result = new ProfileI[width];
237   
238  610901 for (int column = start; column < end; column++)
239    {
240   
241  609577 int ssCount = 0;
242   
243  609554 SecondaryStructureCount ssCounts = new SecondaryStructureCount();
244   
245  11263194 for (int row = 0; row < seqCount; row++)
246    {
247  10653177 if (sequences[row] == null)
248    {
249  0 jalview.bin.Console.errPrintln(
250    "WARNING: Consensus skipping null sequence - possible race condition.");
251  0 continue;
252    }
253   
254  10689405 char c = sequences[row].getCharAt(column);
255  10695802 AlignmentAnnotation aa = AlignmentUtils
256    .getDisplayedAlignmentAnnotation(sequences[row]);
257  10697656 if (aa != null)
258    {
259  31971 ssCount++;
260    }
261   
262  10546276 if (sequences[row].getLength() > column && !Comparison.isGap(c)
263    && aa != null)
264    {
265   
266  21452 int seqPosition = sequences[row].findPosition(column);
267   
268  21452 char ss = AlignmentUtils.findSSAnnotationForGivenSeqposition(aa,
269    seqPosition);
270  21452 if (ss == '*')
271    {
272  0 continue;
273    }
274  21452 ssCounts.add(ss);
275    }
276  10547910 else if (Comparison.isGap(c) && aa != null)
277    {
278  10519 ssCounts.addGap();
279    }
280    }
281   
282  609480 int maxSSCount = ssCounts.getModalCount();
283  609470 String maxSS = ssCounts.getSSForCount(maxSSCount);
284  609492 int gapCount = ssCounts.getGapCount();
285  609500 ProfileI profile = new Profile(maxSS, ssCount, gapCount, maxSSCount);
286   
287  609569 if (saveFullProfile)
288    {
289  591267 profile.setSSCounts(ssCounts);
290    }
291   
292  609548 result[column] = profile;
293    }
294  1351 return new Profiles(result);
295    }
296   
297    /**
298    * Make an estimate of the profile size we are going to compute i.e. how many
299    * different characters may be present in it. Overestimating has a cost of
300    * using more memory than necessary. Underestimating has a cost of needing to
301    * extend the SparseIntArray holding the profile counts.
302    *
303    * @param profileSizes
304    * counts of sizes of profiles so far encountered
305    * @return
306    */
 
307  0 toggle static int estimateProfileSize(SparseIntArray profileSizes)
308    {
309  0 if (profileSizes.size() == 0)
310    {
311  0 return 4;
312    }
313   
314    /*
315    * could do a statistical heuristic here e.g. 75%ile
316    * for now just return the largest value
317    */
318  0 return profileSizes.keyAt(profileSizes.size() - 1);
319    }
320   
321    /**
322    * Derive the consensus annotations to be added to the alignment for display.
323    * This does not recompute the raw data, but may be called on a change in
324    * display options, such as 'ignore gaps', which may in turn result in a
325    * change in the derived values.
326    *
327    * @param consensus
328    * the annotation row to add annotations to
329    * @param profiles
330    * the source consensus data
331    * @param startCol
332    * start column (inclusive)
333    * @param endCol
334    * end column (exclusive)
335    * @param ignoreGaps
336    * if true, normalise residue percentages ignoring gaps
337    * @param showSequenceLogo
338    * if true include all consensus symbols, else just show modal
339    * residue
340    * @param nseq
341    * number of sequences
342    */
 
343  1094 toggle public static void completeConsensus(AlignmentAnnotation consensus,
344    ProfilesI profiles, int startCol, int endCol, boolean ignoreGaps,
345    boolean showSequenceLogo, long nseq)
346    {
347    // long now = System.currentTimeMillis();
348  1094 if (consensus == null || consensus.annotations == null
349    || consensus.annotations.length < endCol)
350    {
351    /*
352    * called with a bad alignment annotation row
353    * wait for it to be initialised properly
354    */
355  0 return;
356    }
357   
358  590623 for (int i = startCol; i < endCol; i++)
359    {
360  589560 ProfileI profile = profiles.get(i);
361  589561 if (profile == null)
362    {
363    /*
364    * happens if sequences calculated over were
365    * shorter than alignment width
366    */
367  0 consensus.annotations[i] = null;
368  0 return;
369    }
370   
371  589548 final int dp = getPercentageDp(nseq);
372   
373  589511 float value = profile.getPercentageIdentity(ignoreGaps);
374   
375  589537 String description = getTooltip(profile, value, showSequenceLogo,
376    ignoreGaps, dp);
377   
378  589494 String modalResidue = profile.getModalResidue();
379  589515 if ("".equals(modalResidue))
380    {
381  6398 modalResidue = "-";
382    }
383  582998 else if (modalResidue.length() > 1)
384    {
385  7733 modalResidue = "+";
386    }
387  589478 consensus.annotations[i] = new Annotation(modalResidue, description,
388    ' ', value);
389    }
390    // long elapsed = System.currentTimeMillis() - now;
391    // jalview.bin.Console.outPrintln(-elapsed);
392    }
393   
 
394  1065 toggle public static void completeSSConsensus(AlignmentAnnotation ssConsensus,
395    ProfilesI profiles, int startCol, int endCol, boolean ignoreGaps,
396    boolean showSequenceLogo, long nseq)
397    {
398    // long now = System.currentTimeMillis();
399  1065 if (ssConsensus == null || ssConsensus.annotations == null
400    || ssConsensus.annotations.length < endCol)
401    {
402    /*
403    * called with a bad alignment annotation row
404    * wait for it to be initialised properly
405    */
406  0 return;
407    }
408   
409  582256 for (int i = startCol; i < endCol; i++)
410    {
411  584359 ProfileI profile = profiles.get(i);
412  584163 if (profile == null)
413    {
414    /*
415    * happens if sequences calculated over were
416    * shorter than alignment width
417    */
418  0 ssConsensus.annotations[i] = null;
419  0 return;
420    }
421   
422  584140 final int dp = getPercentageDp(nseq);
423   
424  584118 float value = profile.getSSPercentageIdentity(ignoreGaps);
425   
426  584088 String description = getSSTooltip(profile, value, showSequenceLogo,
427    ignoreGaps, dp);
428   
429  584268 String modalSS = profile.getModalSS();
430  584116 if ("".equals(modalSS))
431    {
432  575074 modalSS = "-";
433    }
434  9039 else if (modalSS.length() > 1)
435    {
436  622 modalSS = "+";
437    }
438  584083 ssConsensus.annotations[i] = new Annotation(modalSS, description, ' ',
439    value);
440    }
441    // long elapsed = System.currentTimeMillis() - now;
442    // jalview.bin.Console.outPrintln(-elapsed);
443    }
444   
445    /**
446    * Derive the gap count annotation row.
447    *
448    * @param gaprow
449    * the annotation row to add annotations to
450    * @param profiles
451    * the source consensus data
452    * @param startCol
453    * start column (inclusive)
454    * @param endCol
455    * end column (exclusive)
456    */
 
457  1928 toggle public static void completeGapAnnot(AlignmentAnnotation gaprow,
458    ProfilesI profiles, int startCol, int endCol, long nseq)
459    {
460  1928 if (gaprow == null || gaprow.annotations == null
461    || gaprow.annotations.length < endCol)
462    {
463    /*
464    * called with a bad alignment annotation row
465    * wait for it to be initialised properly
466    */
467  0 return;
468    }
469    // always set ranges again
470  1928 gaprow.graphMax = nseq;
471  1928 gaprow.graphMin = 0;
472  1928 double scale = 0.8 / nseq;
473  1139991 for (int i = startCol; i < endCol; i++)
474    {
475  1138063 ProfileI profile = profiles.get(i);
476  1138044 if (profile == null)
477    {
478    /*
479    * happens if sequences calculated over were
480    * shorter than alignment width
481    */
482  0 gaprow.annotations[i] = null;
483  0 return;
484    }
485   
486  1138056 final int gapped = profile.getNonGapped();
487   
488  1138040 String description = "" + gapped;
489   
490  1138057 gaprow.annotations[i] = new Annotation("", description, '\0', gapped,
491    jalview.util.ColorUtils.bleachColour(Color.DARK_GRAY,
492    (float) scale * gapped));
493    }
494    }
495   
496    /**
497    * Returns a tooltip showing either
498    * <ul>
499    * <li>the full profile (percentages of all residues present), if
500    * showSequenceLogo is true, or</li>
501    * <li>just the modal (most common) residue(s), if showSequenceLogo is
502    * false</li>
503    * </ul>
504    * Percentages are as a fraction of all sequence, or only ungapped sequences
505    * if ignoreGaps is true.
506    *
507    * @param profile
508    * @param pid
509    * @param showSequenceLogo
510    * @param ignoreGaps
511    * @param dp
512    * the number of decimal places to format percentages to
513    * @return
514    */
 
515  589529 toggle static String getTooltip(ProfileI profile, float pid,
516    boolean showSequenceLogo, boolean ignoreGaps, int dp)
517    {
518  589527 ResidueCount counts = profile.getCounts();
519   
520  589491 String description = null;
521  589511 if (counts != null && showSequenceLogo)
522    {
523  61188 int normaliseBy = ignoreGaps ? profile.getNonGapped()
524    : profile.getHeight();
525  61188 description = counts.getTooltip(normaliseBy, dp);
526    }
527    else
528    {
529  528331 StringBuilder sb = new StringBuilder(64);
530  528324 String maxRes = profile.getModalResidue();
531  528284 if (maxRes.length() > 1)
532    {
533  2886 sb.append("[").append(maxRes).append("]");
534    }
535    else
536    {
537  525409 sb.append(maxRes);
538    }
539  528263 if (maxRes.length() > 0)
540    {
541  525009 sb.append(" ");
542  525012 Format.appendPercentage(sb, pid, dp);
543  525010 sb.append("%");
544    }
545  528266 description = sb.toString();
546    }
547  589496 return description;
548    }
549   
 
550  584061 toggle static String getSSTooltip(ProfileI profile, float pid,
551    boolean showSequenceLogo, boolean ignoreGaps, int dp)
552    {
553  584071 SecondaryStructureCount counts = profile.getSSCounts();
554   
555  584019 String description = null;
556  584080 if (counts != null && showSequenceLogo)
557    {
558  57351 int normaliseBy = ignoreGaps ? profile.getNonGapped()
559    : profile.getHeight();
560  57351 description = counts.getTooltip(normaliseBy, dp);
561    }
562    else
563    {
564  526620 StringBuilder sb = new StringBuilder(64);
565  526463 String maxSS = profile.getModalSS();
566  526690 if (maxSS.length() > 1)
567    {
568  352 sb.append("[").append(maxSS).append("]");
569    }
570    else
571    {
572  526349 sb.append(maxSS);
573    }
574  526764 if (maxSS.length() > 0)
575    {
576  5905 sb.append(" ");
577  5905 Format.appendPercentage(sb, pid, dp);
578  5905 sb.append("%");
579    }
580  526776 description = sb.toString();
581    }
582  584272 return description;
583    }
584   
585    /**
586    * Returns the sorted profile for the given consensus data. The returned array
587    * contains
588    *
589    * <pre>
590    * [profileType, numberOfValues, totalPercent, charValue1, percentage1, charValue2, percentage2, ...]
591    * in descending order of percentage value
592    * </pre>
593    *
594    * @param profile
595    * the data object from which to extract and sort values
596    * @param ignoreGaps
597    * if true, only non-gapped values are included in percentage
598    * calculations
599    * @return
600    */
 
601  91354 toggle public static int[] extractProfile(ProfileI profile, boolean ignoreGaps)
602    {
603  91354 char[] symbols;
604  91354 int[] values;
605   
606  91355 if (profile.getCounts() != null)
607    {
608  91355 ResidueCount counts = profile.getCounts();
609  91355 SymbolCounts symbolCounts = counts.getSymbolCounts();
610  91355 symbols = symbolCounts.symbols;
611  91355 values = symbolCounts.values;
612   
613    }
614  0 else if (profile.getSSCounts() != null)
615    {
616  0 SecondaryStructureCount counts = profile.getSSCounts();
617    // to do
618  0 SecondaryStructureCount.SymbolCounts symbolCounts = counts
619    .getSymbolCounts();
620  0 symbols = symbolCounts.symbols;
621  0 values = symbolCounts.values;
622    }
623    else
624    {
625  0 return null;
626    }
627   
628  91355 QuickSort.sort(values, symbols);
629  91355 int totalPercentage = 0;
630  91355 final int divisor = ignoreGaps ? profile.getNonGapped()
631    : profile.getHeight();
632   
633    /*
634    * traverse the arrays in reverse order (highest counts first)
635    */
636  91355 int[] result = new int[3 + 2 * symbols.length];
637  91354 int nextArrayPos = 3;
638  91354 int nonZeroCount = 0;
639   
640  265704 for (int i = symbols.length - 1; i >= 0; i--)
641    {
642  174351 int theChar = symbols[i];
643  174351 int charCount = values[i];
644  174350 final int percentage = (charCount * 100) / divisor;
645  174350 if (percentage == 0)
646    {
647    /*
648    * this count (and any remaining) round down to 0% - discard
649    */
650  2 break;
651    }
652  174349 nonZeroCount++;
653  174348 result[nextArrayPos++] = theChar;
654  174349 result[nextArrayPos++] = percentage;
655  174349 totalPercentage += percentage;
656    }
657   
658    /*
659    * truncate array if any zero values were discarded
660    */
661  91355 if (nonZeroCount < symbols.length)
662    {
663  2 int[] tmp = new int[3 + 2 * nonZeroCount];
664  2 System.arraycopy(result, 0, tmp, 0, tmp.length);
665  2 result = tmp;
666    }
667   
668    /*
669    * fill in 'header' values
670    */
671  91355 result[0] = AlignmentAnnotation.SEQUENCE_PROFILE;
672  91355 result[1] = nonZeroCount;
673  91355 result[2] = totalPercentage;
674   
675  91355 return result;
676    }
677   
678    /**
679    * Extract a sorted extract of cDNA codon profile data. The returned array
680    * contains
681    *
682    * <pre>
683    * [profileType, numberOfValues, totalPercentage, charValue1, percentage1, charValue2, percentage2, ...]
684    * in descending order of percentage value, where the character values encode codon triplets
685    * </pre>
686    *
687    * @param hashtable
688    * @return
689    */
 
690  2 toggle public static int[] extractCdnaProfile(
691    Hashtable<String, Object> hashtable, boolean ignoreGaps)
692    {
693    // this holds #seqs, #ungapped, and then codon count, indexed by encoded
694    // codon triplet
695  2 int[] codonCounts = (int[]) hashtable.get(PROFILE);
696  2 int[] sortedCounts = new int[codonCounts.length - 2];
697  2 System.arraycopy(codonCounts, 2, sortedCounts, 0,
698    codonCounts.length - 2);
699   
700  2 int[] result = new int[3 + 2 * sortedCounts.length];
701    // first value is just the type of profile data
702  2 result[0] = AlignmentAnnotation.CDNA_PROFILE;
703   
704  2 char[] codons = new char[sortedCounts.length];
705  130 for (int i = 0; i < codons.length; i++)
706    {
707  128 codons[i] = (char) i;
708    }
709  2 QuickSort.sort(sortedCounts, codons);
710  2 int totalPercentage = 0;
711  2 int distinctValuesCount = 0;
712  2 int j = 3;
713  2 int divisor = ignoreGaps ? codonCounts[1] : codonCounts[0];
714  8 for (int i = codons.length - 1; i >= 0; i--)
715    {
716  8 final int codonCount = sortedCounts[i];
717  8 if (codonCount == 0)
718    {
719  0 break; // nothing else of interest here
720    }
721  8 final int percentage = codonCount * 100 / divisor;
722  8 if (percentage == 0)
723    {
724    /*
725    * this (and any remaining) values rounded down to 0 - discard
726    */
727  2 break;
728    }
729  6 distinctValuesCount++;
730  6 result[j++] = codons[i];
731  6 result[j++] = percentage;
732  6 totalPercentage += percentage;
733    }
734  2 result[2] = totalPercentage;
735   
736    /*
737    * Just return the non-zero values
738    */
739    // todo next value is redundant if we limit the array to non-zero counts
740  2 result[1] = distinctValuesCount;
741  2 return Arrays.copyOfRange(result, 0, j);
742    }
743   
744    /**
745    * Compute a consensus for the cDNA coding for a protein alignment.
746    *
747    * @param alignment
748    * the protein alignment (which should hold mappings to cDNA
749    * sequences)
750    * @param hconsensus
751    * the consensus data stores to be populated (one per column)
752    */
 
753  4 toggle public static void calculateCdna(AlignmentI alignment,
754    Hashtable<String, Object>[] hconsensus)
755    {
756  4 final char gapCharacter = alignment.getGapCharacter();
757  4 List<AlignedCodonFrame> mappings = alignment.getCodonFrames();
758  4 if (mappings == null || mappings.isEmpty())
759    {
760  0 return;
761    }
762   
763  4 int cols = alignment.getWidth();
764  1928 for (int col = 0; col < cols; col++)
765    {
766    // todo would prefer a Java bean for consensus data
767  1924 Hashtable<String, Object> columnHash = new Hashtable<>();
768    // #seqs, #ungapped seqs, counts indexed by (codon encoded + 1)
769  1924 int[] codonCounts = new int[66];
770  1924 codonCounts[0] = alignment.getSequences().size();
771  1924 int ungappedCount = 0;
772  1924 for (SequenceI seq : alignment.getSequences())
773    {
774  20870 if (seq.getCharAt(col) == gapCharacter)
775    {
776  10166 continue;
777    }
778  10704 List<char[]> codons = MappingUtils.findCodonsFor(seq, col,
779    mappings);
780  10704 for (char[] codon : codons)
781    {
782  10657 int codonEncoded = CodingUtils.encodeCodon(codon);
783  10657 if (codonEncoded >= 0)
784    {
785  10657 codonCounts[codonEncoded + 2]++;
786  10657 ungappedCount++;
787  10657 break;
788    }
789    }
790    }
791  1924 codonCounts[1] = ungappedCount;
792    // todo: sort values here, save counts and codons?
793  1924 columnHash.put(PROFILE, codonCounts);
794  1924 hconsensus[col] = columnHash;
795    }
796    }
797   
798    /**
799    * Derive displayable cDNA consensus annotation from computed consensus data.
800    *
801    * @param consensusAnnotation
802    * the annotation row to be populated for display
803    * @param consensusData
804    * the computed consensus data
805    * @param showProfileLogo
806    * if true show all symbols present at each position, else only the
807    * modal value
808    * @param nseqs
809    * the number of sequences in the alignment
810    */
 
811  3 toggle public static void completeCdnaConsensus(
812    AlignmentAnnotation consensusAnnotation,
813    Hashtable<String, Object>[] consensusData,
814    boolean showProfileLogo, int nseqs)
815    {
816  3 if (consensusAnnotation == null
817    || consensusAnnotation.annotations == null
818    || consensusAnnotation.annotations.length < consensusData.length)
819    {
820    // called with a bad alignment annotation row - wait for it to be
821    // initialised properly
822  0 return;
823    }
824   
825    // ensure codon triplet scales with font size
826  3 consensusAnnotation.scaleColLabel = true;
827  981 for (int col = 0; col < consensusData.length; col++)
828    {
829  978 Hashtable<String, Object> hci = consensusData[col];
830  978 if (hci == null)
831    {
832    // gapped protein column?
833  0 continue;
834    }
835    // array holds #seqs, #ungapped, then codon counts indexed by codon
836  978 final int[] codonCounts = (int[]) hci.get(PROFILE);
837  978 int totalCount = 0;
838   
839    /*
840    * First pass - get total count and find the highest
841    */
842  978 final char[] codons = new char[codonCounts.length - 2];
843  63570 for (int j = 2; j < codonCounts.length; j++)
844    {
845  62592 final int codonCount = codonCounts[j];
846  62592 codons[j - 2] = (char) (j - 2);
847  62592 totalCount += codonCount;
848    }
849   
850    /*
851    * Sort array of encoded codons by count ascending - so the modal value
852    * goes to the end; start by copying the count (dropping the first value)
853    */
854  978 int[] sortedCodonCounts = new int[codonCounts.length - 2];
855  978 System.arraycopy(codonCounts, 2, sortedCodonCounts, 0,
856    codonCounts.length - 2);
857  978 QuickSort.sort(sortedCodonCounts, codons);
858   
859  978 int modalCodonEncoded = codons[codons.length - 1];
860  978 int modalCodonCount = sortedCodonCounts[codons.length - 1];
861  978 String modalCodon = String
862    .valueOf(CodingUtils.decodeCodon(modalCodonEncoded));
863  978 if (sortedCodonCounts.length > 1 && sortedCodonCounts[codons.length
864    - 2] == sortedCodonCounts[codons.length - 1])
865    {
866    /*
867    * two or more codons share the modal count
868    */
869  25 modalCodon = "+";
870    }
871  978 float pid = sortedCodonCounts[sortedCodonCounts.length - 1] * 100
872    / (float) totalCount;
873   
874    /*
875    * todo ? Replace consensus hashtable with sorted arrays of codons and
876    * counts (non-zero only). Include total count in count array [0].
877    */
878   
879    /*
880    * Scan sorted array backwards for most frequent values first. Show
881    * repeated values compactly.
882    */
883  978 StringBuilder mouseOver = new StringBuilder(32);
884  978 StringBuilder samePercent = new StringBuilder();
885  978 String percent = null;
886  978 String lastPercent = null;
887  978 int percentDecPl = getPercentageDp(nseqs);
888   
889  1931 for (int j = codons.length - 1; j >= 0; j--)
890    {
891  1931 int codonCount = sortedCodonCounts[j];
892  1931 if (codonCount == 0)
893    {
894    /*
895    * remaining codons are 0% - ignore, but finish off the last one if
896    * necessary
897    */
898  978 if (samePercent.length() > 0)
899    {
900  953 mouseOver.append(samePercent).append(": ").append(percent)
901    .append("% ");
902    }
903  978 break;
904    }
905  953 int codonEncoded = codons[j];
906  953 final int pct = codonCount * 100 / totalCount;
907  953 String codon = String
908    .valueOf(CodingUtils.decodeCodon(codonEncoded));
909  953 StringBuilder sb = new StringBuilder();
910  953 Format.appendPercentage(sb, pct, percentDecPl);
911  953 percent = sb.toString();
912  953 if (showProfileLogo || codonCount == modalCodonCount)
913    {
914  953 if (percent.equals(lastPercent) && j > 0)
915    {
916  0 samePercent.append(samePercent.length() == 0 ? "" : ", ");
917  0 samePercent.append(codon);
918    }
919    else
920    {
921  953 if (samePercent.length() > 0)
922    {
923  0 mouseOver.append(samePercent).append(": ").append(lastPercent)
924    .append("% ");
925    }
926  953 samePercent.setLength(0);
927  953 samePercent.append(codon);
928    }
929  953 lastPercent = percent;
930    }
931    }
932   
933  978 consensusAnnotation.annotations[col] = new Annotation(modalCodon,
934    mouseOver.toString(), ' ', pid);
935    }
936    }
937   
938    /**
939    * Returns the number of decimal places to show for profile percentages. For
940    * less than 100 sequences, returns zero (the integer percentage value will be
941    * displayed). For 100-999 sequences, returns 1, for 1000-9999 returns 2, etc.
942    *
943    * @param nseq
944    * @return
945    */
 
946  1174524 toggle protected static int getPercentageDp(long nseq)
947    {
948  1174607 int scale = 0;
949  1174625 while (nseq >= 100)
950    {
951  0 scale++;
952  0 nseq /= 10;
953    }
954  1174651 return scale;
955    }
956    }