Clover icon

Coverage Report

  1. Project Clover database Mon Nov 11 2024 20:42:03 GMT
  2. Package jalview.analysis

File AAFrequency.java

 

Coverage histogram

../../img/srcFileCovDistChart9.png
12% of files have more coverage

Code metrics

152
316
18
1
975
618
112
0.35
17.56
18
6.22

Classes

Class Line # Actions
AAFrequency 55 316 112
0.8662551686.6%
 

Contributing tests

This file is covered by 197 tests. .

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.analysis;
22   
23    import jalview.datamodel.AlignedCodonFrame;
24    import jalview.datamodel.AlignmentAnnotation;
25    import jalview.datamodel.AlignmentI;
26    import jalview.datamodel.Annotation;
27    import jalview.datamodel.Profile;
28    import jalview.datamodel.ProfileI;
29    import jalview.datamodel.Profiles;
30    import jalview.datamodel.ProfilesI;
31    import jalview.datamodel.ResidueCount;
32    import jalview.datamodel.ResidueCount.SymbolCounts;
33    import jalview.datamodel.SecondaryStructureCount;
34    import jalview.datamodel.SequenceI;
35    import jalview.ext.android.SparseIntArray;
36    import jalview.util.Comparison;
37    import jalview.util.Format;
38    import jalview.util.MappingUtils;
39    import jalview.util.QuickSort;
40   
41    import java.awt.Color;
42    import java.util.Arrays;
43    import java.util.Hashtable;
44    import java.util.List;
45   
46    /**
47    * Takes in a vector or array of sequences and column start and column end and
48    * returns a new Hashtable[] of size maxSeqLength, if Hashtable not supplied.
49    * This class is used extensively in calculating alignment colourschemes that
50    * depend on the amount of conservation in each alignment column.
51    *
52    * @author $author$
53    * @version $Revision$
54    */
 
55    public class AAFrequency
56    {
57    public static final String PROFILE = "P";
58   
59    /*
60    * Quick look-up of String value of char 'A' to 'Z'
61    */
62    private static final String[] CHARS = new String['Z' - 'A' + 1];
63   
 
64  50 toggle static
65    {
66  1350 for (char c = 'A'; c <= 'Z'; c++)
67    {
68  1300 CHARS[c - 'A'] = String.valueOf(c);
69    }
70    }
71   
 
72  3 toggle public static final ProfilesI calculate(List<SequenceI> list, int start,
73    int end)
74    {
75  3 return calculate(list, start, end, false);
76    }
77   
 
78  384 toggle public static final ProfilesI calculate(List<SequenceI> sequences,
79    int start, int end, boolean profile)
80    {
81  384 SequenceI[] seqs = new SequenceI[sequences.size()];
82  384 int width = 0;
83  384 synchronized (sequences)
84    {
85  3233 for (int i = 0; i < sequences.size(); i++)
86    {
87  2849 seqs[i] = sequences.get(i);
88  2849 int length = seqs[i].getLength();
89  2849 if (length > width)
90    {
91  383 width = length;
92    }
93    }
94   
95  384 if (end >= width)
96    {
97  213 end = width;
98    }
99   
100  384 ProfilesI reply = calculate(seqs, width, start, end, profile);
101  384 return reply;
102    }
103    }
104   
105    /**
106    * Calculate the consensus symbol(s) for each column in the given range.
107    *
108    * @param sequences
109    * @param width
110    * the full width of the alignment
111    * @param start
112    * start column (inclusive, base zero)
113    * @param end
114    * end column (exclusive)
115    * @param saveFullProfile
116    * if true, store all symbol counts
117    */
 
118  1348 toggle public static final ProfilesI calculate(final SequenceI[] sequences,
119    int width, int start, int end, boolean saveFullProfile)
120    {
121    // long now = System.currentTimeMillis();
122  1348 int seqCount = sequences.length;
123  1348 boolean nucleotide = false;
124  1348 int nucleotideCount = 0;
125  1348 int peptideCount = 0;
126   
127  1348 ProfileI[] result = new ProfileI[width];
128   
129  611831 for (int column = start; column < end; column++)
130    {
131    /*
132    * Apply a heuristic to detect nucleotide data (which can
133    * be counted in more compact arrays); here we test for
134    * more than 90% nucleotide; recheck every 10 columns in case
135    * of misleading data e.g. highly conserved Alanine in peptide!
136    * Mistakenly guessing nucleotide has a small performance cost,
137    * as it will result in counting in sparse arrays.
138    * Mistakenly guessing peptide has a small space cost,
139    * as it will use a larger than necessary array to hold counts.
140    */
141  610488 if (nucleotideCount > 100 && column % 10 == 0)
142    {
143  54923 nucleotide = (9 * peptideCount < nucleotideCount);
144    }
145  610480 ResidueCount residueCounts = new ResidueCount(nucleotide);
146   
147  11300675 for (int row = 0; row < seqCount; row++)
148    {
149  10724643 if (sequences[row] == null)
150    {
151  0 jalview.bin.Console.errPrintln(
152    "WARNING: Consensus skipping null sequence - possible race condition.");
153  0 continue;
154    }
155  10716594 if (sequences[row].getLength() > column)
156    {
157  10690533 char c = sequences[row].getCharAt(column);
158  10691338 residueCounts.add(c);
159  10705756 if (Comparison.isNucleotide(c))
160    {
161  967915 nucleotideCount++;
162    }
163  9732442 else if (!Comparison.isGap(c))
164    {
165  877833 peptideCount++;
166    }
167    }
168    else
169    {
170    /*
171    * count a gap if the sequence doesn't reach this column
172    */
173  30127 residueCounts.addGap();
174    }
175    }
176   
177  610017 int maxCount = residueCounts.getModalCount();
178  610023 String maxResidue = residueCounts.getResiduesForCount(maxCount);
179  610345 int gapCount = residueCounts.getGapCount();
180  610323 ProfileI profile = new Profile(seqCount, gapCount, maxCount,
181    maxResidue);
182   
183  610431 if (saveFullProfile)
184    {
185  592121 profile.setCounts(residueCounts);
186    }
187   
188  610470 result[column] = profile;
189    }
190  1348 return new Profiles(result);
191    // long elapsed = System.currentTimeMillis() - now;
192    // jalview.bin.Console.outPrintln(elapsed);
193    }
194   
 
195  0 toggle public static final ProfilesI calculateSS(List<SequenceI> list, int start,
196    int end, String source)
197    {
198  0 return calculateSS(list, start, end, false, source);
199    }
200   
 
201  381 toggle public static final ProfilesI calculateSS(List<SequenceI> sequences,
202    int start, int end, boolean profile, String source)
203    {
204  381 SequenceI[] seqs = new SequenceI[sequences.size()];
205  381 int width = 0;
206  381 synchronized (sequences)
207    {
208  3227 for (int i = 0; i < sequences.size(); i++)
209    {
210  2846 seqs[i] = sequences.get(i);
211  2846 int length = seqs[i].getLength();
212  2846 if (length > width)
213    {
214  380 width = length;
215    }
216    }
217   
218  381 if (end >= width)
219    {
220  213 end = width;
221    }
222   
223  381 ProfilesI reply = calculateSS(seqs, width, start, end, profile,
224    source);
225  381 return reply;
226    }
227    }
228   
 
229  1384 toggle public static final ProfilesI calculateSS(final SequenceI[] sequences,
230    int width, int start, int end, boolean saveFullProfile,
231    String source)
232    {
233   
234  1384 int seqCount = sequences.length;
235   
236  1384 int seqWithSSCount = 0;
237   
238  1384 ProfileI[] result = new ProfileI[width];
239   
240  624390 for (int column = start; column < end; column++)
241    {
242   
243  623045 int ssCount = 0;
244   
245  623037 SecondaryStructureCount ssCounts = new SecondaryStructureCount();
246   
247  11421995 for (int row = 0; row < seqCount; row++)
248    {
249  10841726 if (sequences[row] == null)
250    {
251  0 jalview.bin.Console.errPrintln(
252    "WARNING: Consensus skipping null sequence - possible race condition.");
253  0 continue;
254    }
255   
256  10832732 char c = sequences[row].getCharAt(column);
257   
258  10826298 List<AlignmentAnnotation> annots = AlignmentUtils.getAlignmentAnnotationForSource(sequences[row], source);
259  10842092 if(annots!=null) {
260  99295 seqWithSSCount++;
261  99299 for (AlignmentAnnotation aa : annots)
262    {
263  108061 if (aa != null)
264    {
265  108063 ssCount++;
266    }
267   
268  108062 if (sequences[row].getLength() > column && !Comparison.isGap(c)
269    && aa != null)
270    {
271   
272  70406 int seqPosition = sequences[row].findPosition(column);
273   
274  70406 char ss = AlignmentUtils
275    .findSSAnnotationForGivenSeqposition(aa, seqPosition);
276  70404 if (ss == '*')
277    {
278  0 continue;
279    }
280  70406 ssCounts.add(ss);
281    }
282  37658 else if (Comparison.isGap(c) && aa != null)
283    {
284  37658 ssCounts.addGap();
285    }
286    }
287    }
288    }
289   
290  622997 int maxSSCount = ssCounts.getModalCount();
291  622880 String maxSS = ssCounts.getSSForCount(maxSSCount);
292  622923 int gapCount = ssCounts.getGapCount();
293  622960 ProfileI profile = new Profile(maxSS, ssCount, gapCount, maxSSCount,
294    seqWithSSCount);
295   
296  622994 if (saveFullProfile)
297    {
298  604693 profile.setSSCounts(ssCounts);
299    }
300   
301  622914 result[column] = profile;
302    }
303  1384 return new Profiles(result);
304    }
305   
306    /**
307    * Make an estimate of the profile size we are going to compute i.e. how many
308    * different characters may be present in it. Overestimating has a cost of
309    * using more memory than necessary. Underestimating has a cost of needing to
310    * extend the SparseIntArray holding the profile counts.
311    *
312    * @param profileSizes
313    * counts of sizes of profiles so far encountered
314    * @return
315    */
 
316  0 toggle static int estimateProfileSize(SparseIntArray profileSizes)
317    {
318  0 if (profileSizes.size() == 0)
319    {
320  0 return 4;
321    }
322   
323    /*
324    * could do a statistical heuristic here e.g. 75%ile
325    * for now just return the largest value
326    */
327  0 return profileSizes.keyAt(profileSizes.size() - 1);
328    }
329   
330    /**
331    * Derive the consensus annotations to be added to the alignment for display.
332    * This does not recompute the raw data, but may be called on a change in
333    * display options, such as 'ignore gaps', which may in turn result in a
334    * change in the derived values.
335    *
336    * @param consensus
337    * the annotation row to add annotations to
338    * @param profiles
339    * the source consensus data
340    * @param startCol
341    * start column (inclusive)
342    * @param endCol
343    * end column (exclusive)
344    * @param ignoreGaps
345    * if true, normalise residue percentages ignoring gaps
346    * @param showSequenceLogo
347    * if true include all consensus symbols, else just show modal
348    * residue
349    * @param nseq
350    * number of sequences
351    */
 
352  1093 toggle public static void completeConsensus(AlignmentAnnotation consensus,
353    ProfilesI profiles, int startCol, int endCol, boolean ignoreGaps,
354    boolean showSequenceLogo, long nseq)
355    {
356    // long now = System.currentTimeMillis();
357  1093 if (consensus == null || consensus.annotations == null
358    || consensus.annotations.length < endCol)
359    {
360    /*
361    * called with a bad alignment annotation row
362    * wait for it to be initialised properly
363    */
364  0 return;
365    }
366   
367  181162 for (int i = startCol; i < endCol; i++)
368    {
369  180069 ProfileI profile = profiles.get(i);
370  180069 if (profile == null)
371    {
372    /*
373    * happens if sequences calculated over were
374    * shorter than alignment width
375    */
376  0 consensus.annotations[i] = null;
377  0 return;
378    }
379   
380  180069 final int dp = getPercentageDp(nseq);
381   
382  180069 float value = profile.getPercentageIdentity(ignoreGaps);
383   
384  180069 String description = getTooltip(profile, value, showSequenceLogo,
385    ignoreGaps, dp);
386   
387  180069 String modalResidue = profile.getModalResidue();
388  180069 if ("".equals(modalResidue))
389    {
390  6718 modalResidue = "-";
391    }
392  173351 else if (modalResidue.length() > 1)
393    {
394  7839 modalResidue = "+";
395    }
396  180069 consensus.annotations[i] = new Annotation(modalResidue, description,
397    ' ', value);
398    }
399    // long elapsed = System.currentTimeMillis() - now;
400    // jalview.bin.Console.outPrintln(-elapsed);
401    }
402   
 
403  924 toggle public static void completeSSConsensus(AlignmentAnnotation ssConsensus,
404    ProfilesI profiles, int startCol, int endCol, boolean ignoreGaps,
405    boolean showSequenceLogo, long nseq)
406    {
407    // long now = System.currentTimeMillis();
408  924 if (ssConsensus == null || ssConsensus.annotations == null
409    || ssConsensus.annotations.length < endCol)
410    {
411    /*
412    * called with a bad alignment annotation row
413    * wait for it to be initialised properly
414    */
415  3 return;
416    }
417   
418  524062 for (int i = startCol; i < endCol; i++)
419    {
420  523141 ProfileI profile = profiles.get(i);
421  523141 if (profile == null)
422    {
423    /*
424    * happens if sequences calculated over were
425    * shorter than alignment width
426    */
427  0 ssConsensus.annotations[i] = null;
428  0 return;
429    }
430   
431  523141 if (ssConsensus.getNoOfSequencesIncluded() < 0)
432    {
433  0 ssConsensus.setNoOfSequencesIncluded(profile.getSeqWithSSCount());
434    }
435   
436  523141 final int dp = getPercentageDp(nseq);
437   
438  523141 float value = profile.getSSPercentageIdentity(ignoreGaps);
439   
440  523141 String description = getSSTooltip(profile, value, showSequenceLogo,
441    ignoreGaps, dp);
442   
443  523141 String modalSS = profile.getModalSS();
444  523141 if ("".equals(modalSS))
445    {
446  505657 modalSS = "-";
447    }
448  17484 else if (modalSS.length() > 1)
449    {
450  1082 modalSS = "+";
451    }
452  523141 ssConsensus.annotations[i] = new Annotation(modalSS, description,
453    ' ', value);
454    }
455   
456    //Hide consensus with no data to display
457  921 if(ssConsensus.getNoOfSequencesIncluded()<1)
458  844 ssConsensus.visible = false;
459   
460    // long elapsed = System.currentTimeMillis() - now;
461    // jalview.bin.Console.outPrintln(-elapsed);
462    }
463   
464    /**
465    * Derive the gap count annotation row.
466    *
467    * @param gaprow
468    * the annotation row to add annotations to
469    * @param profiles
470    * the source consensus data
471    * @param startCol
472    * start column (inclusive)
473    * @param endCol
474    * end column (exclusive)
475    */
 
476  1881 toggle public static void completeGapAnnot(AlignmentAnnotation gaprow,
477    ProfilesI profiles, int startCol, int endCol, long nseq)
478    {
479  1881 if (gaprow == null || gaprow.annotations == null
480    || gaprow.annotations.length < endCol)
481    {
482    /*
483    * called with a bad alignment annotation row
484    * wait for it to be initialised properly
485    */
486  0 return;
487    }
488    // always set ranges again
489  1881 gaprow.graphMax = nseq;
490  1881 gaprow.graphMin = 0;
491  1881 double scale = 0.8 / nseq;
492  684924 for (int i = startCol; i < endCol; i++)
493    {
494  683060 ProfileI profile = profiles.get(i);
495  683058 if (profile == null)
496    {
497    /*
498    * happens if sequences calculated over were
499    * shorter than alignment width
500    */
501  0 gaprow.annotations[i] = null;
502  0 return;
503    }
504   
505  683060 final int gapped = profile.getNonGapped();
506   
507  683051 String description = "" + gapped;
508   
509  683063 gaprow.annotations[i] = new Annotation("", description, '\0', gapped,
510    jalview.util.ColorUtils.bleachColour(Color.DARK_GRAY,
511    (float) scale * gapped));
512    }
513    }
514   
515    /**
516    * Returns a tooltip showing either
517    * <ul>
518    * <li>the full profile (percentages of all residues present), if
519    * showSequenceLogo is true, or</li>
520    * <li>just the modal (most common) residue(s), if showSequenceLogo is
521    * false</li>
522    * </ul>
523    * Percentages are as a fraction of all sequence, or only ungapped sequences
524    * if ignoreGaps is true.
525    *
526    * @param profile
527    * @param pid
528    * @param showSequenceLogo
529    * @param ignoreGaps
530    * @param dp
531    * the number of decimal places to format percentages to
532    * @return
533    */
 
534  180069 toggle static String getTooltip(ProfileI profile, float pid,
535    boolean showSequenceLogo, boolean ignoreGaps, int dp)
536    {
537  180069 ResidueCount counts = profile.getCounts();
538   
539  180069 String description = null;
540  180069 if (counts != null && showSequenceLogo)
541    {
542  64337 int normaliseBy = ignoreGaps ? profile.getNonGapped()
543    : profile.getHeight();
544  64337 description = counts.getTooltip(normaliseBy, dp);
545    }
546    else
547    {
548  115732 StringBuilder sb = new StringBuilder(64);
549  115732 String maxRes = profile.getModalResidue();
550  115732 if (maxRes.length() > 1)
551    {
552  2856 sb.append("[").append(maxRes).append("]");
553    }
554    else
555    {
556  112876 sb.append(maxRes);
557    }
558  115732 if (maxRes.length() > 0)
559    {
560  112752 sb.append(" ");
561  112752 Format.appendPercentage(sb, pid, dp);
562  112752 sb.append("%");
563    }
564  115732 description = sb.toString();
565    }
566  180069 return description;
567    }
568   
 
569  523141 toggle static String getSSTooltip(ProfileI profile, float pid,
570    boolean showSequenceLogo, boolean ignoreGaps, int dp)
571    {
572  523141 SecondaryStructureCount counts = profile.getSSCounts();
573   
574  523141 String description = null;
575  523141 if (counts != null && showSequenceLogo)
576    {
577  46848 int normaliseBy = ignoreGaps ? profile.getNonGapped()
578    : profile.getHeight();
579  46848 description = counts.getTooltip(normaliseBy, dp);
580    }
581    else
582    {
583  476293 StringBuilder sb = new StringBuilder(64);
584  476293 String maxSS = profile.getModalSS();
585  476293 if (maxSS.length() > 1)
586    {
587  742 sb.append("[").append(maxSS).append("]");
588    }
589    else
590    {
591  475551 sb.append(maxSS);
592    }
593  476293 if (maxSS.length() > 0)
594    {
595  12423 sb.append(" ");
596  12423 Format.appendPercentage(sb, pid, dp);
597  12423 sb.append("%");
598    }
599  476293 description = sb.toString();
600    }
601  523141 return description;
602    }
603   
604    /**
605    * Returns the sorted profile for the given consensus data. The returned array
606    * contains
607    *
608    * <pre>
609    * [profileType, numberOfValues, totalPercent, charValue1, percentage1, charValue2, percentage2, ...]
610    * in descending order of percentage value
611    * </pre>
612    *
613    * @param profile
614    * the data object from which to extract and sort values
615    * @param ignoreGaps
616    * if true, only non-gapped values are included in percentage
617    * calculations
618    * @return
619    */
 
620  139513 toggle public static int[] extractProfile(ProfileI profile, boolean ignoreGaps)
621    {
622  139513 char[] symbols;
623  139513 int[] values;
624   
625  139513 if (profile.getCounts() != null)
626    {
627  139513 ResidueCount counts = profile.getCounts();
628  139513 SymbolCounts symbolCounts = counts.getSymbolCounts();
629  139513 symbols = symbolCounts.symbols;
630  139513 values = symbolCounts.values;
631   
632    }
633  0 else if (profile.getSSCounts() != null)
634    {
635  0 SecondaryStructureCount counts = profile.getSSCounts();
636    // to do
637  0 SecondaryStructureCount.SymbolCounts symbolCounts = counts
638    .getSymbolCounts();
639  0 symbols = symbolCounts.symbols;
640  0 values = symbolCounts.values;
641    }
642    else
643    {
644  0 return null;
645    }
646   
647  139513 QuickSort.sort(values, symbols);
648  139513 int totalPercentage = 0;
649  139513 final int divisor = ignoreGaps ? profile.getNonGapped()
650    : profile.getHeight();
651   
652    /*
653    * traverse the arrays in reverse order (highest counts first)
654    */
655  139513 int[] result = new int[3 + 2 * symbols.length];
656  139513 int nextArrayPos = 3;
657  139513 int nonZeroCount = 0;
658   
659  389823 for (int i = symbols.length - 1; i >= 0; i--)
660    {
661  250312 int theChar = symbols[i];
662  250312 int charCount = values[i];
663  250312 final int percentage = (charCount * 100) / divisor;
664  250312 if (percentage == 0)
665    {
666    /*
667    * this count (and any remaining) round down to 0% - discard
668    */
669  2 break;
670    }
671  250310 nonZeroCount++;
672  250310 result[nextArrayPos++] = theChar;
673  250310 result[nextArrayPos++] = percentage;
674  250310 totalPercentage += percentage;
675    }
676   
677    /*
678    * truncate array if any zero values were discarded
679    */
680  139513 if (nonZeroCount < symbols.length)
681    {
682  2 int[] tmp = new int[3 + 2 * nonZeroCount];
683  2 System.arraycopy(result, 0, tmp, 0, tmp.length);
684  2 result = tmp;
685    }
686   
687    /*
688    * fill in 'header' values
689    */
690  139513 result[0] = AlignmentAnnotation.SEQUENCE_PROFILE;
691  139513 result[1] = nonZeroCount;
692  139513 result[2] = totalPercentage;
693   
694  139513 return result;
695    }
696   
697    /**
698    * Extract a sorted extract of cDNA codon profile data. The returned array
699    * contains
700    *
701    * <pre>
702    * [profileType, numberOfValues, totalPercentage, charValue1, percentage1, charValue2, percentage2, ...]
703    * in descending order of percentage value, where the character values encode codon triplets
704    * </pre>
705    *
706    * @param hashtable
707    * @return
708    */
 
709  2 toggle public static int[] extractCdnaProfile(
710    Hashtable<String, Object> hashtable, boolean ignoreGaps)
711    {
712    // this holds #seqs, #ungapped, and then codon count, indexed by encoded
713    // codon triplet
714  2 int[] codonCounts = (int[]) hashtable.get(PROFILE);
715  2 int[] sortedCounts = new int[codonCounts.length - 2];
716  2 System.arraycopy(codonCounts, 2, sortedCounts, 0,
717    codonCounts.length - 2);
718   
719  2 int[] result = new int[3 + 2 * sortedCounts.length];
720    // first value is just the type of profile data
721  2 result[0] = AlignmentAnnotation.CDNA_PROFILE;
722   
723  2 char[] codons = new char[sortedCounts.length];
724  130 for (int i = 0; i < codons.length; i++)
725    {
726  128 codons[i] = (char) i;
727    }
728  2 QuickSort.sort(sortedCounts, codons);
729  2 int totalPercentage = 0;
730  2 int distinctValuesCount = 0;
731  2 int j = 3;
732  2 int divisor = ignoreGaps ? codonCounts[1] : codonCounts[0];
733  8 for (int i = codons.length - 1; i >= 0; i--)
734    {
735  8 final int codonCount = sortedCounts[i];
736  8 if (codonCount == 0)
737    {
738  0 break; // nothing else of interest here
739    }
740  8 final int percentage = codonCount * 100 / divisor;
741  8 if (percentage == 0)
742    {
743    /*
744    * this (and any remaining) values rounded down to 0 - discard
745    */
746  2 break;
747    }
748  6 distinctValuesCount++;
749  6 result[j++] = codons[i];
750  6 result[j++] = percentage;
751  6 totalPercentage += percentage;
752    }
753  2 result[2] = totalPercentage;
754   
755    /*
756    * Just return the non-zero values
757    */
758    // todo next value is redundant if we limit the array to non-zero counts
759  2 result[1] = distinctValuesCount;
760  2 return Arrays.copyOfRange(result, 0, j);
761    }
762   
763    /**
764    * Compute a consensus for the cDNA coding for a protein alignment.
765    *
766    * @param alignment
767    * the protein alignment (which should hold mappings to cDNA
768    * sequences)
769    * @param hconsensus
770    * the consensus data stores to be populated (one per column)
771    */
 
772  4 toggle public static void calculateCdna(AlignmentI alignment,
773    Hashtable<String, Object>[] hconsensus)
774    {
775  4 final char gapCharacter = alignment.getGapCharacter();
776  4 List<AlignedCodonFrame> mappings = alignment.getCodonFrames();
777  4 if (mappings == null || mappings.isEmpty())
778    {
779  0 return;
780    }
781   
782  4 int cols = alignment.getWidth();
783  1928 for (int col = 0; col < cols; col++)
784    {
785    // todo would prefer a Java bean for consensus data
786  1924 Hashtable<String, Object> columnHash = new Hashtable<>();
787    // #seqs, #ungapped seqs, counts indexed by (codon encoded + 1)
788  1924 int[] codonCounts = new int[66];
789  1924 codonCounts[0] = alignment.getSequences().size();
790  1924 int ungappedCount = 0;
791  1924 for (SequenceI seq : alignment.getSequences())
792    {
793  20870 if (seq.getCharAt(col) == gapCharacter)
794    {
795  10166 continue;
796    }
797  10704 List<char[]> codons = MappingUtils.findCodonsFor(seq, col,
798    mappings);
799  10704 for (char[] codon : codons)
800    {
801  10657 int codonEncoded = CodingUtils.encodeCodon(codon);
802  10657 if (codonEncoded >= 0)
803    {
804  10657 codonCounts[codonEncoded + 2]++;
805  10657 ungappedCount++;
806  10657 break;
807    }
808    }
809    }
810  1924 codonCounts[1] = ungappedCount;
811    // todo: sort values here, save counts and codons?
812  1924 columnHash.put(PROFILE, codonCounts);
813  1924 hconsensus[col] = columnHash;
814    }
815    }
816   
817    /**
818    * Derive displayable cDNA consensus annotation from computed consensus data.
819    *
820    * @param consensusAnnotation
821    * the annotation row to be populated for display
822    * @param consensusData
823    * the computed consensus data
824    * @param showProfileLogo
825    * if true show all symbols present at each position, else only the
826    * modal value
827    * @param nseqs
828    * the number of sequences in the alignment
829    */
 
830  3 toggle public static void completeCdnaConsensus(
831    AlignmentAnnotation consensusAnnotation,
832    Hashtable<String, Object>[] consensusData,
833    boolean showProfileLogo, int nseqs)
834    {
835  3 if (consensusAnnotation == null
836    || consensusAnnotation.annotations == null
837    || consensusAnnotation.annotations.length < consensusData.length)
838    {
839    // called with a bad alignment annotation row - wait for it to be
840    // initialised properly
841  0 return;
842    }
843   
844    // ensure codon triplet scales with font size
845  3 consensusAnnotation.scaleColLabel = true;
846  981 for (int col = 0; col < consensusData.length; col++)
847    {
848  978 Hashtable<String, Object> hci = consensusData[col];
849  978 if (hci == null)
850    {
851    // gapped protein column?
852  0 continue;
853    }
854    // array holds #seqs, #ungapped, then codon counts indexed by codon
855  978 final int[] codonCounts = (int[]) hci.get(PROFILE);
856  978 int totalCount = 0;
857   
858    /*
859    * First pass - get total count and find the highest
860    */
861  978 final char[] codons = new char[codonCounts.length - 2];
862  63570 for (int j = 2; j < codonCounts.length; j++)
863    {
864  62592 final int codonCount = codonCounts[j];
865  62592 codons[j - 2] = (char) (j - 2);
866  62592 totalCount += codonCount;
867    }
868   
869    /*
870    * Sort array of encoded codons by count ascending - so the modal value
871    * goes to the end; start by copying the count (dropping the first value)
872    */
873  978 int[] sortedCodonCounts = new int[codonCounts.length - 2];
874  978 System.arraycopy(codonCounts, 2, sortedCodonCounts, 0,
875    codonCounts.length - 2);
876  978 QuickSort.sort(sortedCodonCounts, codons);
877   
878  978 int modalCodonEncoded = codons[codons.length - 1];
879  978 int modalCodonCount = sortedCodonCounts[codons.length - 1];
880  978 String modalCodon = String
881    .valueOf(CodingUtils.decodeCodon(modalCodonEncoded));
882  978 if (sortedCodonCounts.length > 1 && sortedCodonCounts[codons.length
883    - 2] == sortedCodonCounts[codons.length - 1])
884    {
885    /*
886    * two or more codons share the modal count
887    */
888  25 modalCodon = "+";
889    }
890  978 float pid = sortedCodonCounts[sortedCodonCounts.length - 1] * 100
891    / (float) totalCount;
892   
893    /*
894    * todo ? Replace consensus hashtable with sorted arrays of codons and
895    * counts (non-zero only). Include total count in count array [0].
896    */
897   
898    /*
899    * Scan sorted array backwards for most frequent values first. Show
900    * repeated values compactly.
901    */
902  978 StringBuilder mouseOver = new StringBuilder(32);
903  978 StringBuilder samePercent = new StringBuilder();
904  978 String percent = null;
905  978 String lastPercent = null;
906  978 int percentDecPl = getPercentageDp(nseqs);
907   
908  1931 for (int j = codons.length - 1; j >= 0; j--)
909    {
910  1931 int codonCount = sortedCodonCounts[j];
911  1931 if (codonCount == 0)
912    {
913    /*
914    * remaining codons are 0% - ignore, but finish off the last one if
915    * necessary
916    */
917  978 if (samePercent.length() > 0)
918    {
919  953 mouseOver.append(samePercent).append(": ").append(percent)
920    .append("% ");
921    }
922  978 break;
923    }
924  953 int codonEncoded = codons[j];
925  953 final int pct = codonCount * 100 / totalCount;
926  953 String codon = String
927    .valueOf(CodingUtils.decodeCodon(codonEncoded));
928  953 StringBuilder sb = new StringBuilder();
929  953 Format.appendPercentage(sb, pct, percentDecPl);
930  953 percent = sb.toString();
931  953 if (showProfileLogo || codonCount == modalCodonCount)
932    {
933  953 if (percent.equals(lastPercent) && j > 0)
934    {
935  0 samePercent.append(samePercent.length() == 0 ? "" : ", ");
936  0 samePercent.append(codon);
937    }
938    else
939    {
940  953 if (samePercent.length() > 0)
941    {
942  0 mouseOver.append(samePercent).append(": ").append(lastPercent)
943    .append("% ");
944    }
945  953 samePercent.setLength(0);
946  953 samePercent.append(codon);
947    }
948  953 lastPercent = percent;
949    }
950    }
951   
952  978 consensusAnnotation.annotations[col] = new Annotation(modalCodon,
953    mouseOver.toString(), ' ', pid);
954    }
955    }
956   
957    /**
958    * Returns the number of decimal places to show for profile percentages. For
959    * less than 100 sequences, returns zero (the integer percentage value will be
960    * displayed). For 100-999 sequences, returns 1, for 1000-9999 returns 2, etc.
961    *
962    * @param nseq
963    * @return
964    */
 
965  704154 toggle protected static int getPercentageDp(long nseq)
966    {
967  704159 int scale = 0;
968  704165 while (nseq >= 100)
969    {
970  0 scale++;
971  0 nseq /= 10;
972    }
973  704162 return scale;
974    }
975    }