Clover icon

Coverage Report

  1. Project Clover database Sun Jan 11 2026 02:28:45 GMT
  2. Package jalview.analysis

File AAFrequency.java

 

Coverage histogram

../../img/srcFileCovDistChart9.png
13% of files have more coverage

Code metrics

188
403
22
1
1,246
778
143
0.35
18.32
22
6.5

Classes

Class Line # Actions
AAFrequency 60 403 143
0.882544988.3%
 

Contributing tests

This file is covered by 204 tests. .

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.analysis;
22   
23    import jalview.datamodel.AlignedCodonFrame;
24    import jalview.datamodel.AlignmentAnnotation;
25    import jalview.datamodel.AlignmentI;
26    import jalview.datamodel.AnnotatedCollectionI;
27    import jalview.datamodel.Annotation;
28    import jalview.datamodel.HiddenMarkovModel;
29    import jalview.datamodel.Profile;
30    import jalview.datamodel.ProfileI;
31    import jalview.datamodel.Profiles;
32    import jalview.datamodel.ProfilesI;
33    import jalview.datamodel.ResidueCount;
34    import jalview.datamodel.ResidueCount.SymbolCounts;
35    import jalview.datamodel.SecondaryStructureCount;
36    import jalview.datamodel.SequenceGroup;
37    import jalview.datamodel.SequenceI;
38    import jalview.ext.android.SparseIntArray;
39    import jalview.schemes.ResidueProperties;
40    import jalview.util.Comparison;
41    import jalview.util.Constants;
42    import jalview.util.Format;
43    import jalview.util.MappingUtils;
44    import jalview.util.QuickSort;
45   
46    import java.awt.Color;
47    import java.util.ArrayList;
48    import java.util.Arrays;
49    import java.util.Hashtable;
50    import java.util.List;
51    import java.util.Map;
52   
53    /**
54    * Takes in a vector or array of sequences and column start and column end and
55    * returns a new Hashtable[] of size maxSeqLength, if Hashtable not supplied.
56    * This class is used extensively in calculating alignment colourschemes that
57    * depend on the amount of conservation in each alignment column.
58    *
59    */
 
60    public class AAFrequency
61    {
62    private static final double LOG2 = Math.log(2);
63   
64    public static final String PROFILE = "P";
65    /**
66    * when true, ignore any unassigned or coil structure when reporting secondary structure consensus
67    */
68    public static final boolean IGNORE_COILS=true;
69   
 
70  3 toggle public static final ProfilesI calculate(List<SequenceI> list, int start,
71    int end)
72    {
73  3 return calculate(list, start, end, false);
74    }
75   
 
76  512 toggle public static final ProfilesI calculate(List<SequenceI> sequences,
77    int start, int end, boolean profile)
78    {
79  512 SequenceI[] seqs = new SequenceI[sequences.size()];
80  512 int width = 0;
81  512 synchronized (sequences)
82    {
83  4472 for (int i = 0; i < sequences.size(); i++)
84    {
85  3960 seqs[i] = sequences.get(i);
86  3960 int length = seqs[i].getLength();
87  3960 if (length > width)
88    {
89  511 width = length;
90    }
91    }
92   
93  512 if (end >= width)
94    {
95  289 end = width;
96    }
97   
98  512 ProfilesI reply = calculate(seqs, width, start, end, profile);
99  512 return reply;
100    }
101    }
102   
103    /**
104    * Calculate the consensus symbol(s) for each column in the given range.
105    *
106    * @param sequences
107    * @param width
108    * the full width of the alignment
109    * @param start
110    * start column (inclusive, base zero)
111    * @param end
112    * end column (exclusive)
113    * @param saveFullProfile
114    * if true, store all symbol counts
115    */
 
116  1461 toggle public static final ProfilesI calculate(final SequenceI[] sequences,
117    int width, int start, int end, boolean saveFullProfile)
118    {
119    // long now = System.currentTimeMillis();
120  1461 int seqCount = sequences.length;
121  1461 boolean nucleotide = false;
122  1461 int nucleotideCount = 0;
123  1461 int peptideCount = 0;
124   
125  1461 ProfileI[] result = new ProfileI[width];
126   
127  664919 for (int column = start; column < end; column++)
128    {
129    /*
130    * Apply a heuristic to detect nucleotide data (which can
131    * be counted in more compact arrays); here we test for
132    * more than 90% nucleotide; recheck every 10 columns in case
133    * of misleading data e.g. highly conserved Alanine in peptide!
134    * Mistakenly guessing nucleotide has a small performance cost,
135    * as it will result in counting in sparse arrays.
136    * Mistakenly guessing peptide has a small space cost,
137    * as it will use a larger than necessary array to hold counts.
138    */
139  663470 if (nucleotideCount > 100 && column % 10 == 0)
140    {
141  59292 nucleotide = (9 * peptideCount < nucleotideCount);
142    }
143  663461 ResidueCount residueCounts = new ResidueCount(nucleotide);
144   
145  11284762 for (int row = 0; row < seqCount; row++)
146    {
147  10620469 if (sequences[row] == null)
148    {
149  0 jalview.bin.Console.errPrintln(
150    "WARNING: Consensus skipping null sequence - possible race condition.");
151  0 continue;
152    }
153  10619592 if (sequences[row].getLength() > column)
154    {
155  10588996 char c = sequences[row].getCharAt(column);
156  10587772 residueCounts.add(c);
157  10592559 if (Comparison.isNucleotide(c))
158    {
159  995646 nucleotideCount++;
160    }
161  9598122 else if (!Comparison.isGap(c))
162    {
163  818350 peptideCount++;
164    }
165    }
166    else
167    {
168    /*
169    * count a gap if the sequence doesn't reach this column
170    */
171  30776 residueCounts.addGap();
172    }
173    }
174   
175  663449 int maxCount = residueCounts.getModalCount();
176  663434 String maxResidue = residueCounts.getResiduesForCount(maxCount);
177  663514 int gapCount = residueCounts.getGapCount();
178  663499 ProfileI profile = new Profile(seqCount, gapCount, maxCount,
179    maxResidue);
180   
181  663489 if (saveFullProfile)
182    {
183  636891 profile.setCounts(residueCounts);
184    }
185   
186  663461 result[column] = profile;
187    }
188  1461 return new Profiles(seqCount, result);
189    // long elapsed = System.currentTimeMillis() - now;
190    // jalview.bin.Console.outPrintln(elapsed);
191    }
192   
 
193  0 toggle public static final ProfilesI calculateSS(List<SequenceI> list, int start,
194    int end, String source)
195    {
196  0 return calculateSS(list, start, end, false, source);
197    }
 
198  0 toggle public static final ProfilesI calculateSS(List<SequenceI> sequences,
199    int start, int end, boolean profile, String source)
200    {
201  0 return calculateSS(sequences, start, end, profile, source,null);
202    }
 
203  549 toggle public static final ProfilesI calculateSS(List<SequenceI> sequences,
204    int start, int end, boolean profile, String source,
205    SequenceGroup sequenceGroup)
206    {
207  549 SequenceI[] seqs = new SequenceI[sequences.size()];
208  549 int width = 0;
209  549 synchronized (sequences)
210    {
211  4695 for (int i = 0; i < sequences.size(); i++)
212    {
213  4146 seqs[i] = sequences.get(i);
214  4146 int length = seqs[i].getLength();
215  4146 if (length > width)
216    {
217  548 width = length;
218    }
219    }
220   
221  549 if (end >= width)
222    {
223  329 end = width;
224    }
225   
226  549 ProfilesI reply = calculateSS(seqs, width, start, end, profile,
227    source,sequenceGroup);
228  549 return reply;
229    }
230    }
231   
232   
233    /**
234    * TODO - REFACTOR TO ANNOTATEDCOLLECTION!
235    * @param sequences
236    * @param width
237    * @param start
238    * @param end
239    * @param saveFullProfile
240    * @param source - if null, will be treated as 'All'
241    * @param sequenceGroup - if null all secondary structure annotations matching source on sequence will be considered
242    * @return
243    */
 
244  1827 toggle public static final ProfilesI calculateSS(final SequenceI[] sequences,
245    int width, int start, int end, boolean saveFullProfile,
246    String source, SequenceGroup sequenceGroup)
247    {
248   
249   
250  1827 int seqCount = sequences.length;
251   
252  1827 ProfileI[] result = new ProfileI[width];
253  1827 int maxSSannotcount=0,maxSeqWithSScount=0;
254  1827 if (source==null || "".equals(source)) {
255  2 source = Constants.SS_ALL_PROVIDERS;
256    }
257  1827 Map<SequenceI, ArrayList<AlignmentAnnotation>> sq_group_by_source = null;
258  1827 if (sequenceGroup!=null && sequenceGroup.getAnnotationsFromTree().size()>0 && source!=null)
259    {
260  40 sq_group_by_source = AlignmentUtils.getSequenceAssociatedAlignmentAnnotations(sequenceGroup.getAnnotationsFromTree().toArray(new AlignmentAnnotation[0]), source);
261    }
262  441217 for (int column = start; column < end; column++)
263    {
264   
265  439389 int seqWithSSCount = 0;
266  439391 int ssCount = 0;
267   
268  439393 SecondaryStructureCount ssCounts = new SecondaryStructureCount();
269   
270  6809903 for (int row = 0; row < seqCount; row++)
271    {
272  6371336 if (sequences[row] == null)
273    {
274  0 jalview.bin.Console.errPrintln(
275    "WARNING: Consensus skipping null sequence - possible race condition.");
276  0 continue;
277    }
278   
279  6371456 char c = sequences[row].getCharAt(column);
280   
281  6367815 List<AlignmentAnnotation> annots;
282   
283  6368705 if (sq_group_by_source==null) {
284  6352821 annots = AlignmentUtils.getAlignmentAnnotationForSource(sequences[row], source);
285    } else {
286  16018 annots = sq_group_by_source.get(sequences[row]);
287  16018 if (annots==null)
288    {
289  16018 annots = sq_group_by_source.get(sequences[row].getDatasetSequence());
290    }
291    }
292   
293  6369394 if(annots!=null) {
294  114522 if (annots.size()>0) {
295  114509 seqWithSSCount++;
296    }
297  114499 for (AlignmentAnnotation aa : annots)
298    {
299  145767 if (aa != null)
300    {
301  145741 ssCount++;
302    }
303   
304  145833 if (sequences[row].getLength() > column && !Comparison.isGap(c)
305    && aa != null)
306    {
307   
308  113617 int seqPosition = sequences[row].findPosition(column);
309   
310  113755 char ss = AlignmentUtils
311    .findSSAnnotationForGivenSeqposition(aa, seqPosition);
312  113765 if (ss == '*' || IGNORE_COILS && (ss == 'C' || ss == 'c' || ss == ' '))
313    {
314  70782 continue;
315    }
316  42968 ssCounts.add(ss);
317    }
318  32199 else if (Comparison.isGap(c) && aa != null)
319    {
320  32199 ssCounts.addGap();
321    }
322    }
323    }
324    }
325   
326  439372 int maxSSCount = ssCounts.getModalCount();
327  439361 String maxSS = ssCounts.getSSForCount(maxSSCount);
328  439344 int gapCount = ssCounts.getGapCount();
329  439322 ProfileI profile = new Profile(maxSS, ssCount, gapCount, maxSSCount,
330    seqWithSSCount);
331  439317 maxSeqWithSScount=Math.max(maxSeqWithSScount, seqWithSSCount);
332  439361 if (saveFullProfile)
333    {
334  412806 profile.setSSCounts(ssCounts);
335    }
336   
337  439352 result[column] = profile;
338  439397 maxSSannotcount=Math.max(maxSSannotcount, ssCount);
339    }
340  1827 return new Profiles(maxSSannotcount,result);
341    }
342   
343    /**
344    * Returns the full set of profiles for a hidden Markov model. The underlying
345    * data is the raw probabilities of a residue being emitted at each node,
346    * however the profiles returned by this function contain the percentage
347    * chance of a residue emission.
348    *
349    * @param hmm
350    * @param width
351    * The width of the Profile array (Profiles) to be returned.
352    * @param start
353    * The alignment column on which the first profile is based.
354    * @param end
355    * The alignment column on which the last profile is based.
356    * @param removeBelowBackground
357    * if true, symbols with a match emission probability less than
358    * background frequency are ignored
359    * @return
360    */
 
361  3 toggle public static ProfilesI calculateHMMProfiles(final HiddenMarkovModel hmm,
362    int width, int start, int end, boolean removeBelowBackground,
363    boolean infoLetterHeight)
364    {
365  3 ProfileI[] result = new ProfileI[width];
366  3 char[] symbols = hmm.getSymbols().toCharArray();
367  3 int symbolCount = symbols.length;
368  472 for (int column = start; column < end; column++)
369    {
370  469 ResidueCount counts = new ResidueCount();
371  469 for (char symbol : symbols)
372    {
373  9380 int value = getAnalogueCount(hmm, column, symbol,
374    removeBelowBackground, infoLetterHeight);
375  9380 counts.put(symbol, value);
376    }
377  469 int maxCount = counts.getModalCount();
378  469 String maxResidue = counts.getResiduesForCount(maxCount);
379  469 int gapCount = counts.getGapCount();
380  469 ProfileI profile = new Profile(symbolCount, gapCount, maxCount,
381    maxResidue);
382  469 profile.setCounts(counts);
383   
384  469 result[column] = profile;
385    }
386  3 return new Profiles(1,result); // TODO: JAL-4107 - HMM Profile seqcount tooltip needs to be different to a typical consensus tooltip ? where is seqcount ? - could report effective diversity
387    }
388   
389    /**
390    * Make an estimate of the profile size we are going to compute i.e. how many
391    * different characters may be present in it. Overestimating has a cost of
392    * using more memory than necessary. Underestimating has a cost of needing to
393    * extend the SparseIntArray holding the profile counts.
394    *
395    * @param profileSizes
396    * counts of sizes of profiles so far encountered
397    * @return
398    */
 
399  0 toggle static int estimateProfileSize(SparseIntArray profileSizes)
400    {
401  0 if (profileSizes.size() == 0)
402    {
403  0 return 4;
404    }
405   
406    /*
407    * could do a statistical heuristic here e.g. 75%ile
408    * for now just return the largest value
409    */
410  0 return profileSizes.keyAt(profileSizes.size() - 1);
411    }
412   
413    /**
414    * Derive the consensus annotations to be added to the alignment for display.
415    * This does not recompute the raw data, but may be called on a change in
416    * display options, such as 'ignore gaps', which may in turn result in a
417    * change in the derived values.
418    *
419    * @param consensus
420    * the annotation row to add annotations to
421    * @param profiles
422    * the source consensus data
423    * @param startCol
424    * start column (inclusive)
425    * @param endCol
426    * end column (exclusive)
427    * @param ignoreGaps
428    * if true, normalise residue percentages ignoring gaps
429    * @param showSequenceLogo
430    * if true include all consensus symbols, else just show modal
431    * residue
432    * @param nseq
433    * number of sequences
434    */
 
435  1089 toggle public static void completeConsensus(AlignmentAnnotation consensus,
436    ProfilesI profiles, int startCol, int endCol, boolean ignoreGaps,
437    boolean showSequenceLogo, long nseq)
438    {
439    // long now = System.currentTimeMillis();
440  1089 if (consensus == null || consensus.annotations == null
441    || consensus.annotations.length < endCol)
442    {
443    /*
444    * called with a bad alignment annotation row
445    * wait for it to be initialised properly
446    */
447  0 return;
448    }
449   
450  629720 for (int i = startCol; i < endCol; i++)
451    {
452  628626 ProfileI profile = profiles.get(i);
453  628615 if (profile == null)
454    {
455    /*
456    * happens if sequences calculated over were
457    * shorter than alignment width
458    */
459  0 consensus.annotations[i] = null;
460  0 return;
461    }
462   
463  628603 final int dp = getPercentageDp(nseq);
464   
465  628599 float value = profile.getPercentageIdentity(ignoreGaps);
466   
467  628666 String description = getTooltip(profile, value, showSequenceLogo,
468    ignoreGaps, dp);
469   
470  628707 String modalResidue = profile.getModalResidue();
471  628660 if ("".equals(modalResidue))
472    {
473  4111 modalResidue = "-";
474    }
475  624559 else if (modalResidue.length() > 1)
476    {
477  7038 modalResidue = "+";
478    }
479  628627 consensus.annotations[i] = new Annotation(modalResidue, description,
480    ' ', value);
481    }
482    // long elapsed = System.currentTimeMillis() - now;
483    // jalview.bin.Console.outPrintln(-elapsed);
484    }
485   
 
486  1271 toggle public static void completeSSConsensus(AlignmentAnnotation ssConsensus,
487    ProfilesI profiles, int startCol, int endCol, boolean ignoreGaps,
488    boolean showSequenceLogo, long nseq)
489    {
490    // long now = System.currentTimeMillis();
491  1271 if (ssConsensus == null || ssConsensus.annotations == null
492    || ssConsensus.annotations.length < endCol)
493    {
494    /*
495    * called with a bad alignment annotation row
496    * wait for it to be initialised properly
497    */
498  0 return;
499    }
500   
501  174232 for (int i = startCol; i < endCol; i++)
502    {
503  172961 ProfileI profile = profiles.get(i);
504  172961 if (profile == null)
505    {
506    /*
507    * happens if sequences calculated over were
508    * shorter than alignment width
509    */
510  0 ssConsensus.annotations[i] = null;
511  0 return;
512    }
513   
514  172961 if (ssConsensus.getNoOfSequencesIncluded() < 0)
515    {
516  504 ssConsensus.setNoOfSequencesIncluded(profile.getSeqWithSSCount());
517  504 ssConsensus.setNoOfTracksIncluded(profiles.getCount());
518    }
519   
520  172961 final int dp = getPercentageDp(nseq);
521   
522  172961 float value = profile.getSSPercentageIdentity(ignoreGaps);
523   
524  172961 String description = getSSTooltip(profile, value, showSequenceLogo,
525    ignoreGaps, dp);
526   
527  172961 String modalSS = profile.getModalSS();
528  172961 if ("".equals(modalSS))
529    {
530  161557 modalSS = "-";
531    }
532  11404 else if (modalSS.length() > 1)
533    {
534  120 modalSS = "+";
535    }
536  172961 ssConsensus.annotations[i] = new Annotation(modalSS, description,
537    ' ', value);
538    }
539   
540    //Hide consensus with no data to display
541  1271 if(ssConsensus.getNoOfSequencesIncluded()<1)
542  1150 ssConsensus.visible = false;
543   
544    // long elapsed = System.currentTimeMillis() - now;
545    // jalview.bin.Console.outPrintln(-elapsed);
546    }
547   
548    /**
549    * Derive the information annotations to be added to the alignment for
550    * display. This does not recompute the raw data, but may be called on a
551    * change in display options, such as 'ignore below background frequency',
552    * which may in turn result in a change in the derived values.
553    *
554    * @param information
555    * the annotation row to add annotations to
556    * @param profiles
557    * the source information data
558    * @param startCol
559    * start column (inclusive)
560    * @param endCol
561    * end column (exclusive)
562    * @param ignoreGaps
563    * if true, normalise residue percentages
564    * @param showSequenceLogo
565    * if true include all information symbols, else just show modal
566    * residue
567    */
 
568  4 toggle public static float completeInformation(AlignmentAnnotation information,
569    ProfilesI profiles, int startCol, int endCol)
570    {
571    // long now = System.currentTimeMillis();
572  4 if (information == null || information.annotations == null)
573    {
574    /*
575    * called with a bad alignment annotation row
576    * wait for it to be initialised properly
577    */
578  0 return 0;
579    }
580   
581  4 float max = 0f;
582  4 SequenceI hmmSeq = information.sequenceRef;
583   
584  4 int seqLength = hmmSeq.getLength();
585  4 if (information.annotations.length < seqLength)
586    {
587  0 return 0;
588    }
589   
590  4 HiddenMarkovModel hmm = hmmSeq.getHMM();
591   
592  473 for (int column = startCol; column < endCol; column++)
593    {
594  470 if (column >= seqLength)
595    {
596    // hmm consensus sequence is shorter than the alignment
597  1 break;
598    }
599   
600  469 float value = hmm.getInformationContent(column);
601  469 boolean isNaN = Float.isNaN(value);
602  469 if (!isNaN)
603    {
604  445 max = Math.max(max, value);
605    }
606   
607  469 String description = isNaN ? null
608    : String.format("%.4f bits", value);
609  469 information.annotations[column] = new Annotation(
610    Character.toString(
611    Character.toUpperCase(hmmSeq.getCharAt(column))),
612    description, ' ', value);
613    }
614   
615  4 information.graphMax = max;
616  4 return max;
617    }
618   
619    /**
620    * Derive the occupancy count annotation
621    *
622    * @param occupancy
623    * the annotation row to add annotations to
624    * @param profiles
625    * the source consensus data
626    * @param startCol
627    * start column (inclusive)
628    * @param endCol
629    * end column (exclusive)
630    */
 
631  934 toggle public static void completeGapAnnot(AlignmentAnnotation occupancy,
632    ProfilesI profiles, int startCol, int endCol, long nseq)
633    {
634  934 if (occupancy == null || occupancy.annotations == null
635    || occupancy.annotations.length < endCol)
636    {
637    /*
638    * called with a bad alignment annotation row
639    * wait for it to be initialised properly
640    */
641  0 return;
642    }
643    // always set ranges again
644  934 occupancy.graphMax = nseq;
645  934 occupancy.graphMin = 0;
646  934 double scale = 0.8 / nseq;
647  554643 for (int i = startCol; i < endCol; i++)
648    {
649  553700 ProfileI profile = profiles.get(i);
650  553690 if (profile == null)
651    {
652    /*
653    * happens if sequences calculated over were
654    * shorter than alignment width
655    */
656  0 occupancy.annotations[i] = null;
657  0 return;
658    }
659   
660  553690 final int gapped = profile.getNonGapped();
661   
662  553685 String description = "" + gapped;
663   
664  553692 occupancy.annotations[i] = new Annotation("", description, '\0',
665    gapped,
666    jalview.util.ColorUtils.bleachColour(Color.DARK_GRAY,
667    (float) scale * gapped));
668    }
669    }
670   
671    /**
672    * Returns a tooltip showing either
673    * <ul>
674    * <li>the full profile (percentages of all residues present), if
675    * showSequenceLogo is true, or</li>
676    * <li>just the modal (most common) residue(s), if showSequenceLogo is
677    * false</li>
678    * </ul>
679    * Percentages are as a fraction of all sequence, or only ungapped sequences
680    * if ignoreGaps is true.
681    *
682    * @param profile
683    * @param pid
684    * @param showSequenceLogo
685    * @param ignoreGaps
686    * @param dp
687    * the number of decimal places to format percentages to
688    * @return
689    */
 
690  628656 toggle static String getTooltip(ProfileI profile, float pid,
691    boolean showSequenceLogo, boolean ignoreGaps, int dp)
692    {
693  628658 ResidueCount counts = profile.getCounts();
694   
695  628644 String description = null;
696  628771 if (counts != null && showSequenceLogo)
697    {
698  53137 int normaliseBy = ignoreGaps ? profile.getNonGapped()
699    : profile.getHeight();
700  53122 description = counts.getTooltip(normaliseBy, dp);
701    }
702    else
703    {
704  575661 StringBuilder sb = new StringBuilder(64);
705  575661 String maxRes = profile.getModalResidue();
706  575661 if (maxRes.length() > 1)
707    {
708  2989 sb.append("[").append(maxRes).append("]");
709    }
710    else
711    {
712  572672 sb.append(maxRes);
713    }
714  575661 if (maxRes.length() > 0)
715    {
716  572275 sb.append(" ");
717  572275 Format.appendPercentage(sb, pid, dp);
718  572275 sb.append("%");
719    }
720  575661 description = sb.toString();
721    }
722  628720 return description;
723    }
724   
 
725  172961 toggle static String getSSTooltip(ProfileI profile, float pid,
726    boolean showSequenceLogo, boolean ignoreGaps, int dp)
727    {
728  172961 SecondaryStructureCount counts = profile.getSSCounts();
729   
730  172961 String description = null;
731  172961 if (counts != null && showSequenceLogo)
732    {
733  42531 int normaliseBy = ignoreGaps ? profile.getNonGapped()
734    : profile.getHeight();
735  42531 description = counts.getTooltip(normaliseBy, dp);
736    }
737    else
738    {
739  130430 StringBuilder sb = new StringBuilder(64);
740  130430 String maxSS = profile.getModalSS();
741  130430 if (maxSS.length() > 1)
742    {
743  120 sb.append("[").append(maxSS).append("]");
744    }
745    else
746    {
747  130310 sb.append(maxSS);
748    }
749  130430 if (maxSS.length() > 0)
750    {
751  8024 sb.append(" ");
752  8024 Format.appendPercentage(sb, pid, dp);
753  8024 sb.append("%");
754    }
755  130430 description = sb.toString();
756    }
757  172961 return description;
758    }
759   
760    /**
761    * Returns the sorted profile for the given consensus data. The returned array
762    * contains
763    *
764    * <pre>
765    * [profileType, numberOfValues, totalPercent, charValue1, percentage1, charValue2, percentage2, ...]
766    * in descending order of percentage value
767    * </pre>
768    *
769    * @param profile
770    * the data object from which to extract and sort values
771    * @param ignoreGaps
772    * if true, only non-gapped values are included in percentage
773    * calculations
774    * @return
775    */
 
776  32904 toggle public static int[] extractProfile(ProfileI profile, boolean ignoreGaps)
777    {
778  32904 char[] symbols;
779  32904 int[] values;
780   
781  32904 if (profile.getCounts() != null)
782    {
783  32904 ResidueCount counts = profile.getCounts();
784  32904 SymbolCounts symbolCounts = counts.getSymbolCounts();
785  32904 symbols = symbolCounts.symbols;
786  32904 values = symbolCounts.values;
787   
788    }
789  0 else if (profile.getSSCounts() != null)
790    {
791  0 SecondaryStructureCount counts = profile.getSSCounts();
792    // to do
793  0 SecondaryStructureCount.SymbolCounts symbolCounts = counts
794    .getSymbolCounts();
795  0 symbols = symbolCounts.symbols;
796  0 values = symbolCounts.values;
797    }
798    else
799    {
800  0 return null;
801    }
802   
803  32904 QuickSort.sort(values, symbols);
804  32904 int totalPercentage = 0;
805  32904 final int divisor = ignoreGaps ? profile.getNonGapped()
806    : profile.getHeight();
807   
808    /*
809    * traverse the arrays in reverse order (highest counts first)
810    */
811  32904 int[] result = new int[3 + 2 * symbols.length];
812  32904 int nextArrayPos = 3;
813  32904 int nonZeroCount = 0;
814   
815  113066 for (int i = symbols.length - 1; i >= 0; i--)
816    {
817  80164 int theChar = symbols[i];
818  80164 int charCount = values[i];
819  80164 final int percentage = (charCount * 100) / divisor;
820  80164 if (percentage == 0)
821    {
822    /*
823    * this count (and any remaining) round down to 0% - discard
824    */
825  2 break;
826    }
827  80162 nonZeroCount++;
828  80162 result[nextArrayPos++] = theChar;
829  80162 result[nextArrayPos++] = percentage;
830  80162 totalPercentage += percentage;
831    }
832   
833    /*
834    * truncate array if any zero values were discarded
835    */
836  32904 if (nonZeroCount < symbols.length)
837    {
838  2 int[] tmp = new int[3 + 2 * nonZeroCount];
839  2 System.arraycopy(result, 0, tmp, 0, tmp.length);
840  2 result = tmp;
841    }
842   
843    /*
844    * fill in 'header' values
845    */
846  32904 result[0] = AlignmentAnnotation.SEQUENCE_PROFILE;
847  32904 result[1] = nonZeroCount;
848  32904 result[2] = totalPercentage;
849   
850  32904 return result;
851    }
852   
853   
854    /**
855    * Extract a sorted extract of cDNA codon profile data. The returned array
856    * contains
857    *
858    * <pre>
859    * [profileType, numberOfValues, totalPercentage, charValue1, percentage1, charValue2, percentage2, ...]
860    * in descending order of percentage value, where the character values encode codon triplets
861    * </pre>
862    *
863    * @param hashtable
864    * @return
865    */
 
866  2 toggle public static int[] extractCdnaProfile(
867    Hashtable<String, Object> hashtable, boolean ignoreGaps)
868    {
869    // this holds #seqs, #ungapped, and then codon count, indexed by encoded
870    // codon triplet
871  2 int[] codonCounts = (int[]) hashtable.get(PROFILE);
872  2 int[] sortedCounts = new int[codonCounts.length - 2];
873  2 System.arraycopy(codonCounts, 2, sortedCounts, 0,
874    codonCounts.length - 2);
875   
876  2 int[] result = new int[3 + 2 * sortedCounts.length];
877    // first value is just the type of profile data
878  2 result[0] = AlignmentAnnotation.CDNA_PROFILE;
879   
880  2 char[] codons = new char[sortedCounts.length];
881  130 for (int i = 0; i < codons.length; i++)
882    {
883  128 codons[i] = (char) i;
884    }
885  2 QuickSort.sort(sortedCounts, codons);
886  2 int totalPercentage = 0;
887  2 int distinctValuesCount = 0;
888  2 int j = 3;
889  2 int divisor = ignoreGaps ? codonCounts[1] : codonCounts[0];
890  8 for (int i = codons.length - 1; i >= 0; i--)
891    {
892  8 final int codonCount = sortedCounts[i];
893  8 if (codonCount == 0)
894    {
895  0 break; // nothing else of interest here
896    }
897  8 final int percentage = codonCount * 100 / divisor;
898  8 if (percentage == 0)
899    {
900    /*
901    * this (and any remaining) values rounded down to 0 - discard
902    */
903  2 break;
904    }
905  6 distinctValuesCount++;
906  6 result[j++] = codons[i];
907  6 result[j++] = percentage;
908  6 totalPercentage += percentage;
909    }
910  2 result[2] = totalPercentage;
911   
912    /*
913    * Just return the non-zero values
914    */
915    // todo next value is redundant if we limit the array to non-zero counts
916  2 result[1] = distinctValuesCount;
917  2 return Arrays.copyOfRange(result, 0, j);
918    }
919   
920    /**
921    * Compute a consensus for the cDNA coding for a protein alignment.
922    *
923    * @param alignment
924    * the protein alignment (which should hold mappings to cDNA
925    * sequences)
926    * @param hconsensus
927    * the consensus data stores to be populated (one per column)
928    */
 
929  4 toggle public static void calculateCdna(AlignmentI alignment,
930    Hashtable<String, Object>[] hconsensus)
931    {
932  4 final char gapCharacter = alignment.getGapCharacter();
933  4 List<AlignedCodonFrame> mappings = alignment.getCodonFrames();
934  4 if (mappings == null || mappings.isEmpty())
935    {
936  0 return;
937    }
938   
939  4 int cols = alignment.getWidth();
940  1928 for (int col = 0; col < cols; col++)
941    {
942    // todo would prefer a Java bean for consensus data
943  1924 Hashtable<String, Object> columnHash = new Hashtable<>();
944    // #seqs, #ungapped seqs, counts indexed by (codon encoded + 1)
945  1924 int[] codonCounts = new int[66];
946  1924 codonCounts[0] = alignment.getSequences().size();
947  1924 int ungappedCount = 0;
948  1924 for (SequenceI seq : alignment.getSequences())
949    {
950  20870 if (seq.getCharAt(col) == gapCharacter)
951    {
952  10166 continue;
953    }
954  10704 List<char[]> codons = MappingUtils.findCodonsFor(seq, col,
955    mappings);
956  10704 for (char[] codon : codons)
957    {
958  10657 int codonEncoded = CodingUtils.encodeCodon(codon);
959  10657 if (codonEncoded >= 0)
960    {
961  10657 codonCounts[codonEncoded + 2]++;
962  10657 ungappedCount++;
963  10657 break;
964    }
965    }
966    }
967  1924 codonCounts[1] = ungappedCount;
968    // todo: sort values here, save counts and codons?
969  1924 columnHash.put(PROFILE, codonCounts);
970  1924 hconsensus[col] = columnHash;
971    }
972    }
973   
974    /**
975    * Derive displayable cDNA consensus annotation from computed consensus data.
976    *
977    * @param consensusAnnotation
978    * the annotation row to be populated for display
979    * @param consensusData
980    * the computed consensus data
981    * @param showProfileLogo
982    * if true show all symbols present at each position, else only the
983    * modal value
984    * @param nseqs
985    * the number of sequences in the alignment
986    */
 
987  3 toggle public static void completeCdnaConsensus(
988    AlignmentAnnotation consensusAnnotation,
989    Hashtable<String, Object>[] consensusData,
990    boolean showProfileLogo, int nseqs)
991    {
992  3 if (consensusAnnotation == null
993    || consensusAnnotation.annotations == null
994    || consensusAnnotation.annotations.length < consensusData.length)
995    {
996    // called with a bad alignment annotation row - wait for it to be
997    // initialised properly
998  0 return;
999    }
1000   
1001    // ensure codon triplet scales with font size
1002  3 consensusAnnotation.scaleColLabel = true;
1003  981 for (int col = 0; col < consensusData.length; col++)
1004    {
1005  978 Hashtable<String, Object> hci = consensusData[col];
1006  978 if (hci == null)
1007    {
1008    // gapped protein column?
1009  0 continue;
1010    }
1011    // array holds #seqs, #ungapped, then codon counts indexed by codon
1012  978 final int[] codonCounts = (int[]) hci.get(PROFILE);
1013  978 int totalCount = 0;
1014   
1015    /*
1016    * First pass - get total count and find the highest
1017    */
1018  978 final char[] codons = new char[codonCounts.length - 2];
1019  63570 for (int j = 2; j < codonCounts.length; j++)
1020    {
1021  62592 final int codonCount = codonCounts[j];
1022  62592 codons[j - 2] = (char) (j - 2);
1023  62592 totalCount += codonCount;
1024    }
1025   
1026    /*
1027    * Sort array of encoded codons by count ascending - so the modal value
1028    * goes to the end; start by copying the count (dropping the first value)
1029    */
1030  978 int[] sortedCodonCounts = new int[codonCounts.length - 2];
1031  978 System.arraycopy(codonCounts, 2, sortedCodonCounts, 0,
1032    codonCounts.length - 2);
1033  978 QuickSort.sort(sortedCodonCounts, codons);
1034   
1035  978 int modalCodonEncoded = codons[codons.length - 1];
1036  978 int modalCodonCount = sortedCodonCounts[codons.length - 1];
1037  978 String modalCodon = String
1038    .valueOf(CodingUtils.decodeCodon(modalCodonEncoded));
1039  978 if (sortedCodonCounts.length > 1 && sortedCodonCounts[codons.length
1040    - 2] == sortedCodonCounts[codons.length - 1])
1041    {
1042    /*
1043    * two or more codons share the modal count
1044    */
1045  25 modalCodon = "+";
1046    }
1047  978 float pid = sortedCodonCounts[sortedCodonCounts.length - 1] * 100
1048    / (float) totalCount;
1049   
1050    /*
1051    * todo ? Replace consensus hashtable with sorted arrays of codons and
1052    * counts (non-zero only). Include total count in count array [0].
1053    */
1054   
1055    /*
1056    * Scan sorted array backwards for most frequent values first. Show
1057    * repeated values compactly.
1058    */
1059  978 StringBuilder mouseOver = new StringBuilder(32);
1060  978 StringBuilder samePercent = new StringBuilder();
1061  978 String percent = null;
1062  978 String lastPercent = null;
1063  978 int percentDecPl = getPercentageDp(nseqs);
1064   
1065  1931 for (int j = codons.length - 1; j >= 0; j--)
1066    {
1067  1931 int codonCount = sortedCodonCounts[j];
1068  1931 if (codonCount == 0)
1069    {
1070    /*
1071    * remaining codons are 0% - ignore, but finish off the last one if
1072    * necessary
1073    */
1074  978 if (samePercent.length() > 0)
1075    {
1076  953 mouseOver.append(samePercent).append(": ").append(percent)
1077    .append("% ");
1078    }
1079  978 break;
1080    }
1081  953 int codonEncoded = codons[j];
1082  953 final int pct = codonCount * 100 / totalCount;
1083  953 String codon = String
1084    .valueOf(CodingUtils.decodeCodon(codonEncoded));
1085  953 StringBuilder sb = new StringBuilder();
1086  953 Format.appendPercentage(sb, pct, percentDecPl);
1087  953 percent = sb.toString();
1088  953 if (showProfileLogo || codonCount == modalCodonCount)
1089    {
1090  953 if (percent.equals(lastPercent) && j > 0)
1091    {
1092  0 samePercent.append(samePercent.length() == 0 ? "" : ", ");
1093  0 samePercent.append(codon);
1094    }
1095    else
1096    {
1097  953 if (samePercent.length() > 0)
1098    {
1099  0 mouseOver.append(samePercent).append(": ").append(lastPercent)
1100    .append("% ");
1101    }
1102  953 samePercent.setLength(0);
1103  953 samePercent.append(codon);
1104    }
1105  953 lastPercent = percent;
1106    }
1107    }
1108   
1109  978 consensusAnnotation.annotations[col] = new Annotation(modalCodon,
1110    mouseOver.toString(), ' ', pid);
1111    }
1112    }
1113   
1114    /**
1115    * Returns the number of decimal places to show for profile percentages. For
1116    * less than 100 sequences, returns zero (the integer percentage value will be
1117    * displayed). For 100-999 sequences, returns 1, for 1000-9999 returns 2, etc.
1118    *
1119    * @param nseq
1120    * @return
1121    */
 
1122  802511 toggle protected static int getPercentageDp(long nseq)
1123    {
1124  802512 int scale = 0;
1125  802510 while (nseq >= 100)
1126    {
1127  0 scale++;
1128  0 nseq /= 10;
1129    }
1130  802510 return scale;
1131    }
1132   
1133    /**
1134    * Returns the sorted HMM profile for the given column of the alignment. The
1135    * returned array contains
1136    *
1137    * <pre>
1138    * [profileType=0, numberOfValues, 100, charValue1, percentage1, charValue2, percentage2, ...]
1139    * in descending order of percentage value
1140    * </pre>
1141    *
1142    * @param hmm
1143    * @param column
1144    * @param removeBelowBackground
1145    * if true, ignores residues with probability less than their
1146    * background frequency
1147    * @param infoHeight
1148    * if true, uses the log ratio 'information' measure to scale the
1149    * value
1150    * @return
1151    */
 
1152  3 toggle public static int[] extractHMMProfile(HiddenMarkovModel hmm, int column,
1153    boolean removeBelowBackground, boolean infoHeight)
1154    {
1155  3 if (hmm == null)
1156    {
1157  1 return null;
1158    }
1159  2 String alphabet = hmm.getSymbols();
1160  2 int size = alphabet.length();
1161  2 char symbols[] = new char[size];
1162  2 int values[] = new int[size];
1163  2 int totalCount = 0;
1164   
1165  10 for (int i = 0; i < size; i++)
1166    {
1167  8 char symbol = alphabet.charAt(i);
1168  8 symbols[i] = symbol;
1169  8 int value = getAnalogueCount(hmm, column, symbol,
1170    removeBelowBackground, infoHeight);
1171  8 values[i] = value;
1172  8 totalCount += value;
1173    }
1174   
1175    /*
1176    * sort symbols by increasing emission probability
1177    */
1178  2 QuickSort.sort(values, symbols);
1179   
1180  2 int[] profile = new int[3 + size * 2];
1181   
1182  2 profile[0] = AlignmentAnnotation.SEQUENCE_PROFILE;
1183  2 profile[1] = size;
1184  2 profile[2] = 100;
1185   
1186    /*
1187    * order symbol/count profile by decreasing emission probability
1188    */
1189  2 if (totalCount != 0)
1190    {
1191  2 int arrayPos = 3;
1192  10 for (int k = size - 1; k >= 0; k--)
1193    {
1194  8 Float percentage;
1195  8 int value = values[k];
1196  8 if (removeBelowBackground)
1197    {
1198  4 percentage = ((float) value) / totalCount * 100f;
1199    }
1200    else
1201    {
1202  4 percentage = value / 100f;
1203    }
1204  8 int intPercent = Math.round(percentage);
1205  8 profile[arrayPos] = symbols[k];
1206  8 profile[arrayPos + 1] = intPercent;
1207  8 arrayPos += 2;
1208    }
1209    }
1210  2 return profile;
1211    }
1212   
1213    /**
1214    * Converts the emission probability of a residue at a column in the alignment
1215    * to a 'count', suitable for rendering as an annotation value
1216    *
1217    * @param hmm
1218    * @param column
1219    * @param symbol
1220    * @param removeBelowBackground
1221    * if true, returns 0 for any symbol with a match emission
1222    * probability less than the background frequency
1223    * @infoHeight if true, uses the log ratio 'information content' to scale the
1224    * value
1225    * @return
1226    */
 
1227  9392 toggle static int getAnalogueCount(HiddenMarkovModel hmm, int column,
1228    char symbol, boolean removeBelowBackground, boolean infoHeight)
1229    {
1230  9392 double value = hmm.getMatchEmissionProbability(column, symbol);
1231  9392 double freq = ResidueProperties.backgroundFrequencies
1232    .get(hmm.getAlphabetType()).get(symbol);
1233  9392 if (value < freq && removeBelowBackground)
1234    {
1235  4 return 0;
1236    }
1237   
1238  9388 if (infoHeight)
1239    {
1240  1 value = value * (Math.log(value / freq) / LOG2);
1241    }
1242   
1243  9388 value = value * 10000d;
1244  9388 return Math.round((float) value);
1245    }
1246    }