Clover icon

Coverage Report

  1. Project Clover database Thu Dec 4 2025 16:11:35 GMT
  2. Package jalview.analysis

File AAFrequency.java

 

Coverage histogram

../../img/srcFileCovDistChart9.png
13% of files have more coverage

Code metrics

188
403
22
1
1,242
777
139
0.34
18.32
22
6.32

Classes

Class Line # Actions
AAFrequency 60 403 139
0.8792822487.9%
 

Contributing tests

This file is covered by 219 tests. .

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.analysis;
22   
23    import jalview.datamodel.AlignedCodonFrame;
24    import jalview.datamodel.AlignmentAnnotation;
25    import jalview.datamodel.AlignmentI;
26    import jalview.datamodel.AnnotatedCollectionI;
27    import jalview.datamodel.Annotation;
28    import jalview.datamodel.HiddenMarkovModel;
29    import jalview.datamodel.Profile;
30    import jalview.datamodel.ProfileI;
31    import jalview.datamodel.Profiles;
32    import jalview.datamodel.ProfilesI;
33    import jalview.datamodel.ResidueCount;
34    import jalview.datamodel.ResidueCount.SymbolCounts;
35    import jalview.datamodel.SecondaryStructureCount;
36    import jalview.datamodel.SequenceGroup;
37    import jalview.datamodel.SequenceI;
38    import jalview.ext.android.SparseIntArray;
39    import jalview.schemes.ResidueProperties;
40    import jalview.util.Comparison;
41    import jalview.util.Constants;
42    import jalview.util.Format;
43    import jalview.util.MappingUtils;
44    import jalview.util.QuickSort;
45   
46    import java.awt.Color;
47    import java.util.ArrayList;
48    import java.util.Arrays;
49    import java.util.Hashtable;
50    import java.util.List;
51    import java.util.Map;
52   
53    /**
54    * Takes in a vector or array of sequences and column start and column end and
55    * returns a new Hashtable[] of size maxSeqLength, if Hashtable not supplied.
56    * This class is used extensively in calculating alignment colourschemes that
57    * depend on the amount of conservation in each alignment column.
58    *
59    */
 
60    public class AAFrequency
61    {
62    private static final double LOG2 = Math.log(2);
63   
64    public static final String PROFILE = "P";
65   
 
66  3 toggle public static final ProfilesI calculate(List<SequenceI> list, int start,
67    int end)
68    {
69  3 return calculate(list, start, end, false);
70    }
71   
 
72  512 toggle public static final ProfilesI calculate(List<SequenceI> sequences,
73    int start, int end, boolean profile)
74    {
75  512 SequenceI[] seqs = new SequenceI[sequences.size()];
76  512 int width = 0;
77  512 synchronized (sequences)
78    {
79  4472 for (int i = 0; i < sequences.size(); i++)
80    {
81  3960 seqs[i] = sequences.get(i);
82  3960 int length = seqs[i].getLength();
83  3960 if (length > width)
84    {
85  511 width = length;
86    }
87    }
88   
89  512 if (end >= width)
90    {
91  289 end = width;
92    }
93   
94  512 ProfilesI reply = calculate(seqs, width, start, end, profile);
95  512 return reply;
96    }
97    }
98   
99    /**
100    * Calculate the consensus symbol(s) for each column in the given range.
101    *
102    * @param sequences
103    * @param width
104    * the full width of the alignment
105    * @param start
106    * start column (inclusive, base zero)
107    * @param end
108    * end column (exclusive)
109    * @param saveFullProfile
110    * if true, store all symbol counts
111    */
 
112  1462 toggle public static final ProfilesI calculate(final SequenceI[] sequences,
113    int width, int start, int end, boolean saveFullProfile)
114    {
115    // long now = System.currentTimeMillis();
116  1462 int seqCount = sequences.length;
117  1462 boolean nucleotide = false;
118  1462 int nucleotideCount = 0;
119  1462 int peptideCount = 0;
120   
121  1462 ProfileI[] result = new ProfileI[width];
122   
123  664580 for (int column = start; column < end; column++)
124    {
125    /*
126    * Apply a heuristic to detect nucleotide data (which can
127    * be counted in more compact arrays); here we test for
128    * more than 90% nucleotide; recheck every 10 columns in case
129    * of misleading data e.g. highly conserved Alanine in peptide!
130    * Mistakenly guessing nucleotide has a small performance cost,
131    * as it will result in counting in sparse arrays.
132    * Mistakenly guessing peptide has a small space cost,
133    * as it will use a larger than necessary array to hold counts.
134    */
135  663125 if (nucleotideCount > 100 && column % 10 == 0)
136    {
137  59210 nucleotide = (9 * peptideCount < nucleotideCount);
138    }
139  663120 ResidueCount residueCounts = new ResidueCount(nucleotide);
140   
141  11275017 for (int row = 0; row < seqCount; row++)
142    {
143  10612313 if (sequences[row] == null)
144    {
145  0 jalview.bin.Console.errPrintln(
146    "WARNING: Consensus skipping null sequence - possible race condition.");
147  0 continue;
148    }
149  10611711 if (sequences[row].getLength() > column)
150    {
151  10581626 char c = sequences[row].getCharAt(column);
152  10581089 residueCounts.add(c);
153  10583545 if (Comparison.isNucleotide(c))
154    {
155  993166 nucleotideCount++;
156    }
157  9592780 else if (!Comparison.isGap(c))
158    {
159  813499 peptideCount++;
160    }
161    }
162    else
163    {
164    /*
165    * count a gap if the sequence doesn't reach this column
166    */
167  30225 residueCounts.addGap();
168    }
169    }
170   
171  663083 int maxCount = residueCounts.getModalCount();
172  663080 String maxResidue = residueCounts.getResiduesForCount(maxCount);
173  663131 int gapCount = residueCounts.getGapCount();
174  663120 ProfileI profile = new Profile(seqCount, gapCount, maxCount,
175    maxResidue);
176   
177  663120 if (saveFullProfile)
178    {
179  636520 profile.setCounts(residueCounts);
180    }
181   
182  663114 result[column] = profile;
183    }
184  1462 return new Profiles(seqCount, result);
185    // long elapsed = System.currentTimeMillis() - now;
186    // jalview.bin.Console.outPrintln(elapsed);
187    }
188   
 
189  0 toggle public static final ProfilesI calculateSS(List<SequenceI> list, int start,
190    int end, String source)
191    {
192  0 return calculateSS(list, start, end, false, source);
193    }
 
194  0 toggle public static final ProfilesI calculateSS(List<SequenceI> sequences,
195    int start, int end, boolean profile, String source)
196    {
197  0 return calculateSS(sequences, start, end, profile, source,null);
198    }
 
199  549 toggle public static final ProfilesI calculateSS(List<SequenceI> sequences,
200    int start, int end, boolean profile, String source,
201    SequenceGroup sequenceGroup)
202    {
203  549 SequenceI[] seqs = new SequenceI[sequences.size()];
204  549 int width = 0;
205  549 synchronized (sequences)
206    {
207  4695 for (int i = 0; i < sequences.size(); i++)
208    {
209  4146 seqs[i] = sequences.get(i);
210  4146 int length = seqs[i].getLength();
211  4146 if (length > width)
212    {
213  548 width = length;
214    }
215    }
216   
217  549 if (end >= width)
218    {
219  329 end = width;
220    }
221   
222  549 ProfilesI reply = calculateSS(seqs, width, start, end, profile,
223    source,sequenceGroup);
224  549 return reply;
225    }
226    }
227   
228   
229    /**
230    * TODO - REFACTOR TO ANNOTATEDCOLLECTION!
231    * @param sequences
232    * @param width
233    * @param start
234    * @param end
235    * @param saveFullProfile
236    * @param source - if null, will be treated as 'All'
237    * @param sequenceGroup - if null all secondary structure annotations matching source on sequence will be considered
238    * @return
239    */
 
240  1845 toggle public static final ProfilesI calculateSS(final SequenceI[] sequences,
241    int width, int start, int end, boolean saveFullProfile,
242    String source, SequenceGroup sequenceGroup)
243    {
244   
245   
246  1845 int seqCount = sequences.length;
247   
248  1845 ProfileI[] result = new ProfileI[width];
249  1845 int maxSSannotcount=0,maxSeqWithSScount=0;
250  1845 if (source==null || "".equals(source)) {
251  2 source = Constants.SS_ALL_PROVIDERS;
252    }
253  1845 Map<SequenceI, ArrayList<AlignmentAnnotation>> sq_group_by_source = null;
254  1845 if (sequenceGroup!=null && sequenceGroup.getAnnotationsFromTree().size()>0 && source!=null)
255    {
256  40 sq_group_by_source = AlignmentUtils.getSequenceAssociatedAlignmentAnnotations(sequenceGroup.getAnnotationsFromTree().toArray(new AlignmentAnnotation[0]), source);
257    }
258  443478 for (int column = start; column < end; column++)
259    {
260   
261  441631 int seqWithSSCount = 0;
262  441632 int ssCount = 0;
263   
264  441631 SecondaryStructureCount ssCounts = new SecondaryStructureCount();
265   
266  6829541 for (int row = 0; row < seqCount; row++)
267    {
268  6387804 if (sequences[row] == null)
269    {
270  0 jalview.bin.Console.errPrintln(
271    "WARNING: Consensus skipping null sequence - possible race condition.");
272  0 continue;
273    }
274   
275  6387515 char c = sequences[row].getCharAt(column);
276   
277  6386712 List<AlignmentAnnotation> annots;
278   
279  6387548 if (sq_group_by_source==null) {
280  6371609 annots = AlignmentUtils.getAlignmentAnnotationForSource(sequences[row], source);
281    } else {
282  16018 annots = sq_group_by_source.get(sequences[row]);
283  16018 if (annots==null)
284    {
285  16018 annots = sq_group_by_source.get(sequences[row].getDatasetSequence());
286    }
287    }
288   
289  6388282 if(annots!=null) {
290  111669 if (annots.size()>0) {
291  111659 seqWithSSCount++;
292    }
293  111649 for (AlignmentAnnotation aa : annots)
294    {
295  144850 if (aa != null)
296    {
297  144825 ssCount++;
298    }
299   
300  144838 if (sequences[row].getLength() > column && !Comparison.isGap(c)
301    && aa != null)
302    {
303   
304  111332 int seqPosition = sequences[row].findPosition(column);
305   
306  111519 char ss = AlignmentUtils
307    .findSSAnnotationForGivenSeqposition(aa, seqPosition);
308  111465 if (ss == '*')
309    {
310  0 continue;
311    }
312  111455 ssCounts.add(ss);
313    }
314  33492 else if (Comparison.isGap(c) && aa != null)
315    {
316  33492 ssCounts.addGap();
317    }
318    }
319    }
320    }
321   
322  441637 int maxSSCount = ssCounts.getModalCount();
323  441632 String maxSS = ssCounts.getSSForCount(maxSSCount);
324  441625 int gapCount = ssCounts.getGapCount();
325  441626 ProfileI profile = new Profile(maxSS, ssCount, gapCount, maxSSCount,
326    seqWithSSCount);
327  441630 maxSeqWithSScount=Math.max(maxSeqWithSScount, seqWithSSCount);
328  441645 if (saveFullProfile)
329    {
330  415085 profile.setSSCounts(ssCounts);
331    }
332   
333  441633 result[column] = profile;
334  441633 maxSSannotcount=Math.max(maxSSannotcount, ssCount);
335    }
336  1845 return new Profiles(maxSSannotcount,result);
337    }
338   
339    /**
340    * Returns the full set of profiles for a hidden Markov model. The underlying
341    * data is the raw probabilities of a residue being emitted at each node,
342    * however the profiles returned by this function contain the percentage
343    * chance of a residue emission.
344    *
345    * @param hmm
346    * @param width
347    * The width of the Profile array (Profiles) to be returned.
348    * @param start
349    * The alignment column on which the first profile is based.
350    * @param end
351    * The alignment column on which the last profile is based.
352    * @param removeBelowBackground
353    * if true, symbols with a match emission probability less than
354    * background frequency are ignored
355    * @return
356    */
 
357  5 toggle public static ProfilesI calculateHMMProfiles(final HiddenMarkovModel hmm,
358    int width, int start, int end, boolean removeBelowBackground,
359    boolean infoLetterHeight)
360    {
361  5 ProfileI[] result = new ProfileI[width];
362  5 char[] symbols = hmm.getSymbols().toCharArray();
363  5 int symbolCount = symbols.length;
364  786 for (int column = start; column < end; column++)
365    {
366  781 ResidueCount counts = new ResidueCount();
367  781 for (char symbol : symbols)
368    {
369  15620 int value = getAnalogueCount(hmm, column, symbol,
370    removeBelowBackground, infoLetterHeight);
371  15620 counts.put(symbol, value);
372    }
373  781 int maxCount = counts.getModalCount();
374  781 String maxResidue = counts.getResiduesForCount(maxCount);
375  781 int gapCount = counts.getGapCount();
376  781 ProfileI profile = new Profile(symbolCount, gapCount, maxCount,
377    maxResidue);
378  781 profile.setCounts(counts);
379   
380  781 result[column] = profile;
381    }
382  5 return new Profiles(1,result); // TODO: JAL-4107 - HMM Profile seqcount tooltip needs to be different to a typical consensus tooltip ? where is seqcount ? - could report effective diversity
383    }
384   
385    /**
386    * Make an estimate of the profile size we are going to compute i.e. how many
387    * different characters may be present in it. Overestimating has a cost of
388    * using more memory than necessary. Underestimating has a cost of needing to
389    * extend the SparseIntArray holding the profile counts.
390    *
391    * @param profileSizes
392    * counts of sizes of profiles so far encountered
393    * @return
394    */
 
395  0 toggle static int estimateProfileSize(SparseIntArray profileSizes)
396    {
397  0 if (profileSizes.size() == 0)
398    {
399  0 return 4;
400    }
401   
402    /*
403    * could do a statistical heuristic here e.g. 75%ile
404    * for now just return the largest value
405    */
406  0 return profileSizes.keyAt(profileSizes.size() - 1);
407    }
408   
409    /**
410    * Derive the consensus annotations to be added to the alignment for display.
411    * This does not recompute the raw data, but may be called on a change in
412    * display options, such as 'ignore gaps', which may in turn result in a
413    * change in the derived values.
414    *
415    * @param consensus
416    * the annotation row to add annotations to
417    * @param profiles
418    * the source consensus data
419    * @param startCol
420    * start column (inclusive)
421    * @param endCol
422    * end column (exclusive)
423    * @param ignoreGaps
424    * if true, normalise residue percentages ignoring gaps
425    * @param showSequenceLogo
426    * if true include all consensus symbols, else just show modal
427    * residue
428    * @param nseq
429    * number of sequences
430    */
 
431  1091 toggle public static void completeConsensus(AlignmentAnnotation consensus,
432    ProfilesI profiles, int startCol, int endCol, boolean ignoreGaps,
433    boolean showSequenceLogo, long nseq)
434    {
435    // long now = System.currentTimeMillis();
436  1091 if (consensus == null || consensus.annotations == null
437    || consensus.annotations.length < endCol)
438    {
439    /*
440    * called with a bad alignment annotation row
441    * wait for it to be initialised properly
442    */
443  0 return;
444    }
445   
446  426662 for (int i = startCol; i < endCol; i++)
447    {
448  425571 ProfileI profile = profiles.get(i);
449  425568 if (profile == null)
450    {
451    /*
452    * happens if sequences calculated over were
453    * shorter than alignment width
454    */
455  0 consensus.annotations[i] = null;
456  0 return;
457    }
458   
459  425566 final int dp = getPercentageDp(nseq);
460   
461  425568 float value = profile.getPercentageIdentity(ignoreGaps);
462   
463  425611 String description = getTooltip(profile, value, showSequenceLogo,
464    ignoreGaps, dp);
465   
466  425612 String modalResidue = profile.getModalResidue();
467  425603 if ("".equals(modalResidue))
468    {
469  4091 modalResidue = "-";
470    }
471  421506 else if (modalResidue.length() > 1)
472    {
473  6996 modalResidue = "+";
474    }
475  425572 consensus.annotations[i] = new Annotation(modalResidue, description,
476    ' ', value);
477    }
478    // long elapsed = System.currentTimeMillis() - now;
479    // jalview.bin.Console.outPrintln(-elapsed);
480    }
481   
 
482  1292 toggle public static void completeSSConsensus(AlignmentAnnotation ssConsensus,
483    ProfilesI profiles, int startCol, int endCol, boolean ignoreGaps,
484    boolean showSequenceLogo, long nseq)
485    {
486    // long now = System.currentTimeMillis();
487  1292 if (ssConsensus == null || ssConsensus.annotations == null
488    || ssConsensus.annotations.length < endCol)
489    {
490    /*
491    * called with a bad alignment annotation row
492    * wait for it to be initialised properly
493    */
494  0 return;
495    }
496   
497  382229 for (int i = startCol; i < endCol; i++)
498    {
499  380937 ProfileI profile = profiles.get(i);
500  380935 if (profile == null)
501    {
502    /*
503    * happens if sequences calculated over were
504    * shorter than alignment width
505    */
506  0 ssConsensus.annotations[i] = null;
507  0 return;
508    }
509   
510  380937 if (ssConsensus.getNoOfSequencesIncluded() < 0)
511    {
512  513 ssConsensus.setNoOfSequencesIncluded(profile.getSeqWithSSCount());
513  513 ssConsensus.setNoOfTracksIncluded(profiles.getCount());
514    }
515   
516  380937 final int dp = getPercentageDp(nseq);
517   
518  380934 float value = profile.getSSPercentageIdentity(ignoreGaps);
519   
520  380933 String description = getSSTooltip(profile, value, showSequenceLogo,
521    ignoreGaps, dp);
522   
523  380937 String modalSS = profile.getModalSS();
524  380933 if ("".equals(modalSS))
525    {
526  356914 modalSS = "-";
527    }
528  24020 else if (modalSS.length() > 1)
529    {
530  1168 modalSS = "+";
531    }
532  380933 ssConsensus.annotations[i] = new Annotation(modalSS, description,
533    ' ', value);
534    }
535   
536    //Hide consensus with no data to display
537  1292 if(ssConsensus.getNoOfSequencesIncluded()<1)
538  1172 ssConsensus.visible = false;
539   
540    // long elapsed = System.currentTimeMillis() - now;
541    // jalview.bin.Console.outPrintln(-elapsed);
542    }
543   
544    /**
545    * Derive the information annotations to be added to the alignment for
546    * display. This does not recompute the raw data, but may be called on a
547    * change in display options, such as 'ignore below background frequency',
548    * which may in turn result in a change in the derived values.
549    *
550    * @param information
551    * the annotation row to add annotations to
552    * @param profiles
553    * the source information data
554    * @param startCol
555    * start column (inclusive)
556    * @param endCol
557    * end column (exclusive)
558    * @param ignoreGaps
559    * if true, normalise residue percentages
560    * @param showSequenceLogo
561    * if true include all information symbols, else just show modal
562    * residue
563    */
 
564  6 toggle public static float completeInformation(AlignmentAnnotation information,
565    ProfilesI profiles, int startCol, int endCol)
566    {
567    // long now = System.currentTimeMillis();
568  6 if (information == null || information.annotations == null)
569    {
570    /*
571    * called with a bad alignment annotation row
572    * wait for it to be initialised properly
573    */
574  0 return 0;
575    }
576   
577  6 float max = 0f;
578  6 SequenceI hmmSeq = information.sequenceRef;
579   
580  6 int seqLength = hmmSeq.getLength();
581  6 if (information.annotations.length < seqLength)
582    {
583  0 return 0;
584    }
585   
586  6 HiddenMarkovModel hmm = hmmSeq.getHMM();
587   
588  787 for (int column = startCol; column < endCol; column++)
589    {
590  782 if (column >= seqLength)
591    {
592    // hmm consensus sequence is shorter than the alignment
593  1 break;
594    }
595   
596  781 float value = hmm.getInformationContent(column);
597  781 boolean isNaN = Float.isNaN(value);
598  781 if (!isNaN)
599    {
600  741 max = Math.max(max, value);
601    }
602   
603  781 String description = isNaN ? null
604    : String.format("%.4f bits", value);
605  781 information.annotations[column] = new Annotation(
606    Character.toString(
607    Character.toUpperCase(hmmSeq.getCharAt(column))),
608    description, ' ', value);
609    }
610   
611  6 information.graphMax = max;
612  6 return max;
613    }
614   
615    /**
616    * Derive the occupancy count annotation
617    *
618    * @param occupancy
619    * the annotation row to add annotations to
620    * @param profiles
621    * the source consensus data
622    * @param startCol
623    * start column (inclusive)
624    * @param endCol
625    * end column (exclusive)
626    */
 
627  938 toggle public static void completeGapAnnot(AlignmentAnnotation occupancy,
628    ProfilesI profiles, int startCol, int endCol, long nseq)
629    {
630  938 if (occupancy == null || occupancy.annotations == null
631    || occupancy.annotations.length < endCol)
632    {
633    /*
634    * called with a bad alignment annotation row
635    * wait for it to be initialised properly
636    */
637  0 return;
638    }
639    // always set ranges again
640  938 occupancy.graphMax = nseq;
641  938 occupancy.graphMin = 0;
642  938 double scale = 0.8 / nseq;
643  402833 for (int i = startCol; i < endCol; i++)
644    {
645  401899 ProfileI profile = profiles.get(i);
646  401891 if (profile == null)
647    {
648    /*
649    * happens if sequences calculated over were
650    * shorter than alignment width
651    */
652  0 occupancy.annotations[i] = null;
653  0 return;
654    }
655   
656  401893 final int gapped = profile.getNonGapped();
657   
658  401886 String description = "" + gapped;
659   
660  401890 occupancy.annotations[i] = new Annotation("", description, '\0',
661    gapped,
662    jalview.util.ColorUtils.bleachColour(Color.DARK_GRAY,
663    (float) scale * gapped));
664    }
665    }
666   
667    /**
668    * Returns a tooltip showing either
669    * <ul>
670    * <li>the full profile (percentages of all residues present), if
671    * showSequenceLogo is true, or</li>
672    * <li>just the modal (most common) residue(s), if showSequenceLogo is
673    * false</li>
674    * </ul>
675    * Percentages are as a fraction of all sequence, or only ungapped sequences
676    * if ignoreGaps is true.
677    *
678    * @param profile
679    * @param pid
680    * @param showSequenceLogo
681    * @param ignoreGaps
682    * @param dp
683    * the number of decimal places to format percentages to
684    * @return
685    */
 
686  425609 toggle static String getTooltip(ProfileI profile, float pid,
687    boolean showSequenceLogo, boolean ignoreGaps, int dp)
688    {
689  425609 ResidueCount counts = profile.getCounts();
690   
691  425608 String description = null;
692  425625 if (counts != null && showSequenceLogo)
693    {
694  53835 int normaliseBy = ignoreGaps ? profile.getNonGapped()
695    : profile.getHeight();
696  53825 description = counts.getTooltip(normaliseBy, dp);
697    }
698    else
699    {
700  371803 StringBuilder sb = new StringBuilder(64);
701  371803 String maxRes = profile.getModalResidue();
702  371803 if (maxRes.length() > 1)
703    {
704  2919 sb.append("[").append(maxRes).append("]");
705    }
706    else
707    {
708  368884 sb.append(maxRes);
709    }
710  371803 if (maxRes.length() > 0)
711    {
712  368437 sb.append(" ");
713  368437 Format.appendPercentage(sb, pid, dp);
714  368437 sb.append("%");
715    }
716  371803 description = sb.toString();
717    }
718  425619 return description;
719    }
720   
 
721  380932 toggle static String getSSTooltip(ProfileI profile, float pid,
722    boolean showSequenceLogo, boolean ignoreGaps, int dp)
723    {
724  380932 SecondaryStructureCount counts = profile.getSSCounts();
725   
726  380929 String description = null;
727  380946 if (counts != null && showSequenceLogo)
728    {
729  42803 int normaliseBy = ignoreGaps ? profile.getNonGapped()
730    : profile.getHeight();
731  42803 description = counts.getTooltip(normaliseBy, dp);
732    }
733    else
734    {
735  338143 StringBuilder sb = new StringBuilder(64);
736  338143 String maxSS = profile.getModalSS();
737  338143 if (maxSS.length() > 1)
738    {
739  1132 sb.append("[").append(maxSS).append("]");
740    }
741    else
742    {
743  337011 sb.append(maxSS);
744    }
745  338143 if (maxSS.length() > 0)
746    {
747  16316 sb.append(" ");
748  16316 Format.appendPercentage(sb, pid, dp);
749  16316 sb.append("%");
750    }
751  338143 description = sb.toString();
752    }
753  380937 return description;
754    }
755   
756    /**
757    * Returns the sorted profile for the given consensus data. The returned array
758    * contains
759    *
760    * <pre>
761    * [profileType, numberOfValues, totalPercent, charValue1, percentage1, charValue2, percentage2, ...]
762    * in descending order of percentage value
763    * </pre>
764    *
765    * @param profile
766    * the data object from which to extract and sort values
767    * @param ignoreGaps
768    * if true, only non-gapped values are included in percentage
769    * calculations
770    * @return
771    */
 
772  28226 toggle public static int[] extractProfile(ProfileI profile, boolean ignoreGaps)
773    {
774  28226 char[] symbols;
775  28226 int[] values;
776   
777  28226 if (profile.getCounts() != null)
778    {
779  28226 ResidueCount counts = profile.getCounts();
780  28226 SymbolCounts symbolCounts = counts.getSymbolCounts();
781  28226 symbols = symbolCounts.symbols;
782  28226 values = symbolCounts.values;
783   
784    }
785  0 else if (profile.getSSCounts() != null)
786    {
787  0 SecondaryStructureCount counts = profile.getSSCounts();
788    // to do
789  0 SecondaryStructureCount.SymbolCounts symbolCounts = counts
790    .getSymbolCounts();
791  0 symbols = symbolCounts.symbols;
792  0 values = symbolCounts.values;
793    }
794    else
795    {
796  0 return null;
797    }
798   
799  28226 QuickSort.sort(values, symbols);
800  28226 int totalPercentage = 0;
801  28226 final int divisor = ignoreGaps ? profile.getNonGapped()
802    : profile.getHeight();
803   
804    /*
805    * traverse the arrays in reverse order (highest counts first)
806    */
807  28226 int[] result = new int[3 + 2 * symbols.length];
808  28226 int nextArrayPos = 3;
809  28226 int nonZeroCount = 0;
810   
811  101143 for (int i = symbols.length - 1; i >= 0; i--)
812    {
813  72919 int theChar = symbols[i];
814  72919 int charCount = values[i];
815  72919 final int percentage = (charCount * 100) / divisor;
816  72919 if (percentage == 0)
817    {
818    /*
819    * this count (and any remaining) round down to 0% - discard
820    */
821  2 break;
822    }
823  72917 nonZeroCount++;
824  72917 result[nextArrayPos++] = theChar;
825  72917 result[nextArrayPos++] = percentage;
826  72917 totalPercentage += percentage;
827    }
828   
829    /*
830    * truncate array if any zero values were discarded
831    */
832  28226 if (nonZeroCount < symbols.length)
833    {
834  2 int[] tmp = new int[3 + 2 * nonZeroCount];
835  2 System.arraycopy(result, 0, tmp, 0, tmp.length);
836  2 result = tmp;
837    }
838   
839    /*
840    * fill in 'header' values
841    */
842  28226 result[0] = AlignmentAnnotation.SEQUENCE_PROFILE;
843  28226 result[1] = nonZeroCount;
844  28226 result[2] = totalPercentage;
845   
846  28226 return result;
847    }
848   
849   
850    /**
851    * Extract a sorted extract of cDNA codon profile data. The returned array
852    * contains
853    *
854    * <pre>
855    * [profileType, numberOfValues, totalPercentage, charValue1, percentage1, charValue2, percentage2, ...]
856    * in descending order of percentage value, where the character values encode codon triplets
857    * </pre>
858    *
859    * @param hashtable
860    * @return
861    */
 
862  2 toggle public static int[] extractCdnaProfile(
863    Hashtable<String, Object> hashtable, boolean ignoreGaps)
864    {
865    // this holds #seqs, #ungapped, and then codon count, indexed by encoded
866    // codon triplet
867  2 int[] codonCounts = (int[]) hashtable.get(PROFILE);
868  2 int[] sortedCounts = new int[codonCounts.length - 2];
869  2 System.arraycopy(codonCounts, 2, sortedCounts, 0,
870    codonCounts.length - 2);
871   
872  2 int[] result = new int[3 + 2 * sortedCounts.length];
873    // first value is just the type of profile data
874  2 result[0] = AlignmentAnnotation.CDNA_PROFILE;
875   
876  2 char[] codons = new char[sortedCounts.length];
877  130 for (int i = 0; i < codons.length; i++)
878    {
879  128 codons[i] = (char) i;
880    }
881  2 QuickSort.sort(sortedCounts, codons);
882  2 int totalPercentage = 0;
883  2 int distinctValuesCount = 0;
884  2 int j = 3;
885  2 int divisor = ignoreGaps ? codonCounts[1] : codonCounts[0];
886  8 for (int i = codons.length - 1; i >= 0; i--)
887    {
888  8 final int codonCount = sortedCounts[i];
889  8 if (codonCount == 0)
890    {
891  0 break; // nothing else of interest here
892    }
893  8 final int percentage = codonCount * 100 / divisor;
894  8 if (percentage == 0)
895    {
896    /*
897    * this (and any remaining) values rounded down to 0 - discard
898    */
899  2 break;
900    }
901  6 distinctValuesCount++;
902  6 result[j++] = codons[i];
903  6 result[j++] = percentage;
904  6 totalPercentage += percentage;
905    }
906  2 result[2] = totalPercentage;
907   
908    /*
909    * Just return the non-zero values
910    */
911    // todo next value is redundant if we limit the array to non-zero counts
912  2 result[1] = distinctValuesCount;
913  2 return Arrays.copyOfRange(result, 0, j);
914    }
915   
916    /**
917    * Compute a consensus for the cDNA coding for a protein alignment.
918    *
919    * @param alignment
920    * the protein alignment (which should hold mappings to cDNA
921    * sequences)
922    * @param hconsensus
923    * the consensus data stores to be populated (one per column)
924    */
 
925  5 toggle public static void calculateCdna(AlignmentI alignment,
926    Hashtable<String, Object>[] hconsensus)
927    {
928  5 final char gapCharacter = alignment.getGapCharacter();
929  5 List<AlignedCodonFrame> mappings = alignment.getCodonFrames();
930  5 if (mappings == null || mappings.isEmpty())
931    {
932  0 return;
933    }
934   
935  5 int cols = alignment.getWidth();
936  1955 for (int col = 0; col < cols; col++)
937    {
938    // todo would prefer a Java bean for consensus data
939  1950 Hashtable<String, Object> columnHash = new Hashtable<>();
940    // #seqs, #ungapped seqs, counts indexed by (codon encoded + 1)
941  1950 int[] codonCounts = new int[66];
942  1950 codonCounts[0] = alignment.getSequences().size();
943  1950 int ungappedCount = 0;
944  1950 for (SequenceI seq : alignment.getSequences())
945    {
946  20922 if (seq.getCharAt(col) == gapCharacter)
947    {
948  10174 continue;
949    }
950  10748 List<char[]> codons = MappingUtils.findCodonsFor(seq, col,
951    mappings);
952  10748 for (char[] codon : codons)
953    {
954  10658 int codonEncoded = CodingUtils.encodeCodon(codon);
955  10658 if (codonEncoded >= 0)
956    {
957  10658 codonCounts[codonEncoded + 2]++;
958  10658 ungappedCount++;
959  10658 break;
960    }
961    }
962    }
963  1950 codonCounts[1] = ungappedCount;
964    // todo: sort values here, save counts and codons?
965  1950 columnHash.put(PROFILE, codonCounts);
966  1950 hconsensus[col] = columnHash;
967    }
968    }
969   
970    /**
971    * Derive displayable cDNA consensus annotation from computed consensus data.
972    *
973    * @param consensusAnnotation
974    * the annotation row to be populated for display
975    * @param consensusData
976    * the computed consensus data
977    * @param showProfileLogo
978    * if true show all symbols present at each position, else only the
979    * modal value
980    * @param nseqs
981    * the number of sequences in the alignment
982    */
 
983  5 toggle public static void completeCdnaConsensus(
984    AlignmentAnnotation consensusAnnotation,
985    Hashtable<String, Object>[] consensusData,
986    boolean showProfileLogo, int nseqs)
987    {
988  5 if (consensusAnnotation == null
989    || consensusAnnotation.annotations == null
990    || consensusAnnotation.annotations.length < consensusData.length)
991    {
992    // called with a bad alignment annotation row - wait for it to be
993    // initialised properly
994  0 return;
995    }
996   
997    // ensure codon triplet scales with font size
998  5 consensusAnnotation.scaleColLabel = true;
999  1955 for (int col = 0; col < consensusData.length; col++)
1000    {
1001  1950 Hashtable<String, Object> hci = consensusData[col];
1002  1950 if (hci == null)
1003    {
1004    // gapped protein column?
1005  0 continue;
1006    }
1007    // array holds #seqs, #ungapped, then codon counts indexed by codon
1008  1950 final int[] codonCounts = (int[]) hci.get(PROFILE);
1009  1950 int totalCount = 0;
1010   
1011    /*
1012    * First pass - get total count and find the highest
1013    */
1014  1950 final char[] codons = new char[codonCounts.length - 2];
1015  126750 for (int j = 2; j < codonCounts.length; j++)
1016    {
1017  124800 final int codonCount = codonCounts[j];
1018  124800 codons[j - 2] = (char) (j - 2);
1019  124800 totalCount += codonCount;
1020    }
1021   
1022    /*
1023    * Sort array of encoded codons by count ascending - so the modal value
1024    * goes to the end; start by copying the count (dropping the first value)
1025    */
1026  1950 int[] sortedCodonCounts = new int[codonCounts.length - 2];
1027  1950 System.arraycopy(codonCounts, 2, sortedCodonCounts, 0,
1028    codonCounts.length - 2);
1029  1950 QuickSort.sort(sortedCodonCounts, codons);
1030   
1031  1950 int modalCodonEncoded = codons[codons.length - 1];
1032  1950 int modalCodonCount = sortedCodonCounts[codons.length - 1];
1033  1950 String modalCodon = String
1034    .valueOf(CodingUtils.decodeCodon(modalCodonEncoded));
1035  1950 if (sortedCodonCounts.length > 1 && sortedCodonCounts[codons.length
1036    - 2] == sortedCodonCounts[codons.length - 1])
1037    {
1038    /*
1039    * two or more codons share the modal count
1040    */
1041  50 modalCodon = "+";
1042    }
1043  1950 float pid = sortedCodonCounts[sortedCodonCounts.length - 1] * 100
1044    / (float) totalCount;
1045   
1046    /*
1047    * todo ? Replace consensus hashtable with sorted arrays of codons and
1048    * counts (non-zero only). Include total count in count array [0].
1049    */
1050   
1051    /*
1052    * Scan sorted array backwards for most frequent values first. Show
1053    * repeated values compactly.
1054    */
1055  1950 StringBuilder mouseOver = new StringBuilder(32);
1056  1950 StringBuilder samePercent = new StringBuilder();
1057  1950 String percent = null;
1058  1950 String lastPercent = null;
1059  1950 int percentDecPl = getPercentageDp(nseqs);
1060   
1061  3850 for (int j = codons.length - 1; j >= 0; j--)
1062    {
1063  3850 int codonCount = sortedCodonCounts[j];
1064  3850 if (codonCount == 0)
1065    {
1066    /*
1067    * remaining codons are 0% - ignore, but finish off the last one if
1068    * necessary
1069    */
1070  1950 if (samePercent.length() > 0)
1071    {
1072  1900 mouseOver.append(samePercent).append(": ").append(percent)
1073    .append("% ");
1074    }
1075  1950 break;
1076    }
1077  1900 int codonEncoded = codons[j];
1078  1900 final int pct = codonCount * 100 / totalCount;
1079  1900 String codon = String
1080    .valueOf(CodingUtils.decodeCodon(codonEncoded));
1081  1900 StringBuilder sb = new StringBuilder();
1082  1900 Format.appendPercentage(sb, pct, percentDecPl);
1083  1900 percent = sb.toString();
1084  1900 if (showProfileLogo || codonCount == modalCodonCount)
1085    {
1086  1900 if (percent.equals(lastPercent) && j > 0)
1087    {
1088  0 samePercent.append(samePercent.length() == 0 ? "" : ", ");
1089  0 samePercent.append(codon);
1090    }
1091    else
1092    {
1093  1900 if (samePercent.length() > 0)
1094    {
1095  0 mouseOver.append(samePercent).append(": ").append(lastPercent)
1096    .append("% ");
1097    }
1098  1900 samePercent.setLength(0);
1099  1900 samePercent.append(codon);
1100    }
1101  1900 lastPercent = percent;
1102    }
1103    }
1104   
1105  1950 consensusAnnotation.annotations[col] = new Annotation(modalCodon,
1106    mouseOver.toString(), ' ', pid);
1107    }
1108    }
1109   
1110    /**
1111    * Returns the number of decimal places to show for profile percentages. For
1112    * less than 100 sequences, returns zero (the integer percentage value will be
1113    * displayed). For 100-999 sequences, returns 1, for 1000-9999 returns 2, etc.
1114    *
1115    * @param nseq
1116    * @return
1117    */
 
1118  808423 toggle protected static int getPercentageDp(long nseq)
1119    {
1120  808425 int scale = 0;
1121  808428 while (nseq >= 100)
1122    {
1123  0 scale++;
1124  0 nseq /= 10;
1125    }
1126  808428 return scale;
1127    }
1128   
1129    /**
1130    * Returns the sorted HMM profile for the given column of the alignment. The
1131    * returned array contains
1132    *
1133    * <pre>
1134    * [profileType=0, numberOfValues, 100, charValue1, percentage1, charValue2, percentage2, ...]
1135    * in descending order of percentage value
1136    * </pre>
1137    *
1138    * @param hmm
1139    * @param column
1140    * @param removeBelowBackground
1141    * if true, ignores residues with probability less than their
1142    * background frequency
1143    * @param infoHeight
1144    * if true, uses the log ratio 'information' measure to scale the
1145    * value
1146    * @return
1147    */
 
1148  3 toggle public static int[] extractHMMProfile(HiddenMarkovModel hmm, int column,
1149    boolean removeBelowBackground, boolean infoHeight)
1150    {
1151  3 if (hmm == null)
1152    {
1153  1 return null;
1154    }
1155  2 String alphabet = hmm.getSymbols();
1156  2 int size = alphabet.length();
1157  2 char symbols[] = new char[size];
1158  2 int values[] = new int[size];
1159  2 int totalCount = 0;
1160   
1161  10 for (int i = 0; i < size; i++)
1162    {
1163  8 char symbol = alphabet.charAt(i);
1164  8 symbols[i] = symbol;
1165  8 int value = getAnalogueCount(hmm, column, symbol,
1166    removeBelowBackground, infoHeight);
1167  8 values[i] = value;
1168  8 totalCount += value;
1169    }
1170   
1171    /*
1172    * sort symbols by increasing emission probability
1173    */
1174  2 QuickSort.sort(values, symbols);
1175   
1176  2 int[] profile = new int[3 + size * 2];
1177   
1178  2 profile[0] = AlignmentAnnotation.SEQUENCE_PROFILE;
1179  2 profile[1] = size;
1180  2 profile[2] = 100;
1181   
1182    /*
1183    * order symbol/count profile by decreasing emission probability
1184    */
1185  2 if (totalCount != 0)
1186    {
1187  2 int arrayPos = 3;
1188  10 for (int k = size - 1; k >= 0; k--)
1189    {
1190  8 Float percentage;
1191  8 int value = values[k];
1192  8 if (removeBelowBackground)
1193    {
1194  4 percentage = ((float) value) / totalCount * 100f;
1195    }
1196    else
1197    {
1198  4 percentage = value / 100f;
1199    }
1200  8 int intPercent = Math.round(percentage);
1201  8 profile[arrayPos] = symbols[k];
1202  8 profile[arrayPos + 1] = intPercent;
1203  8 arrayPos += 2;
1204    }
1205    }
1206  2 return profile;
1207    }
1208   
1209    /**
1210    * Converts the emission probability of a residue at a column in the alignment
1211    * to a 'count', suitable for rendering as an annotation value
1212    *
1213    * @param hmm
1214    * @param column
1215    * @param symbol
1216    * @param removeBelowBackground
1217    * if true, returns 0 for any symbol with a match emission
1218    * probability less than the background frequency
1219    * @infoHeight if true, uses the log ratio 'information content' to scale the
1220    * value
1221    * @return
1222    */
 
1223  15632 toggle static int getAnalogueCount(HiddenMarkovModel hmm, int column,
1224    char symbol, boolean removeBelowBackground, boolean infoHeight)
1225    {
1226  15632 double value = hmm.getMatchEmissionProbability(column, symbol);
1227  15632 double freq = ResidueProperties.backgroundFrequencies
1228    .get(hmm.getAlphabetType()).get(symbol);
1229  15632 if (value < freq && removeBelowBackground)
1230    {
1231  4 return 0;
1232    }
1233   
1234  15628 if (infoHeight)
1235    {
1236  1 value = value * (Math.log(value / freq) / LOG2);
1237    }
1238   
1239  15628 value = value * 10000d;
1240  15628 return Math.round((float) value);
1241    }
1242    }