Clover icon

Coverage Report

  1. Project Clover database Mon Jan 6 2025 10:27:51 GMT
  2. Package jalview.datamodel

File SecondaryStructureCount.java

 

Coverage histogram

../../img/srcFileCovDistChart1.png
56% of files have more coverage

Code metrics

90
146
23
2
609
353
71
0.49
6.35
11.5
3.09

Classes

Class Line # Actions
SecondaryStructureCount 35 144 70
0.066406256.6%
SecondaryStructureCount.SymbolCounts 40 2 1
0.00%
 

Contributing tests

This file is covered by 16 tests. .

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.datamodel;
22   
23    import jalview.util.Comparison;
24    import jalview.util.Format;
25    import jalview.util.QuickSort;
26    import jalview.util.SparseCount;
27   
28    /**
29    * A class to count occurrences of residues in a profile, optimised for speed
30    * and memory footprint.
31    *
32    * @author gmcarstairs
33    *
34    */
 
35    public class SecondaryStructureCount
36    {
37    /**
38    * A data bean to hold the results of counting symbols
39    */
 
40    public class SymbolCounts
41    {
42    /**
43    * the symbols seen (as char values), in no particular order
44    */
45    public final char[] symbols;
46   
47    /**
48    * the counts for each symbol, in the same order as the symbols
49    */
50    public final int[] values;
51   
 
52  0 toggle SymbolCounts(char[] s, int[] v)
53    {
54  0 symbols = s;
55  0 values = v;
56    }
57    }
58   
59    private static final int TOUPPERCASE = 'A' - 'a';
60   
61    /*
62    * nucleotide symbols to count (including N unknown)
63    */
64    private static final String SS_SYMBOLS = "HEC";
65   
66    static final int GAP_COUNT = 0;
67   
68    /*
69    * fast lookup tables holding the index into our count
70    * arrays of each symbol; index 0 is reserved for gap counting
71    */
72    private static int[] SS_INDEX = new int[26];
73   
 
74  4 toggle static
75    {
76  16 for (int i = 0; i < SS_SYMBOLS.length(); i++)
77    {
78  12 SS_INDEX[SS_SYMBOLS.charAt(i) - 'A'] = i + 1;
79    }
80    }
81   
82    /*
83    * counts array, just big enough for the nucleotide or peptide
84    * character set (plus gap counts in position 0)
85    */
86    private short[] counts;
87   
88    /*
89    * alternative array of int counts for use if any count
90    * exceeds the maximum value of short (32767)
91    */
92    private int[] intCounts;
93   
94    /*
95    * flag set if we switch from short to int counts
96    */
97    private boolean useIntCounts;
98   
99    /*
100    * general-purpose counter, only for use for characters
101    * that are not in the expected alphabet
102    */
103    private SparseCount otherData;
104   
105    /*
106    * keeps track of the maximum count value recorded
107    * (if this class ever allows decrements, would need to
108    * calculate this on request instead)
109    */
110    int maxCount;
111   
112    /**
113    * Constructor that allocates an array just big enough for the anticipated
114    * characters, plus one position to count gaps
115    */
 
116  40058 toggle public SecondaryStructureCount()
117    {
118    // isSS = true;
119  40058 int charsToCount = SS_SYMBOLS.length();
120  40058 counts = new short[charsToCount + 1];
121    }
122   
123    /**
124    * Increments the count for the given character. The supplied character may be
125    * upper or lower case but counts are for the upper case only. Gap characters
126    * (space, ., -) are all counted together.
127    *
128    * @param c
129    * @return the new value of the count for the character
130    */
 
131  0 toggle public int add(final char c)
132    {
133  0 char u = toUpperCase(c);
134  0 int newValue = 0;
135  0 int offset = getOffset(u);
136   
137    /*
138    * offset 0 is reserved for gap counting, so 0 here means either
139    * an unexpected character, or a gap character passed in error
140    */
141  0 if (offset == 0)
142    {
143  0 if (Comparison.isGap(u))
144    {
145  0 newValue = addGap();
146    }
147    else
148    {
149  0 newValue = addOtherCharacter(u);
150    }
151    }
152    else
153    {
154  0 newValue = increment(offset);
155    }
156  0 return newValue;
157    }
158   
159    /**
160    * Increment the count at the specified offset. If this would result in short
161    * overflow, promote to counting int values instead.
162    *
163    * @param offset
164    * @return the new value of the count at this offset
165    */
 
166  0 toggle int increment(int offset)
167    {
168  0 int newValue = 0;
169  0 if (useIntCounts)
170    {
171  0 newValue = intCounts[offset];
172  0 intCounts[offset] = ++newValue;
173    }
174    else
175    {
176  0 if (counts[offset] == Short.MAX_VALUE)
177    {
178  0 handleOverflow();
179  0 newValue = intCounts[offset];
180  0 intCounts[offset] = ++newValue;
181    }
182    else
183    {
184  0 newValue = counts[offset];
185  0 counts[offset] = (short) ++newValue;
186    }
187    }
188   
189  0 if (offset != GAP_COUNT)
190    {
191    // update modal residue count
192  0 maxCount = Math.max(maxCount, newValue);
193    }
194  0 return newValue;
195    }
196   
197    /**
198    * Switch from counting in short to counting in int
199    */
 
200  0 toggle synchronized void handleOverflow()
201    {
202  0 intCounts = new int[counts.length];
203  0 for (int i = 0; i < counts.length; i++)
204    {
205  0 intCounts[i] = counts[i];
206    }
207  0 counts = null;
208  0 useIntCounts = true;
209    }
210   
211    /**
212    * Returns this character's offset in the count array
213    *
214    * @param c
215    * @return
216    */
 
217  0 toggle int getOffset(char c)
218    {
219  0 int offset = 0;
220  0 if ('A' <= c && c <= 'Z')
221    {
222  0 offset = SS_INDEX[c - 'A'];
223    }
224  0 return offset;
225    }
226   
227    /**
228    * @param c
229    * @return
230    */
 
231  0 toggle protected char toUpperCase(final char c)
232    {
233  0 char u = c;
234  0 if ('a' <= c && c <= 'z')
235    {
236  0 u = (char) (c + TOUPPERCASE);
237    }
238  0 return u;
239    }
240   
241    /**
242    * Increment count for some unanticipated character. The first time this
243    * called, a SparseCount is instantiated to hold these 'extra' counts.
244    *
245    * @param c
246    * @return the new value of the count for the character
247    */
 
248  0 toggle int addOtherCharacter(char c)
249    {
250  0 if (otherData == null)
251    {
252  0 otherData = new SparseCount();
253    }
254  0 int newValue = otherData.add(c, 1);
255  0 maxCount = Math.max(maxCount, newValue);
256  0 return newValue;
257    }
258   
259    /**
260    * Set count for some unanticipated character. The first time this called, a
261    * SparseCount is instantiated to hold these 'extra' counts.
262    *
263    * @param c
264    * @param value
265    */
 
266  0 toggle void setOtherCharacter(char c, int value)
267    {
268  0 if (otherData == null)
269    {
270  0 otherData = new SparseCount();
271    }
272  0 otherData.put(c, value);
273    }
274   
275    /**
276    * Increment count of gap characters
277    *
278    * @return the new count of gaps
279    */
 
280  0 toggle public int addGap()
281    {
282  0 int newValue = increment(GAP_COUNT);
283  0 return newValue;
284    }
285   
286    /**
287    * Answers true if we are counting ints (only after overflow of short counts)
288    *
289    * @return
290    */
 
291  0 toggle boolean isCountingInts()
292    {
293  0 return useIntCounts;
294    }
295   
296    /**
297    * Sets the count for the given character. The supplied character may be upper
298    * or lower case but counts are for the upper case only.
299    *
300    * @param c
301    * @param count
302    */
 
303  0 toggle public void put(char c, int count)
304    {
305  0 char u = toUpperCase(c);
306  0 int offset = getOffset(u);
307   
308    /*
309    * offset 0 is reserved for gap counting, so 0 here means either
310    * an unexpected character, or a gap character passed in error
311    */
312  0 if (offset == 0)
313    {
314  0 if (Comparison.isGap(u))
315    {
316  0 set(0, count);
317    }
318    else
319    {
320  0 setOtherCharacter(u, count);
321  0 maxCount = Math.max(maxCount, count);
322    }
323    }
324    else
325    {
326  0 set(offset, count);
327  0 maxCount = Math.max(maxCount, count);
328    }
329    }
330   
331    /**
332    * Sets the count at the specified offset. If this would result in short
333    * overflow, promote to counting int values instead.
334    *
335    * @param offset
336    * @param value
337    */
 
338  0 toggle void set(int offset, int value)
339    {
340  0 if (useIntCounts)
341    {
342  0 intCounts[offset] = value;
343    }
344    else
345    {
346  0 if (value > Short.MAX_VALUE || value < Short.MIN_VALUE)
347    {
348  0 handleOverflow();
349  0 intCounts[offset] = value;
350    }
351    else
352    {
353  0 counts[offset] = (short) value;
354    }
355    }
356    }
357   
358    /**
359    * Returns the count for the given character, or zero if no count held
360    *
361    * @param c
362    * @return
363    */
 
364  0 toggle public int getCount(char c)
365    {
366  0 char u = toUpperCase(c);
367  0 int offset = getOffset(u);
368  0 if (offset == 0)
369    {
370  0 if (!Comparison.isGap(u))
371    {
372    // should have called getGapCount()
373  0 return otherData == null ? 0 : otherData.get(u);
374    }
375    }
376  0 return useIntCounts ? intCounts[offset] : counts[offset];
377    }
378   
 
379  40058 toggle public int getGapCount()
380    {
381  40058 return useIntCounts ? intCounts[0] : counts[0];
382    }
383   
384    /**
385    * Answers true if this object wraps a counter for unexpected characters
386    *
387    * @return
388    */
 
389  0 toggle boolean isUsingOtherData()
390    {
391  0 return otherData != null;
392    }
393   
394    /**
395    * Returns the character (or concatenated characters) for the symbol(s) with
396    * the given count in the profile. Can be used to get the modal residue by
397    * supplying the modal count value. Returns an empty string if no symbol has
398    * the given count. The symbols are in alphabetic order of standard peptide or
399    * nucleotide characters, followed by 'other' symbols if any.
400    *
401    * @return
402    */
 
403  40058 toggle public String getSSForCount(int count)
404    {
405  40058 if (count == 0)
406    {
407  40058 return "";
408    }
409   
410    /*
411    * find counts for the given value and append the
412    * corresponding symbol
413    */
414  0 StringBuilder modal = new StringBuilder();
415  0 if (useIntCounts)
416    {
417  0 for (int i = 1; i < intCounts.length; i++)
418    {
419  0 if (intCounts[i] == count)
420    {
421  0 modal.append(SS_SYMBOLS.charAt(i - 1));
422    }
423    }
424    }
425    else
426    {
427  0 for (int i = 1; i < counts.length; i++)
428    {
429  0 if (counts[i] == count)
430    {
431  0 modal.append(SS_SYMBOLS.charAt(i - 1));
432    }
433    }
434    }
435  0 if (otherData != null)
436    {
437  0 for (int i = 0; i < otherData.size(); i++)
438    {
439  0 if (otherData.valueAt(i) == count)
440    {
441  0 modal.append((char) otherData.keyAt(i));
442    }
443    }
444    }
445  0 return modal.toString();
446    }
447   
448    /**
449    * Returns the highest count for any symbol(s) in the profile (excluding gap)
450    *
451    * @return
452    */
 
453  40058 toggle public int getModalCount()
454    {
455  40058 return maxCount;
456    }
457   
458    /**
459    * Returns the number of distinct symbols with a non-zero count (excluding the
460    * gap symbol)
461    *
462    * @return
463    */
 
464  0 toggle public int size()
465    {
466  0 int size = 0;
467  0 if (useIntCounts)
468    {
469  0 for (int i = 1; i < intCounts.length; i++)
470    {
471  0 if (intCounts[i] > 0)
472    {
473  0 size++;
474    }
475    }
476    }
477    else
478    {
479  0 for (int i = 1; i < counts.length; i++)
480    {
481  0 if (counts[i] > 0)
482    {
483  0 size++;
484    }
485    }
486    }
487   
488    /*
489    * include 'other' characters recorded (even if count is zero
490    * though that would be a strange use case)
491    */
492  0 if (otherData != null)
493    {
494  0 size += otherData.size();
495    }
496   
497  0 return size;
498    }
499   
500    /**
501    * Returns a data bean holding those symbols that have a non-zero count
502    * (excluding the gap symbol), with their counts.
503    *
504    * @return
505    */
 
506  0 toggle public SymbolCounts getSymbolCounts()
507    {
508  0 int size = size();
509  0 char[] symbols = new char[size];
510  0 int[] values = new int[size];
511  0 int j = 0;
512   
513  0 if (useIntCounts)
514    {
515  0 for (int i = 1; i < intCounts.length; i++)
516    {
517  0 if (intCounts[i] > 0)
518    {
519  0 char symbol = SS_SYMBOLS.charAt(i - 1);
520  0 symbols[j] = symbol;
521  0 values[j] = intCounts[i];
522  0 j++;
523    }
524    }
525    }
526    else
527    {
528  0 for (int i = 1; i < counts.length; i++)
529    {
530  0 if (counts[i] > 0)
531    {
532  0 char symbol = SS_SYMBOLS.charAt(i - 1);
533  0 symbols[j] = symbol;
534  0 values[j] = counts[i];
535  0 j++;
536    }
537    }
538    }
539  0 if (otherData != null)
540    {
541  0 for (int i = 0; i < otherData.size(); i++)
542    {
543  0 symbols[j] = (char) otherData.keyAt(i);
544  0 values[j] = otherData.valueAt(i);
545  0 j++;
546    }
547    }
548   
549  0 return new SymbolCounts(symbols, values);
550    }
551   
552    /**
553    * Returns a tooltip string showing residues in descending order of their
554    * percentage frequency in the profile
555    *
556    * @param normaliseBy
557    * the divisor for residue counts (may or may not include gapped
558    * sequence count)
559    * @param percentageDecPl
560    * the number of decimal places to show in percentages
561    * @return
562    */
 
563  0 toggle public String getTooltip(int normaliseBy, int percentageDecPl)
564    {
565  0 SymbolCounts symbolCounts = getSymbolCounts();
566  0 char[] ca = symbolCounts.symbols;
567  0 int[] vl = symbolCounts.values;
568   
569    /*
570    * sort characters into ascending order of their counts
571    */
572  0 QuickSort.sort(vl, ca);
573   
574    /*
575    * traverse in reverse order (highest count first) to build tooltip
576    */
577  0 boolean first = true;
578  0 StringBuilder sb = new StringBuilder(64);
579  0 for (int c = ca.length - 1; c >= 0; c--)
580    {
581  0 final char residue = ca[c];
582    // TODO combine residues which share a percentage
583    // (see AAFrequency.completeCdnaConsensus)
584  0 float tval = (vl[c] * 100f) / normaliseBy;
585  0 sb.append(first ? "" : "; ").append(residue).append(" ");
586  0 Format.appendPercentage(sb, tval, percentageDecPl);
587  0 sb.append("%");
588  0 first = false;
589    }
590  0 return sb.toString();
591    }
592   
593    /**
594    * Returns a string representation of the symbol counts, for debug purposes.
595    */
 
596  0 toggle @Override
597    public String toString()
598    {
599  0 StringBuilder sb = new StringBuilder();
600  0 sb.append("[ ");
601  0 SymbolCounts sc = getSymbolCounts();
602  0 for (int i = 0; i < sc.symbols.length; i++)
603    {
604  0 sb.append(sc.symbols[i]).append(":").append(sc.values[i]).append(" ");
605    }
606  0 sb.append("]");
607  0 return sb.toString();
608    }
609    }