Clover icon

Coverage Report

  1. Project Clover database Tue Nov 4 2025 11:21:43 GMT
  2. Package jalview.analysis.scoremodels

File ScoreMatrix.java

 

Coverage histogram

../../../img/srcFileCovDistChart10.png
0% of files have more coverage

Code metrics

84
138
26
1
656
366
75
0.54
5.31
26
2.88

Classes

Class Line # Actions
ScoreMatrix 46 138 75
0.95564514 (95.6%)
 

Contributing tests

This file is covered by 266 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.analysis.scoremodels;
22   
23    import jalview.api.AlignmentViewPanel;
24    import jalview.api.analysis.PairwiseScoreModelI;
25    import jalview.api.analysis.ScoreModelI;
26    import jalview.api.analysis.SimilarityParamsI;
27    import jalview.datamodel.AlignmentView;
28    import jalview.math.Matrix;
29    import jalview.math.MatrixI;
30    import jalview.util.Comparison;
31   
32    import java.util.ArrayList;
33    import java.util.Arrays;
34    import java.util.List;
35    import java.util.concurrent.ForkJoinPool;
36    import java.util.stream.Collectors;
37    import java.util.stream.IntStream;
38    import java.util.stream.Stream;
39    import java.util.stream.StreamSupport;
40   
41    /**
42    * A class that models a substitution score matrix for any given alphabet of
43    * symbols. Instances of this class are immutable and thread-safe, so the same
44    * object is returned from calls to getInstance().
45    */
 
46    public class ScoreMatrix extends SimilarityScoreModel
47    implements PairwiseScoreModelI
48    {
49    private static final char GAP_CHARACTER = Comparison.GAP_DASH;
50   
51    /*
52    * an arbitrary score to assign for identity of an unknown symbol
53    * (this is the value on the diagonal in the * column of the NCBI matrix)
54    * (though a case could be made for using the minimum diagonal value)
55    */
56    private static final int UNKNOWN_IDENTITY_SCORE = 1;
57   
58    /*
59    * Jalview 2.10.1 treated gaps as X (peptide) or N (nucleotide)
60    * for pairwise scoring; 2.10.2 uses gap score (last column) in
61    * score matrix (JAL-2397)
62    * Set this flag to true (via Groovy) for 2.10.1 behaviour
63    */
64    private static boolean scoreGapAsAny = false;
65   
66    public static final short UNMAPPED = (short) -1;
67   
68    private static final String BAD_ASCII_ERROR = "Unexpected character %s in getPairwiseScore";
69   
70    private static final int MAX_ASCII = 127;
71   
72    /*
73    * the name of the model as shown in menus
74    * each score model in use should have a unique name
75    */
76    private String name;
77   
78    /*
79    * a description for the model as shown in tooltips
80    */
81    private String description;
82   
83    /*
84    * the characters that the model provides scores for
85    */
86    private char[] symbols;
87   
88    /*
89    * the score matrix; both dimensions must equal the number of symbols
90    * matrix[i][j] is the substitution score for replacing symbols[i] with symbols[j]
91    */
92    private float[][] matrix;
93   
94    /*
95    * quick lookup to convert from an ascii character value to the index
96    * of the corresponding symbol in the score matrix
97    */
98    private short[] symbolIndex;
99   
100    /*
101    * true for Protein Score matrix, false for dna score matrix
102    */
103    private boolean peptide;
104   
105    private float minValue;
106   
107    private float maxValue;
108   
109    private boolean symmetric;
110   
/**
 * Creates a score matrix with no description. Delegates to the four-argument
 * constructor, passing null as the description, so the same size checks
 * apply: the matrix should be square with one row and column per alphabet
 * symbol (for example 20x20 for a 20 symbol alphabet).
 *
 * @param theName
 *          Unique, human readable name for the matrix
 * @param alphabet
 *          the symbols to which scores apply
 * @param values
 *          Pairwise scores indexed according to the symbol alphabet
 */
public ScoreMatrix(String theName, char[] alphabet, float[][] values)
{
  this(theName, (String) null, alphabet, values);
}
127   
/**
 * Constructor given a name, description, symbol alphabet, and matrix of
 * scores for pairs of symbols. The matrix must be square and of the same
 * size as the alphabet, for example 20x20 for a 20 symbol alphabet.
 * <p>
 * The alphabet and score arrays are copied, so changes made to the caller's
 * arrays after construction do not affect this object (or the minimum,
 * maximum and symmetry values derived from them here).
 *
 * @param theName
 *          Unique, human readable name for the matrix
 * @param theDescription
 *          descriptive display name suitable for use in menus
 * @param alphabet
 *          the symbols to which scores apply
 * @param values
 *          Pairwise scores indexed according to the symbol alphabet
 * @throws IllegalArgumentException
 *           if the number of rows, or the length of any row, differs from
 *           the alphabet size
 */
public ScoreMatrix(String theName, String theDescription, char[] alphabet,
        float[][] values)
{
  if (alphabet.length != values.length)
  {
    throw new IllegalArgumentException(
            "score matrix size must match alphabet size");
  }

  /*
   * validate and copy row by row, so the stored matrix is exactly
   * the data that was checked
   */
  float[][] scores = new float[values.length][];
  for (int i = 0; i < values.length; i++)
  {
    float[] row = values[i];
    if (row.length != alphabet.length)
    {
      throw new IllegalArgumentException(
              "score matrix size must be square");
    }
    scores[i] = row.clone();
  }

  this.matrix = scores;
  this.name = theName;
  this.description = theDescription;
  this.symbols = alphabet.clone();

  symbolIndex = buildSymbolIndex(this.symbols);

  /*
   * NOTE(review): findMinMax() is protected, so a subclass override
   * would run here before the subclass constructor body
   */
  findMinMax();

  symmetric = checkSymmetry();

  /*
   * crude heuristic for now...
   */
  peptide = alphabet.length >= 20;
}
175   
176    /**
177    * Answers true if the matrix is symmetric, else false. Usually, substitution
178    * matrices are symmetric, which allows calculations to be short cut.
179    *
180    * @return
181    */
 
182  446 toggle private boolean checkSymmetry()
183    {
184  7319 for (int i = 0; i < matrix.length; i++)
185    {
186  80431 for (int j = i; j < matrix.length; j++)
187    {
188  73690 if (matrix[i][j] != matrix[j][i])
189    {
190  9 return false;
191    }
192    }
193    }
194  437 return true;
195    }
196   
197    /**
198    * Record the minimum and maximum score values
199    */
 
200  446 toggle protected void findMinMax()
201    {
202  446 float min = Float.MAX_VALUE;
203  446 float max = -Float.MAX_VALUE;
204  446 if (matrix != null)
205    {
206  446 for (float[] row : matrix)
207    {
208  6897 if (row != null)
209    {
210  6899 for (float f : row)
211    {
212  139688 min = Math.min(min, f);
213  139610 max = Math.max(max, f);
214    }
215    }
216    }
217    }
218  446 minValue = min;
219  446 maxValue = max;
220    }
221   
222    /**
223    * Returns an array A where A[i] is the position in the alphabet array of the
224    * character whose value is i. For example if the alphabet is { 'A', 'D', 'X'
225    * } then A['D'] = A[68] = 1.
226    * <p>
227    * Unmapped characters (not in the alphabet) get an index of -1.
228    * <p>
229    * Mappings are added automatically for lower case symbols (for non case
230    * sensitive scoring), unless they are explicitly present in the alphabet (are
231    * scored separately in the score matrix).
232    * <p>
233    * the gap character (space, dash or dot) included in the alphabet (if any) is
234    * recorded in a field
235    *
236    * @param alphabet
237    * @return
238    */
 
239  448 toggle short[] buildSymbolIndex(char[] alphabet)
240    {
241  448 short[] index = new short[MAX_ASCII + 1];
242  448 Arrays.fill(index, UNMAPPED);
243  448 short pos = 0;
244  448 for (char c : alphabet)
245    {
246  6912 if (c <= MAX_ASCII)
247    {
248  6910 index[c] = pos;
249    }
250   
251    /*
252    * also map lower-case character (unless separately mapped)
253    */
254  6911 if (c >= 'A' && c <= 'Z')
255    {
256  6465 short lowerCase = (short) (c + ('a' - 'A'));
257  6464 if (index[lowerCase] == UNMAPPED)
258    {
259  6465 index[lowerCase] = pos;
260    }
261    }
262  6911 pos++;
263    }
264  448 return index;
265    }
266   
 
267  1936 toggle @Override
268    public String getName()
269    {
270  1936 return name;
271    }
272   
 
273  4 toggle @Override
274    public String getDescription()
275    {
276  4 return description;
277    }
278   
 
279  30 toggle @Override
280    public boolean isDNA()
281    {
282  30 return !peptide;
283    }
284   
 
285  24 toggle @Override
286    public boolean isProtein()
287    {
288  24 return peptide;
289    }
290   
291    /**
292    * Returns a copy of the score matrix as used in getPairwiseScore. If using
293    * this matrix directly, callers <em>must</em> also call
294    * <code>getMatrixIndex</code> in order to get the matrix index for each
295    * character (symbol).
296    *
297    * @return
298    * @see #getMatrixIndex(char)
299    */
 
300  1504 toggle public float[][] getMatrix()
301    {
302  1504 float[][] v = new float[matrix.length][matrix.length];
303  37544 for (int i = 0; i < matrix.length; i++)
304    {
305  36040 v[i] = Arrays.copyOf(matrix[i], matrix[i].length);
306    }
307  1504 return v;
308    }
309   
310    /**
311    * Answers the matrix index for a given character, or -1 if unmapped in the
312    * matrix. Use this method only if using <code>getMatrix</code> in order to
313    * compute scores directly (without symbol lookup) for efficiency.
314    *
315    * @param c
316    * @return
317    * @see #getMatrix()
318    */
 
319  2323892 toggle public int getMatrixIndex(char c)
320    {
321  2323763 if (c < symbolIndex.length)
322    {
323  2323752 return symbolIndex[c];
324    }
325    else
326    {
327  1 return UNMAPPED;
328    }
329    }
330   
331    /**
332    * Returns the pairwise score for substituting c with d. If either c or d is
333    * an unexpected character, returns 1 for identity (c == d), else the minimum
334    * score value in the matrix.
335    */
 
336  15221774 toggle @Override
337    public float getPairwiseScore(char c, char d)
338    {
339  15221783 if (c >= symbolIndex.length)
340    {
341  1 jalview.bin.Console.errPrintln(String.format(BAD_ASCII_ERROR, c));
342  1 return 0;
343    }
344  15221839 if (d >= symbolIndex.length)
345    {
346  2 jalview.bin.Console.errPrintln(String.format(BAD_ASCII_ERROR, d));
347  2 return 0;
348    }
349   
350  15221900 int cIndex = symbolIndex[c];
351  15221907 int dIndex = symbolIndex[d];
352  15221933 if (cIndex != UNMAPPED && dIndex != UNMAPPED)
353    {
354  15217852 return matrix[cIndex][dIndex];
355    }
356   
357    /*
358    * one or both symbols not found in the matrix
359    * currently scoring as 1 (for identity) or the minimum
360    * matrix score value (otherwise)
361    * (a case could be made for using minimum row/column value instead)
362    */
363  4082 return c == d ? UNKNOWN_IDENTITY_SCORE : getMinimumScore();
364    }
365   
366    /**
367    * pretty print the matrix
368    */
 
369  0 toggle @Override
370    public String toString()
371    {
372  0 return outputMatrix(false);
373    }
374   
375    /**
376    * Print the score matrix, optionally formatted as html, with the alphabet
377    * symbols as column headings and at the start of each row.
378    * <p>
379    * The non-html format should give an output which can be parsed as a score
380    * matrix file
381    *
382    * @param html
383    * @return
384    */
 
385  2 toggle public String outputMatrix(boolean html)
386    {
387  2 StringBuilder sb = new StringBuilder(512);
388   
389    /*
390    * heading row with alphabet
391    */
392  2 if (html)
393    {
394  1 sb.append("<table border=\"1\">");
395  1 sb.append(html ? "<tr><th></th>" : "");
396    }
397    else
398    {
399  1 sb.append("ScoreMatrix ").append(getName()).append("\n");
400    }
401  2 for (char sym : symbols)
402    {
403  26 if (html)
404    {
405  2 sb.append("<th>&nbsp;").append(sym).append("&nbsp;</th>");
406    }
407    else
408    {
409  24 sb.append("\t").append(sym);
410    }
411    }
412  2 sb.append(html ? "</tr>\n" : "\n");
413   
414    /*
415    * table of scores
416    */
417  2 for (char c1 : symbols)
418    {
419  26 if (html)
420    {
421  2 sb.append("<tr><td>");
422    }
423  26 sb.append(c1).append(html ? "</td>" : "");
424  26 for (char c2 : symbols)
425    {
426  580 sb.append(html ? "<td>" : "\t")
427    .append(matrix[symbolIndex[c1]][symbolIndex[c2]])
428  580 .append(html ? "</td>" : "");
429    }
430  26 sb.append(html ? "</tr>\n" : "\n");
431    }
432  2 if (html)
433    {
434  1 sb.append("</table>");
435    }
436  2 return sb.toString();
437    }
438   
439    /**
440    * Answers the number of symbols coded for (also equal to the number of rows
441    * and columns of the score matrix)
442    *
443    * @return
444    */
 
445  1504 toggle public int getSize()
446    {
447  1504 return symbols.length;
448    }
449   
450    /**
451    * Computes an NxN matrix where N is the number of sequences, and entry [i, j]
452    * is sequence[i] pairwise multiplied with sequence[j], as a sum of scores
453    * computed using the current score matrix. For example
454    * <ul>
455    * <li>Sequences:</li>
456    * <li>FKL</li>
457    * <li>R-D</li>
458    * <li>QIA</li>
459    * <li>GWC</li>
460    * <li>Score matrix is BLOSUM62</li>
461    * <li>Gaps treated same as X (unknown)</li>
462    * <li>product [0, 0] = F.F + K.K + L.L = 6 + 5 + 4 = 15</li>
463    * <li>product [1, 1] = R.R + -.- + D.D = 5 + -1 + 6 = 10</li>
464    * <li>product [2, 2] = Q.Q + I.I + A.A = 5 + 4 + 4 = 13</li>
465    * <li>product [3, 3] = G.G + W.W + C.C = 6 + 11 + 9 = 26</li>
466    * <li>product[0, 1] = F.R + K.- + L.D = -3 + -1 + -3 = -8
467    * <li>and so on</li>
468    * </ul>
469    * This method is thread-safe.
470    */
 
471  1 toggle @Override
472    public MatrixI findSimilarities(AlignmentView seqstrings,
473    SimilarityParamsI options)
474    {
475  1 char gapChar = scoreGapAsAny ? (seqstrings.isNa() ? 'N' : 'X')
476    : GAP_CHARACTER;
477  1 String[] seqs = seqstrings.getSequenceStrings(gapChar);
478  1 return findSimilarities(seqs, options);
479    }
480   
481    /**
482    * Computes pairwise similarities of a set of sequences using the given
483    * parameters
484    *
485    * @param seqs
486    * @param params
487    * @return
488    */
 
489  3 toggle protected MatrixI findSimilarities(String[] seqs,
490    SimilarityParamsI params)
491    {
492  3 double[][] values = new double[seqs.length][seqs.length];
493   
494  3 List<char[]> seqChars = new ArrayList<char[]>();
495  3 for (String seq:seqs) {
496  21 seqChars.add(seq.toCharArray());
497    }
498  3 try
499    {
500  3 ForkJoinPool customPool = new ForkJoinPool(20);
501   
502  3 customPool.submit(() ->
503   
504    IntStream.range(0, seqs.length).parallel().forEach(
505    row -> {
506  21 for (int col = symmetric ? row : 0; col < seqChars.size(); col++)
507    {
508  133 double total = computeSimilarityForChars(seqChars.get(row), seqChars.get(col), params);
509  133 values[row][col] = total;
510  133 if (symmetric)
511    {
512  133 values[col][row] = total;
513    }
514    }
515    }));
516    } catch (Exception x)
517    {
518    }
519   
520  3 return new Matrix(values);
521    }
522   
523    /**
524    * Calculates the pairwise similarity of two strings using the given
525    * calculation parameters
526    *
527    * @param seq1
528    * @param seq2
529    * @param params
530    * @return
531    */
 
532  16 toggle protected double computeSimilarity(String seq1, String seq2,
533    SimilarityParamsI params)
534    {
535  16 return computeSimilarityForChars(seq1.toCharArray(),seq2.toCharArray(),params);
536    }
537   
 
538  149 toggle protected double computeSimilarityForChars(char[] seq1, char[] seq2,
539    SimilarityParamsI params)
540    {
541  149 int len1 = seq1.length;
542  149 int len2 = seq2.length;
543  149 double total = 0;
544   
545  149 int width = Math.max(len1, len2);
546  18983 for (int i = 0; i < width; i++)
547    {
548  18962 if (i >= len1 || i >= len2)
549    {
550    /*
551    * off the end of one sequence; stop if we are only matching
552    * on the shorter sequence length, else treat as trailing gap
553    */
554  16 if (params.denominateByShortestLength())
555    {
556  8 break;
557    }
558    }
559   
560  18947 char c1 = i >= len1 ? GAP_CHARACTER : seq1[i];
561  18976 char c2 = i >= len2 ? GAP_CHARACTER : seq2[i];
562  18951 boolean gap1 = Comparison.isGap(c1);
563  18931 boolean gap2 = Comparison.isGap(c2);
564   
565  18952 if (gap1 && gap2)
566    {
567    /*
568    * gap-gap: include if options say so, else ignore
569    */
570  1320 if (!params.includeGappedColumns())
571    {
572  8 continue;
573    }
574    }
575  17653 else if (gap1 || gap2)
576    {
577    /*
578    * gap-residue: score if options say so
579    */
580  2780 if (!params.includeGaps())
581    {
582  20 continue;
583    }
584    }
585  18898 float score = getPairwiseScore(c1, c2);
586  18852 total += score;
587    }
588  149 return total;
589    }
590   
591    /**
592    * Answers a hashcode computed from the symbol alphabet and the matrix score
593    * values
594    */
 
595  4 toggle @Override
596    public int hashCode()
597    {
598  4 int hs = Arrays.hashCode(symbols);
599  4 for (float[] row : matrix)
600    {
601  96 hs = hs * 31 + Arrays.hashCode(row);
602    }
603  4 return hs;
604    }
605   
606    /**
607    * Answers true if the argument is a ScoreMatrix with the same symbol alphabet
608    * and score values, else false
609    */
 
610  4 toggle @Override
611    public boolean equals(Object obj)
612    {
613  4 if (!(obj instanceof ScoreMatrix))
614    {
615  1 return false;
616    }
617  3 ScoreMatrix sm = (ScoreMatrix) obj;
618  3 if (Arrays.equals(symbols, sm.symbols)
619    && Arrays.deepEquals(matrix, sm.matrix))
620    {
621  2 return true;
622    }
623  1 return false;
624    }
625   
626    /**
627    * Returns the alphabet the matrix scores for, as a string of characters
628    *
629    * @return
630    */
 
631  1 toggle String getSymbols()
632    {
633  1 return new String(symbols);
634    }
635   
 
636  21110392 toggle public float getMinimumScore()
637    {
638  21131085 return minValue;
639    }
640   
 
641  9232 toggle public float getMaximumScore()
642    {
643  9232 return maxValue;
644    }
645   
 
646  4 toggle @Override
647    public ScoreModelI getInstance(AlignmentViewPanel avp)
648    {
649  4 return this;
650    }
651   
 
652  2 toggle public boolean isSymmetric()
653    {
654  2 return symmetric;
655    }
656    }