Clover icon

Coverage Report

  1. Project Clover database Thu Nov 28 2024 18:06:40 GMT
  2. Package jalview.analysis.scoremodels

File ScoreMatrix.java

 

Coverage histogram

../../../img/srcFileCovDistChart10.png
0% of files have more coverage

Code metrics

86
132
25
1
628
343
74
0.56
5.28
25
2.96

Classes

Class Line # Actions
ScoreMatrix 39 132 74
0.96296396.3%
 

Contributing tests

This file is covered by 249 tests. .

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.analysis.scoremodels;
22   
23    import jalview.api.AlignmentViewPanel;
24    import jalview.api.analysis.PairwiseScoreModelI;
25    import jalview.api.analysis.ScoreModelI;
26    import jalview.api.analysis.SimilarityParamsI;
27    import jalview.datamodel.AlignmentView;
28    import jalview.math.Matrix;
29    import jalview.math.MatrixI;
30    import jalview.util.Comparison;
31   
32    import java.util.Arrays;
33   
34    /**
35    * A class that models a substitution score matrix for any given alphabet of
36    * symbols. Instances of this class are immutable and thread-safe, so the same
37    * object is returned from calls to getInstance().
38    */
 
39    public class ScoreMatrix extends SimilarityScoreModel
40    implements PairwiseScoreModelI
41    {
42    private static final char GAP_CHARACTER = Comparison.GAP_DASH;
43   
44    /*
45    * an arbitrary score to assign for identity of an unknown symbol
46    * (this is the value on the diagonal in the * column of the NCBI matrix)
47    * (though a case could be made for using the minimum diagonal value)
48    */
49    private static final int UNKNOWN_IDENTITY_SCORE = 1;
50   
51    /*
52    * Jalview 2.10.1 treated gaps as X (peptide) or N (nucleotide)
53    * for pairwise scoring; 2.10.2 uses gap score (last column) in
54    * score matrix (JAL-2397)
55    * Set this flag to true (via Groovy) for 2.10.1 behaviour
56    */
57    private static boolean scoreGapAsAny = false;
58   
59    public static final short UNMAPPED = (short) -1;
60   
61    private static final String BAD_ASCII_ERROR = "Unexpected character %s in getPairwiseScore";
62   
63    private static final int MAX_ASCII = 127;
64   
65    /*
66    * the name of the model as shown in menus
67    * each score model in use should have a unique name
68    */
69    private String name;
70   
71    /*
72    * a description for the model as shown in tooltips
73    */
74    private String description;
75   
76    /*
77    * the characters that the model provides scores for
78    */
79    private char[] symbols;
80   
81    /*
82    * the score matrix; both dimensions must equal the number of symbols
83    * matrix[i][j] is the substitution score for replacing symbols[i] with symbols[j]
84    */
85    private float[][] matrix;
86   
87    /*
88    * quick lookup to convert from an ascii character value to the index
89    * of the corresponding symbol in the score matrix
90    */
91    private short[] symbolIndex;
92   
93    /*
94    * true for Protein Score matrix, false for dna score matrix
95    */
96    private boolean peptide;
97   
98    private float minValue;
99   
100    private float maxValue;
101   
102    private boolean symmetric;
103   
104    /**
105    * Constructor given a name, symbol alphabet, and matrix of scores for pairs
106    * of symbols. The matrix should be square and of the same size as the
107    * alphabet, for example 20x20 for a 20 symbol alphabet.
108    *
109    * @param theName
110    * Unique, human readable name for the matrix
111    * @param alphabet
112    * the symbols to which scores apply
113    * @param values
114    * Pairwise scores indexed according to the symbol alphabet
115    */
 
116  349 toggle public ScoreMatrix(String theName, char[] alphabet, float[][] values)
117    {
118  349 this(theName, null, alphabet, values);
119    }
120   
121    /**
122    * Constructor given a name, description, symbol alphabet, and matrix of
123    * scores for pairs of symbols. The matrix should be square and of the same
124    * size as the alphabet, for example 20x20 for a 20 symbol alphabet.
125    *
126    * @param theName
127    * Unique, human readable name for the matrix
128    * @param theDescription
129    * descriptive display name suitable for use in menus
130    * @param alphabet
131    * the symbols to which scores apply
132    * @param values
133    * Pairwise scores indexed according to the symbol alphabet
134    */
 
135  354 toggle public ScoreMatrix(String theName, String theDescription, char[] alphabet,
136    float[][] values)
137    {
138  354 if (alphabet.length != values.length)
139    {
140  2 throw new IllegalArgumentException(
141    "score matrix size must match alphabet size");
142    }
143  352 for (float[] row : values)
144    {
145  5394 if (row.length != alphabet.length)
146    {
147  1 throw new IllegalArgumentException(
148    "score matrix size must be square");
149    }
150    }
151   
152  351 this.matrix = values;
153  351 this.name = theName;
154  351 this.description = theDescription;
155  351 this.symbols = alphabet;
156   
157  351 symbolIndex = buildSymbolIndex(alphabet);
158   
159  351 findMinMax();
160   
161  351 symmetric = checkSymmetry();
162   
163    /*
164    * crude heuristic for now...
165    */
166  351 peptide = alphabet.length >= 20;
167    }
168   
169    /**
170    * Answers true if the matrix is symmetric, else false. Usually, substitution
171    * matrices are symmetric, which allows calculations to be short cut.
172    *
173    * @return
174    */
 
175  351 toggle private boolean checkSymmetry()
176    {
177  5716 for (int i = 0; i < matrix.length; i++)
178    {
179  62988 for (int j = i; j < matrix.length; j++)
180    {
181  57623 if (matrix[i][j] != matrix[j][i])
182    {
183  10 return false;
184    }
185    }
186    }
187  341 return true;
188    }
189   
190    /**
191    * Record the minimum and maximum score values
192    */
 
193  351 toggle protected void findMinMax()
194    {
195  351 float min = Float.MAX_VALUE;
196  351 float max = -Float.MAX_VALUE;
197  351 if (matrix != null)
198    {
199  351 for (float[] row : matrix)
200    {
201  5392 if (row != null)
202    {
203  5392 for (float f : row)
204    {
205  109928 min = Math.min(min, f);
206  109928 max = Math.max(max, f);
207    }
208    }
209    }
210    }
211  351 minValue = min;
212  351 maxValue = max;
213    }
214   
215    /**
216    * Returns an array A where A[i] is the position in the alphabet array of the
217    * character whose value is i. For example if the alphabet is { 'A', 'D', 'X'
218    * } then A['D'] = A[68] = 1.
219    * <p>
220    * Unmapped characters (not in the alphabet) get an index of -1.
221    * <p>
222    * Mappings are added automatically for lower case symbols (for non case
223    * sensitive scoring), unless they are explicitly present in the alphabet (are
224    * scored separately in the score matrix).
225    * <p>
226    * the gap character (space, dash or dot) included in the alphabet (if any) is
227    * recorded in a field
228    *
229    * @param alphabet
230    * @return
231    */
 
232  353 toggle short[] buildSymbolIndex(char[] alphabet)
233    {
234  353 short[] index = new short[MAX_ASCII + 1];
235  353 Arrays.fill(index, UNMAPPED);
236  353 short pos = 0;
237  353 for (char c : alphabet)
238    {
239  5402 if (c <= MAX_ASCII)
240    {
241  5400 index[c] = pos;
242    }
243   
244    /*
245    * also map lower-case character (unless separately mapped)
246    */
247  5402 if (c >= 'A' && c <= 'Z')
248    {
249  5052 short lowerCase = (short) (c + ('a' - 'A'));
250  5052 if (index[lowerCase] == UNMAPPED)
251    {
252  5051 index[lowerCase] = pos;
253    }
254    }
255  5402 pos++;
256    }
257  353 return index;
258    }
259   
 
260  1792 toggle @Override
261    public String getName()
262    {
263  1792 return name;
264    }
265   
 
266  4 toggle @Override
267    public String getDescription()
268    {
269  4 return description;
270    }
271   
 
272  30 toggle @Override
273    public boolean isDNA()
274    {
275  30 return !peptide;
276    }
277   
 
278  24 toggle @Override
279    public boolean isProtein()
280    {
281  24 return peptide;
282    }
283   
284    /**
285    * Returns a copy of the score matrix as used in getPairwiseScore. If using
286    * this matrix directly, callers <em>must</em> also call
287    * <code>getMatrixIndex</code> in order to get the matrix index for each
288    * character (symbol).
289    *
290    * @return
291    * @see #getMatrixIndex(char)
292    */
 
293  1009 toggle public float[][] getMatrix()
294    {
295  1009 float[][] v = new float[matrix.length][matrix.length];
296  25169 for (int i = 0; i < matrix.length; i++)
297    {
298  24160 v[i] = Arrays.copyOf(matrix[i], matrix[i].length);
299    }
300  1009 return v;
301    }
302   
303    /**
304    * Answers the matrix index for a given character, or -1 if unmapped in the
305    * matrix. Use this method only if using <code>getMatrix</code> in order to
306    * compute scores directly (without symbol lookup) for efficiency.
307    *
308    * @param c
309    * @return
310    * @see #getMatrix()
311    */
 
312  1532529 toggle public int getMatrixIndex(char c)
313    {
314  1532309 if (c < symbolIndex.length)
315    {
316  1532162 return symbolIndex[c];
317    }
318    else
319    {
320  1 return UNMAPPED;
321    }
322    }
323   
324    /**
325    * Returns the pairwise score for substituting c with d. If either c or d is
326    * an unexpected character, returns 1 for identity (c == d), else the minimum
327    * score value in the matrix.
328    */
 
329  13133205 toggle @Override
330    public float getPairwiseScore(char c, char d)
331    {
332  13133205 if (c >= symbolIndex.length)
333    {
334  1 jalview.bin.Console.errPrintln(String.format(BAD_ASCII_ERROR, c));
335  1 return 0;
336    }
337  13133204 if (d >= symbolIndex.length)
338    {
339  2 jalview.bin.Console.errPrintln(String.format(BAD_ASCII_ERROR, d));
340  2 return 0;
341    }
342   
343  13133202 int cIndex = symbolIndex[c];
344  13133202 int dIndex = symbolIndex[d];
345  13133202 if (cIndex != UNMAPPED && dIndex != UNMAPPED)
346    {
347  13129119 return matrix[cIndex][dIndex];
348    }
349   
350    /*
351    * one or both symbols not found in the matrix
352    * currently scoring as 1 (for identity) or the minimum
353    * matrix score value (otherwise)
354    * (a case could be made for using minimum row/column value instead)
355    */
356  4083 return c == d ? UNKNOWN_IDENTITY_SCORE : getMinimumScore();
357    }
358   
359    /**
360    * pretty print the matrix
361    */
 
362  0 toggle @Override
363    public String toString()
364    {
365  0 return outputMatrix(false);
366    }
367   
368    /**
369    * Print the score matrix, optionally formatted as html, with the alphabet
370    * symbols as column headings and at the start of each row.
371    * <p>
372    * The non-html format should give an output which can be parsed as a score
373    * matrix file
374    *
375    * @param html
376    * @return
377    */
 
378  2 toggle public String outputMatrix(boolean html)
379    {
380  2 StringBuilder sb = new StringBuilder(512);
381   
382    /*
383    * heading row with alphabet
384    */
385  2 if (html)
386    {
387  1 sb.append("<table border=\"1\">");
388  1 sb.append(html ? "<tr><th></th>" : "");
389    }
390    else
391    {
392  1 sb.append("ScoreMatrix ").append(getName()).append("\n");
393    }
394  2 for (char sym : symbols)
395    {
396  26 if (html)
397    {
398  2 sb.append("<th>&nbsp;").append(sym).append("&nbsp;</th>");
399    }
400    else
401    {
402  24 sb.append("\t").append(sym);
403    }
404    }
405  2 sb.append(html ? "</tr>\n" : "\n");
406   
407    /*
408    * table of scores
409    */
410  2 for (char c1 : symbols)
411    {
412  26 if (html)
413    {
414  2 sb.append("<tr><td>");
415    }
416  26 sb.append(c1).append(html ? "</td>" : "");
417  26 for (char c2 : symbols)
418    {
419  580 sb.append(html ? "<td>" : "\t")
420    .append(matrix[symbolIndex[c1]][symbolIndex[c2]])
421  580 .append(html ? "</td>" : "");
422    }
423  26 sb.append(html ? "</tr>\n" : "\n");
424    }
425  2 if (html)
426    {
427  1 sb.append("</table>");
428    }
429  2 return sb.toString();
430    }
431   
432    /**
433    * Answers the number of symbols coded for (also equal to the number of rows
434    * and columns of the score matrix)
435    *
436    * @return
437    */
 
438  1009 toggle public int getSize()
439    {
440  1009 return symbols.length;
441    }
442   
443    /**
444    * Computes an NxN matrix where N is the number of sequences, and entry [i, j]
445    * is sequence[i] pairwise multiplied with sequence[j], as a sum of scores
446    * computed using the current score matrix. For example
447    * <ul>
448    * <li>Sequences:</li>
449    * <li>FKL</li>
450    * <li>R-D</li>
451    * <li>QIA</li>
452    * <li>GWC</li>
453    * <li>Score matrix is BLOSUM62</li>
454    * <li>Gaps treated same as X (unknown)</li>
455    * <li>product [0, 0] = F.F + K.K + L.L = 6 + 5 + 4 = 15</li>
456    * <li>product [1, 1] = R.R + -.- + D.D = 5 + -1 + 6 = 10</li>
457    * <li>product [2, 2] = Q.Q + I.I + A.A = 5 + 4 + 4 = 13</li>
458    * <li>product [3, 3] = G.G + W.W + C.C = 6 + 11 + 9 = 26</li>
459    * <li>product[0, 1] = F.R + K.- + L.D = -3 + -1 + -3 = -8
460    * <li>and so on</li>
461    * </ul>
462    * This method is thread-safe.
463    */
 
464  1 toggle @Override
465    public MatrixI findSimilarities(AlignmentView seqstrings,
466    SimilarityParamsI options)
467    {
468  1 char gapChar = scoreGapAsAny ? (seqstrings.isNa() ? 'N' : 'X')
469    : GAP_CHARACTER;
470  1 String[] seqs = seqstrings.getSequenceStrings(gapChar);
471  1 return findSimilarities(seqs, options);
472    }
473   
474    /**
475    * Computes pairwise similarities of a set of sequences using the given
476    * parameters
477    *
478    * @param seqs
479    * @param params
480    * @return
481    */
 
482  7 toggle protected MatrixI findSimilarities(String[] seqs,
483    SimilarityParamsI params)
484    {
485  7 double[][] values = new double[seqs.length][seqs.length];
486  36 for (int row = 0; row < seqs.length; row++)
487    {
488  176 for (int col = symmetric ? row : 0; col < seqs.length; col++)
489    {
490  147 double total = computeSimilarity(seqs[row], seqs[col], params);
491  147 values[row][col] = total;
492  147 if (symmetric)
493    {
494  139 values[col][row] = total;
495    }
496    }
497    }
498  7 return new Matrix(values);
499    }
500   
501    /**
502    * Calculates the pairwise similarity of two strings using the given
503    * calculation parameters
504    *
505    * @param seq1
506    * @param seq2
507    * @param params
508    * @return
509    */
 
510  163 toggle protected double computeSimilarity(String seq1, String seq2,
511    SimilarityParamsI params)
512    {
513  163 int len1 = seq1.length();
514  163 int len2 = seq2.length();
515  163 double total = 0;
516   
517  163 int width = Math.max(len1, len2);
518  19257 for (int i = 0; i < width; i++)
519    {
520  19102 if (i >= len1 || i >= len2)
521    {
522    /*
523    * off the end of one sequence; stop if we are only matching
524    * on the shorter sequence length, else treat as trailing gap
525    */
526  16 if (params.denominateByShortestLength())
527    {
528  8 break;
529    }
530    }
531   
532  19094 char c1 = i >= len1 ? GAP_CHARACTER : seq1.charAt(i);
533  19094 char c2 = i >= len2 ? GAP_CHARACTER : seq2.charAt(i);
534  19094 boolean gap1 = Comparison.isGap(c1);
535  19094 boolean gap2 = Comparison.isGap(c2);
536   
537  19094 if (gap1 && gap2)
538    {
539    /*
540    * gap-gap: include if options say so, else ignore
541    */
542  1320 if (!params.includeGappedColumns())
543    {
544  8 continue;
545    }
546    }
547  17774 else if (gap1 || gap2)
548    {
549    /*
550    * gap-residue: score if options say so
551    */
552  2781 if (!params.includeGaps())
553    {
554  20 continue;
555    }
556    }
557  19066 float score = getPairwiseScore(c1, c2);
558  19066 total += score;
559    }
560  163 return total;
561    }
562   
563    /**
564    * Answers a hashcode computed from the symbol alphabet and the matrix score
565    * values
566    */
 
567  4 toggle @Override
568    public int hashCode()
569    {
570  4 int hs = Arrays.hashCode(symbols);
571  4 for (float[] row : matrix)
572    {
573  96 hs = hs * 31 + Arrays.hashCode(row);
574    }
575  4 return hs;
576    }
577   
578    /**
579    * Answers true if the argument is a ScoreMatrix with the same symbol alphabet
580    * and score values, else false
581    */
 
582  4 toggle @Override
583    public boolean equals(Object obj)
584    {
585  4 if (!(obj instanceof ScoreMatrix))
586    {
587  1 return false;
588    }
589  3 ScoreMatrix sm = (ScoreMatrix) obj;
590  3 if (Arrays.equals(symbols, sm.symbols)
591    && Arrays.deepEquals(matrix, sm.matrix))
592    {
593  2 return true;
594    }
595  1 return false;
596    }
597   
598    /**
599    * Returns the alphabet the matrix scores for, as a string of characters
600    *
601    * @return
602    */
 
603  1 toggle String getSymbols()
604    {
605  1 return new String(symbols);
606    }
607   
 
608  13231641 toggle public float getMinimumScore()
609    {
610  13238744 return minValue;
611    }
612   
 
613  25 toggle public float getMaximumScore()
614    {
615  25 return maxValue;
616    }
617   
 
618  4 toggle @Override
619    public ScoreModelI getInstance(AlignmentViewPanel avp)
620    {
621  4 return this;
622    }
623   
 
624  4 toggle public boolean isSymmetric()
625    {
626  4 return symmetric;
627    }
628    }