Clover icon

jalviewX

  1. Project Clover database Wed Oct 31 2018 15:13:58 GMT
  2. Package jalview.analysis.scoremodels

File PIDModel.java

 

Coverage histogram

../../../img/srcFileCovDistChart7.png
28% of files have more coverage

Code metrics

34
61
12
1
263
155
34
0.56
5.08
12
2.83

Classes

Class Line # Actions
PIDModel 37 61 34 35
0.6728972 (67.3%)
 

Contributing tests

This file is covered by 5 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.analysis.scoremodels;
22   
23    import jalview.api.AlignmentViewPanel;
24    import jalview.api.analysis.PairwiseScoreModelI;
25    import jalview.api.analysis.ScoreModelI;
26    import jalview.api.analysis.SimilarityParamsI;
27    import jalview.datamodel.AlignmentView;
28    import jalview.math.Matrix;
29    import jalview.math.MatrixI;
30    import jalview.util.Comparison;
31   
32    /**
33    * A class to provide sequence pairwise similarity based on residue identity.
34    * Instances of this class are immutable and thread-safe, so the same object is
35    * returned from calls to getInstance().
36    */
 
37    public class PIDModel extends SimilarityScoreModel
38    implements PairwiseScoreModelI
39    {
40    private static final String NAME = "PID";
41   
42    /**
43    * Constructor
44    */
 
45  7 toggle public PIDModel()
46    {
47    }
48   
 
49  3 toggle @Override
50    public String getName()
51    {
52  3 return NAME;
53    }
54   
55    /**
56    * Answers null for description. If a display name is needed, use getName() or
57    * an internationalized string built from the name.
58    */
 
59  0 toggle @Override
60    public String getDescription()
61    {
62  0 return null;
63    }
64   
 
65  0 toggle @Override
66    public boolean isDNA()
67    {
68  0 return true;
69    }
70   
 
71  0 toggle @Override
72    public boolean isProtein()
73    {
74  0 return true;
75    }
76   
77    /**
78    * Answers 1 if c and d are the same residue (ignoring case), and not gap
79    * characters. Answers 0 for non-matching or gap characters.
80    */
 
81  10 toggle @Override
82    public float getPairwiseScore(char c, char d)
83    {
84  10 c = toUpper(c);
85  10 d = toUpper(d);
86  10 if (c == d && !Comparison.isGap(c))
87    {
88  4 return 1f;
89    }
90  6 return 0f;
91    }
92   
93    /**
94    * @param c
95    */
 
96  100 toggle protected static char toUpper(char c)
97    {
98  100 if ('a' <= c && c <= 'z')
99    {
100  3 c += 'A' - 'a';
101    }
102  100 return c;
103    }
104   
105    /**
106    * Computes similarity scores based on pairwise percentage identity of
107    * sequences. For consistency with Jalview 2.10.1's SeqSpace mode PCA
108    * calculation, the percentage scores are rescaled to the width of the
109    * sequences (as if counts of identical residues). This method is thread-safe.
110    */
 
111  0 toggle @Override
112    public MatrixI findSimilarities(AlignmentView seqData,
113    SimilarityParamsI options)
114    {
115  0 String[] seqs = seqData.getSequenceStrings(Comparison.GAP_DASH);
116   
117  0 MatrixI result = findSimilarities(seqs, options);
118   
119  0 result.multiply(seqData.getWidth() / 100d);
120   
121  0 return result;
122    }
123   
124    /**
125    * A distance score is computed in the usual way (by reversing the range of
126    * the similarity score results), and then rescaled to percentage values
127    * (reversing the rescaling to count values done in findSimilarities). This
128    * method is thread-safe.
129    */
 
130  0 toggle @Override
131    public MatrixI findDistances(AlignmentView seqData,
132    SimilarityParamsI options)
133    {
134  0 MatrixI result = super.findDistances(seqData, options);
135   
136  0 if (seqData.getWidth() != 0)
137    {
138  0 result.multiply(100d / seqData.getWidth());
139    }
140   
141  0 return result;
142    }
143   
144    /**
145    * Compute percentage identity scores, using the gap treatment and
146    * normalisation specified by the options parameter
147    *
148    * @param seqs
149    * @param options
150    * @return
151    */
 
152  0 toggle protected MatrixI findSimilarities(String[] seqs,
153    SimilarityParamsI options)
154    {
155    // TODO reuse code in ScoreMatrix instead somehow
156  0 double[][] values = new double[seqs.length][];
157  0 for (int row = 0; row < seqs.length; row++)
158    {
159  0 values[row] = new double[seqs.length];
160  0 for (int col = 0; col < seqs.length; col++)
161    {
162  0 double total = computePID(seqs[row], seqs[col], options);
163  0 values[row][col] = total;
164    }
165    }
166  0 return new Matrix(values);
167    }
168   
169    /**
170    * Computes a percentage identity for two sequences, using the algorithm
171    * choices specified by the options parameter
172    *
173    * @param seq1
174    * @param seq2
175    * @param options
176    * @return
177    */
 
178  13 toggle public static double computePID(String seq1, String seq2,
179    SimilarityParamsI options)
180    {
181  13 int len1 = seq1.length();
182  13 int len2 = seq2.length();
183  13 int width = Math.max(len1, len2);
184  13 int total = 0;
185  13 int divideBy = 0;
186   
187  91 for (int i = 0; i < width; i++)
188    {
189  85 if (i >= len1 || i >= len2)
190    {
191    /*
192    * off the end of one sequence; stop if we are only matching
193    * on the shorter sequence length, else treat as trailing gap
194    */
195  11 if (options.denominateByShortestLength())
196    {
197  7 break;
198    }
199  4 if (options.includeGaps())
200    {
201  4 divideBy++;
202    }
203  4 if (options.matchGaps())
204    {
205  2 total++;
206    }
207  4 continue;
208    }
209  74 char c1 = seq1.charAt(i);
210  74 char c2 = seq2.charAt(i);
211  74 boolean gap1 = Comparison.isGap(c1);
212  74 boolean gap2 = Comparison.isGap(c2);
213   
214  74 if (gap1 && gap2)
215    {
216    /*
217    * gap-gap: include if options say so, if so
218    * have to score as identity; else ignore
219    */
220  10 if (options.includeGappedColumns())
221    {
222  6 divideBy++;
223  6 total++;
224    }
225  10 continue;
226    }
227   
228  64 if (gap1 || gap2)
229    {
230    /*
231    * gap-residue: include if options say so,
232    * count as match if options say so
233    */
234  24 if (options.includeGaps())
235    {
236  24 divideBy++;
237    }
238  24 if (options.matchGaps())
239    {
240  16 total++;
241    }
242  24 continue;
243    }
244   
245    /*
246    * remaining case is gap-residue
247    */
248  40 if (toUpper(c1) == toUpper(c2))
249    {
250  25 total++;
251    }
252  40 divideBy++;
253    }
254   
255  13 return divideBy == 0 ? 0D : 100D * total / divideBy;
256    }
257   
 
258  0 toggle @Override
259    public ScoreModelI getInstance(AlignmentViewPanel avp)
260    {
261  0 return this;
262    }
263    }