Clover icon

Coverage Report

  1. Project Clover database Wed Nov 6 2024 14:47:21 GMT
  2. Package jalview.analysis.scoremodels

File SecondaryStructureDistanceModel.java

 

Coverage histogram

../../../img/srcFileCovDistChart7.png
29% of files have more coverage

Code metrics

44
93
13
1
416
240
43
0.46
7.15
13
3.31

Classes

Class Line # Actions
SecondaryStructureDistanceModel 45 93 43
0.6464%
 

Contributing tests

This file is covered by 5 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.analysis.scoremodels;
22   
23    import jalview.analysis.AlignmentUtils;
24    import jalview.api.AlignmentViewPanel;
25    import jalview.api.FeatureRenderer;
26    import jalview.api.analysis.ScoreModelI;
27    import jalview.api.analysis.SimilarityParamsI;
28    import jalview.datamodel.AlignmentAnnotation;
29    import jalview.datamodel.AlignmentView;
30    import jalview.datamodel.Annotation;
31    import jalview.datamodel.SeqCigar;
32    import jalview.math.Matrix;
33    import jalview.math.MatrixI;
34    import jalview.util.Constants;
35    import jalview.util.SetUtils;
36   
37    import java.util.HashMap;
38    import java.util.HashSet;
39    import java.util.Map;
40    import java.util.Set;
41   
42    /* This class contains methods to calculate distance score between
43    * secondary structure annotations of the sequences.
44    */
 
45    public class SecondaryStructureDistanceModel extends DistanceScoreModel
46    {
47    private static final String NAME = "Secondary Structure Similarity";
48   
49    private ScoreMatrix ssRateMatrix;
50   
51    private String description;
52   
53    FeatureRenderer fr;
54   
55    /**
56    * Constructor
57    */
 
58  92 toggle public SecondaryStructureDistanceModel()
59    {
60   
61    }
62   
 
63  3 toggle @Override
64    public ScoreModelI getInstance(AlignmentViewPanel view)
65    {
66  3 SecondaryStructureDistanceModel instance;
67  3 try
68    {
69  3 instance = this.getClass().getDeclaredConstructor().newInstance();
70  3 instance.configureFromAlignmentView(view);
71  3 return instance;
72    } catch (InstantiationException | IllegalAccessException e)
73    {
74  0 jalview.bin.Console.errPrintln("Error in " + getClass().getName()
75    + ".getInstance(): " + e.getMessage());
76  0 return null;
77    } catch (ReflectiveOperationException roe)
78    {
79  0 return null;
80    }
81    }
82   
 
83  3 toggle boolean configureFromAlignmentView(AlignmentViewPanel view)
84   
85    {
86  3 fr = view.cloneFeatureRenderer();
87  3 return true;
88    }
89   
90    /**
91    * Calculates distance score [i][j] between each pair of protein sequences
92    * based on their secondary structure annotations (H, E, C). The final score
93    * is normalised by the number of alignment columns processed, providing an
94    * average similarity score.
95    * <p>
96    * The parameters argument can include settings for handling gap-residue
97    * aligned positions and may determine if the score calculation is based on
98    * the longer or shorter sequence in each pair. This can be important for
99    * handling partial alignments or sequences of significantly different
100    * lengths.
101    *
102    * @param seqData
103    * The aligned sequence data including secondary structure
104    * annotations.
105    * @param params
106    * Additional parameters for customising the scoring process, such as
107    * gap handling and sequence length consideration.
108    */
 
109  6 toggle @Override
110    public MatrixI findDistances(AlignmentView seqData,
111    SimilarityParamsI params)
112    {
113   
114  6 SeqCigar[] seqs = seqData.getSequences();
115  6 int noseqs = seqs.length; // no of sequences
116  6 int cpwidth = 0; // = seqData.getWidth();
117  6 double[][] similarities = new double[noseqs][noseqs]; // matrix to store
118    // similarity score
119    // secondary structure source parameter selected by the user from the drop
120    // down.
121  6 String ssSource = params.getSecondaryStructureSource();
122  6 ssRateMatrix = ScoreModels.getInstance().getSecondaryStructureMatrix();
123   
124    // defining the default value for secondary structure source as 3d
125    // structures
126    // or JPred if user selected JPred
127  6 String selectedSSSource = Constants.SS_ANNOTATION_LABEL;
128  6 if (ssSource.equals(Constants.SECONDARY_STRUCTURE_LABELS
129    .get(Constants.SS_ANNOTATION_FROM_JPRED_LABEL)))
130    {
131  0 selectedSSSource = Constants.SS_ANNOTATION_FROM_JPRED_LABEL;
132    }
133   
134    // need to get real position for view position
135  6 int[] viscont = seqData.getVisibleContigs();
136   
137    /*
138    * Add secondary structure annotations that are added to the annotation track
139    * to the map
140    */
141  6 Map<String, HashSet<String>> ssAlignmentAnnotationForSequences = new HashMap<String, HashSet<String>>();
142   
143  6 AlignmentAnnotation[] alignAnnotList = fr.getViewport().getAlignment()
144    .getAlignmentAnnotation();
145   
146  6 if (alignAnnotList.length > 0)
147    {
148  6 for (AlignmentAnnotation aa : alignAnnotList)
149    {
150  24 if (aa.sequenceRef==null)
151    {
152  24 continue;
153    }
154  0 if (selectedSSSource.equals(aa.label))
155    {
156  0 ssAlignmentAnnotationForSequences
157    .computeIfAbsent(aa.sequenceRef.getName(),
158    k -> new HashSet<>())
159    .add(aa.description);
160    }
161    }
162    }
163   
164    /*
165    * Get the set of sequences which are not considered for the calculation.
166    * Following sequences are added:
167    * 1. Sequences without a defined secondary structure from the selected
168    * source.
169    * 2. Sequences whose secondary structure annotations are not added to
170    * the annotation track
171    */
172  6 Set<SeqCigar> seqsWithUndefinedSS = findSeqsWithUndefinedSS(seqs,
173    ssAlignmentAnnotationForSequences);
174   
175    /*
176    * scan each column, compute and add to each similarity[i, j]
177    * the number of secondary structure annotation that seqi
178    * and seqj do not share
179    */
180  12 for (int vc = 0; vc < viscont.length; vc += 2)
181    {
182    // Iterates for each column position
183  30 for (int cpos = viscont[vc]; cpos <= viscont[vc + 1]; cpos++)
184    {
185  24 cpwidth++; // used to normalise the similarity score
186   
187    /*
188    * get set of sequences without gap in the current column
189    */
190  24 Set<SeqCigar> seqsWithoutGapAtCol = findSeqsWithoutGapAtColumn(seqs,
191    cpos);
192   
193    /*
194    * calculate similarity score for each secondary structure annotation on i'th and j'th
195    * sequence and add this measure to the similarities matrix
196    * for [i, j] for j > i
197    */
198  48 for (int i = 0; i < (noseqs - 1); i++)
199    {
200    // Iterates for each sequences
201  48 for (int j = i + 1; j < noseqs; j++)
202    {
203  24 SeqCigar sc1 = seqs[i];
204  24 SeqCigar sc2 = seqs[j];
205   
206    // check if ss is defined
207  24 boolean undefinedSS1 = seqsWithUndefinedSS.contains(sc1);
208  24 boolean undefinedSS2 = seqsWithUndefinedSS.contains(sc2);
209   
210    // Set similarity to max score if both SS are not defined
211  24 if (undefinedSS1 && undefinedSS2)
212    {
213  24 similarities[i][j] += ssRateMatrix.getMaximumScore();
214  24 continue;
215    }
216   
217    // Set similarity to minimum score if either one SS is not defined
218  0 else if (undefinedSS1 || undefinedSS2)
219    {
220  0 similarities[i][j] += ssRateMatrix.getMinimumScore();
221  0 continue;
222    }
223   
224    // check if the sequence contains gap in the current column
225  0 boolean gap1 = !seqsWithoutGapAtCol.contains(sc1);
226  0 boolean gap2 = !seqsWithoutGapAtCol.contains(sc2);
227   
228    // Variable to store secondary structure at the current column
229  0 char ss1 = '*';
230  0 char ss2 = '*';
231   
232    // secondary structure is fetched only if the current column is not
233    // gap for the sequence
234  0 if (!gap1 && !undefinedSS1)
235    {
236    // fetch the position in sequence for the column and finds the
237    // corresponding secondary structure annotation
238    // TO DO - consider based on priority and displayed
239  0 int seqPosition = seqs[i].findPosition(cpos);
240  0 AlignmentAnnotation[] aa = seqs[i].getRefSeq()
241    .getAnnotation(selectedSSSource);
242  0 if (aa != null)
243  0 ss1 = AlignmentUtils.findSSAnnotationForGivenSeqposition(
244    aa[0], seqPosition);
245    }
246   
247  0 if (!gap2 && !undefinedSS2)
248    {
249  0 int seqPosition = seqs[j].findPosition(cpos);
250  0 AlignmentAnnotation[] aa = seqs[j].getRefSeq()
251    .getAnnotation(selectedSSSource);
252  0 if (aa != null)
253  0 ss2 = AlignmentUtils.findSSAnnotationForGivenSeqposition(
254    aa[0], seqPosition);
255    }
256   
257  0 if ((!gap1 && !gap2) || params.includeGaps())
258    {
259    // Calculate similarity score based on the substitution matrix
260  0 double similarityScore = ssRateMatrix.getPairwiseScore(ss1,
261    ss2);
262  0 similarities[i][j] += similarityScore;
263    }
264    }
265    }
266    }
267    }
268   
269    /*
270    * normalise the similarity scores (summed over columns) by the
271    * number of visible columns used in the calculation
272    * and fill in the bottom half of the matrix
273    */
274    // TODO JAL-2424 cpwidth may be out by 1 - affects scores but not tree shape
275   
276  18 for (int i = 0; i < noseqs; i++)
277    {
278  18 for (int j = i + 1; j < noseqs; j++)
279    {
280  6 similarities[i][j] /= cpwidth;
281  6 similarities[j][i] = similarities[i][j];
282    }
283    }
284  6 return ssRateMatrix.similarityToDistance(new Matrix(similarities));
285   
286    }
287   
288    /**
289    * Builds and returns a set containing sequences (SeqCigar) which do not have
290    * a gap at the given column position.
291    *
292    * @param seqs
293    * @param columnPosition
294    * (0..)
295    * @return
296    */
 
297  24 toggle private Set<SeqCigar> findSeqsWithoutGapAtColumn(SeqCigar[] seqs,
298    int columnPosition)
299    {
300  24 Set<SeqCigar> seqsWithoutGapAtCol = new HashSet<>();
301  24 for (SeqCigar seq : seqs)
302    {
303  48 int spos = seq.findPosition(columnPosition);
304  48 if (spos != -1)
305    {
306    /*
307    * position is not a gap
308    */
309  42 seqsWithoutGapAtCol.add(seq);
310    }
311    }
312  24 return seqsWithoutGapAtCol;
313    }
314   
315    /**
316    * Builds and returns a set containing sequences (SeqCigar) which are not
317    * considered for the similarity calculation. Following sequences are added:
318    * 1. Sequences without a defined secondary structure from the selected
319    * source. 2. Sequences whose secondary structure annotations are not added to
320    * the annotation track
321    *
322    * @param seqs
323    * @param ssAlignmentAnnotationForSequences
324    * @return
325    */
 
326  6 toggle private Set<SeqCigar> findSeqsWithUndefinedSS(SeqCigar[] seqs,
327    Map<String, HashSet<String>> ssAlignmentAnnotationForSequences)
328    {
329  6 Set<SeqCigar> seqsWithUndefinedSS = new HashSet<>();
330  6 for (SeqCigar seq : seqs)
331    {
332  12 if (isSSUndefinedOrNotAdded(seq, ssAlignmentAnnotationForSequences))
333    {
334  12 seqsWithUndefinedSS.add(seq);
335    }
336    }
337  6 return seqsWithUndefinedSS;
338    }
339   
340    /**
341    * Returns true if a sequence (SeqCigar) should not be considered for the
342    * similarity calculation. Following conditions are checked: 1. Sequence
343    * without a defined secondary structure from the selected source. 2.
344    * Sequences whose secondary structure annotations are not added to the
345    * annotation track
346    *
347    * @param seq
348    * @param ssAlignmentAnnotationForSequences
349    * @return
350    */
 
351  12 toggle private boolean isSSUndefinedOrNotAdded(SeqCigar seq,
352    Map<String, HashSet<String>> ssAlignmentAnnotationForSequences)
353    {
354  12 for (String label : Constants.SECONDARY_STRUCTURE_LABELS.keySet())
355    {
356  24 AlignmentAnnotation[] annotations = seq.getRefSeq()
357    .getAnnotation(label);
358  24 if (annotations != null)
359    {
360  6 for (AlignmentAnnotation annotation : annotations)
361    {
362  6 HashSet<String> descriptionSet = ssAlignmentAnnotationForSequences
363    .get(annotation.sequenceRef.getName());
364  6 if (descriptionSet != null)
365    {
366  0 if (descriptionSet.contains(annotation.description))
367    {
368    // Secondary structure annotation is present and
369    // added to the track, no need to add seq
370  0 return false;
371    }
372    }
373    }
374    }
375    }
376    // Either annotations are undefined or not added to the track
377  12 return true;
378    }
379   
 
380  179 toggle @Override
381    public String getName()
382    {
383  179 return NAME;
384    }
385   
 
386  0 toggle @Override
387    public String getDescription()
388    {
389  0 return description;
390    }
391   
 
392  4 toggle @Override
393    public boolean isDNA()
394    {
395  4 return false;
396    }
397   
 
398  2 toggle @Override
399    public boolean isProtein()
400    {
401  2 return false;
402    }
403   
 
404  6 toggle @Override
405    public boolean isSecondaryStructure()
406    {
407  6 return true;
408    }
409   
 
410  0 toggle @Override
411    public String toString()
412    {
413  0 return "Score between sequences based on similarity between binary "
414    + "vectors marking secondary structure displayed at each column";
415    }
416    }