Clover icon

Coverage Report

  1. Project Clover database Fri Dec 6 2024 13:47:14 GMT
  2. Package jalview.analysis.scoremodels

File SecondaryStructureDistanceModel.java

 

Coverage histogram

../../../img/srcFileCovDistChart7.png
29% of files have more coverage

Code metrics

42
100
12
1
385
239
44
0.44
8.33
12
3.67

Classes

Class Line # Actions
SecondaryStructureDistanceModel 47 100 44
0.65584415 (65.6%)
 

Contributing tests

This file is covered by 5 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.analysis.scoremodels;
22   
23    import jalview.analysis.AlignmentUtils;
24    import jalview.api.AlignmentViewPanel;
25    import jalview.api.FeatureRenderer;
26    import jalview.api.analysis.ScoreModelI;
27    import jalview.api.analysis.SimilarityParamsI;
28    import jalview.datamodel.AlignmentAnnotation;
29    import jalview.datamodel.AlignmentView;
30    import jalview.datamodel.SeqCigar;
31    import jalview.datamodel.SequenceI;
32    import jalview.math.Matrix;
33    import jalview.math.MatrixI;
34    import jalview.util.Constants;
35    import jalview.util.MessageManager;
36   
37    import java.util.ArrayList;
38    import java.util.HashMap;
39    import java.util.HashSet;
40    import java.util.List;
41    import java.util.Map;
42    import java.util.Set;
43   
44    /* This class contains methods to calculate distance score between
45    * secondary structure annotations of the sequences.
46    */
 
47    public class SecondaryStructureDistanceModel extends DistanceScoreModel
48    {
49    private static final String NAME = "Secondary Structure Similarity";
50   
51    private ScoreMatrix ssRateMatrix;
52   
53    private String description;
54   
55    FeatureRenderer fr;
56   
57    /**
58    * Constructor
59    */
 
60  86 toggle public SecondaryStructureDistanceModel()
61    {
62   
63    }
64   
 
65  3 toggle @Override
66    public ScoreModelI getInstance(AlignmentViewPanel view)
67    {
68  3 SecondaryStructureDistanceModel instance;
69  3 try
70    {
71  3 instance = this.getClass().getDeclaredConstructor().newInstance();
72  3 instance.configureFromAlignmentView(view);
73  3 return instance;
74    } catch (InstantiationException | IllegalAccessException e)
75    {
76  0 jalview.bin.Console.errPrintln("Error in " + getClass().getName()
77    + ".getInstance(): " + e.getMessage());
78  0 return null;
79    } catch (ReflectiveOperationException roe)
80    {
81  0 return null;
82    }
83    }
84   
 
85  3 toggle boolean configureFromAlignmentView(AlignmentViewPanel view)
86   
87    {
88  3 fr = view.cloneFeatureRenderer();
89  3 return true;
90    }
91   
92    ArrayList<AlignmentAnnotation> ssForSeqs = null;
93   
 
94  3 toggle @Override
95    public SequenceI[] expandSeqData(SequenceI[] sequences,
96    AlignmentView seqData, SimilarityParamsI scoreParams,
97    List<String> labels, ArrayList<AlignmentAnnotation> ssAnnotationForSeqs)
98    {
99  3 ssForSeqs = new ArrayList<AlignmentAnnotation>();
100  3 List<SequenceI> newSequences = new ArrayList<SequenceI>();
101  3 List<SeqCigar> newCigs = new ArrayList<SeqCigar>();
102  3 int sq = 0;
103   
104  3 AlignmentAnnotation[] alignAnnotList = fr.getViewport().getAlignment()
105    .getAlignmentAnnotation();
106   
107  3 String ssSource = scoreParams.getSecondaryStructureSource();
108  3 if (ssSource == null || ssSource == "")
109    {
110  0 ssSource = Constants.SS_ALL_PROVIDERS;
111    }
112   
113    /*
114    * Add secondary structure annotations that are added to the annotation track
115    * to the map
116    */
117  3 Map<SequenceI, ArrayList<AlignmentAnnotation>> ssAlignmentAnnotationForSequences = AlignmentUtils
118    .getSequenceAssociatedAlignmentAnnotations(alignAnnotList,
119    ssSource);
120   
121  3 for (SeqCigar scig : seqData.getSequences())
122    {
123    // get the next sequence that should be bound to this scig: may be null
124  6 SequenceI alSeq = sequences[sq++];
125  6 List<AlignmentAnnotation> ssec = ssAlignmentAnnotationForSequences
126    .get(scig.getRefSeq());
127  6 if (ssec == null)
128    {
129    // not defined
130  6 newSequences.add(alSeq);
131  6 if (alSeq != null)
132    {
133    //labels.add("No Secondary Structure");
134  0 labels.add(Constants.STRUCTURE_PROVIDERS.get("None"));
135    }
136  6 SeqCigar newSeqCigar = scig; // new SeqCigar(scig);
137  6 newCigs.add(newSeqCigar);
138  6 ssForSeqs.add(null);
139    }
140    else
141    {
142  0 for (int i = 0; i < ssec.size(); i++)
143    {
144  0 if (alSeq != null)
145    {
146  0 String provider = AlignmentUtils.extractSSSourceFromAnnotationDescription(ssec.get(i));
147    //String providerAbbreviation = AlignmentUtils.getProviderKey(provider);
148  0 labels.add(provider);
149    }
150    // newSequences.add(seq);
151  0 newSequences.add(alSeq);
152  0 SeqCigar newSeqCigar = scig; // new SeqCigar(scig);
153  0 newCigs.add(newSeqCigar);
154  0 ssForSeqs.add(ssec.get(i));
155    }
156    }
157    }
158  3 ssAnnotationForSeqs.addAll(ssForSeqs);
159  3 seqData.setSequences(newCigs.toArray(new SeqCigar[0]));
160  3 return newSequences.toArray(new SequenceI[0]);
161   
162    }
163   
164    /**
165    * Calculates distance score [i][j] between each pair of protein sequences
166    * based on their secondary structure annotations (H, E, C). The final score
167    * is normalised by the number of alignment columns processed, providing an
168    * average similarity score.
169    * <p>
170    * The parameters argument can include settings for handling gap-residue
171    * aligned positions and may determine if the score calculation is based on
172    * the longer or shorter sequence in each pair. This can be important for
173    * handling partial alignments or sequences of significantly different
174    * lengths.
175    *
176    * @param seqData
177    * The aligned sequence data including secondary structure
178    * annotations.
179    * @param params
180    * Additional parameters for customising the scoring process, such as
181    * gap handling and sequence length consideration.
182    */
 
183  6 toggle @Override
184    public MatrixI findDistances(AlignmentView seqData,
185    SimilarityParamsI params)
186    {
187  6 if (ssForSeqs == null
188    || ssForSeqs.size() != seqData.getSequences().length)
189    {
190    // expandSeqData needs to be called to initialise the hash
191  3 SequenceI[] sequences = new SequenceI[seqData.getSequences().length];
192    // we throw away the new labels in this case..
193  3 expandSeqData(sequences, seqData, params, new ArrayList<String>(), new ArrayList<AlignmentAnnotation>());
194    }
195  6 SeqCigar[] seqs = seqData.getSequences();
196  6 int noseqs = seqs.length; // no of sequences
197  6 int cpwidth = 0;
198  6 double[][] similarities = new double[noseqs][noseqs]; // matrix to store
199    // similarity score
200    // secondary structure source parameter selected by the user from the drop
201    // down.
202  6 String ssSource = params.getSecondaryStructureSource();
203  6 if (ssSource == null || ssSource == "")
204    {
205  0 ssSource = Constants.SS_ALL_PROVIDERS;
206    }
207  6 ssRateMatrix = ScoreModels.getInstance().getSecondaryStructureMatrix();
208   
209    // need to get real position for view position
210  6 int[] viscont = seqData.getVisibleContigs();
211   
212    /*
213    * scan each column, compute and add to each similarity[i, j]
214    * the number of secondary structure annotation that seqi
215    * and seqj do not share
216    */
217  12 for (int vc = 0; vc < viscont.length; vc += 2)
218    {
219    // Iterates for each column position
220  30 for (int cpos = viscont[vc]; cpos <= viscont[vc + 1]; cpos++)
221    {
222  24 cpwidth++; // used to normalise the similarity score
223   
224    /*
225    * get set of sequences without gap in the current column
226    */
227  24 Set<SeqCigar> seqsWithoutGapAtCol = findSeqsWithoutGapAtColumn(seqs,
228    cpos);
229   
230    /*
231    * calculate similarity score for each secondary structure annotation on i'th and j'th
232    * sequence and add this measure to the similarities matrix
233    * for [i, j] for j > i
234    */
235  48 for (int i = 0; i < (noseqs - 1); i++)
236    {
237  24 AlignmentAnnotation aa_i = ssForSeqs.get(i);
238  24 boolean undefinedSS1 = aa_i == null;
239    // check if the sequence contains gap in the current column
240  24 boolean gap1 = !seqsWithoutGapAtCol.contains(seqs[i]);
241    // secondary structure is fetched only if the current column is not
242    // gap for the sequence
243  24 char ss1 = '*';
244  24 if (!gap1 && !undefinedSS1)
245    {
246    // fetch the position in sequence for the column and finds the
247    // corresponding secondary structure annotation
248    // TO DO - consider based on priority and displayed
249  0 int seqPosition_i = seqs[i].findPosition(cpos);
250  0 if (aa_i != null)
251  0 ss1 = AlignmentUtils.findSSAnnotationForGivenSeqposition(aa_i,
252    seqPosition_i);
253    }
254    // Iterates for each sequences
255  48 for (int j = i + 1; j < noseqs; j++)
256    {
257   
258    // check if ss is defined
259  24 AlignmentAnnotation aa_j = ssForSeqs.get(j);
260  24 boolean undefinedSS2 = aa_j == null;
261   
262    // Set similarity to max score if both SS are not defined
263  24 if (undefinedSS1 && undefinedSS2)
264    {
265  24 similarities[i][j] += ssRateMatrix.getMaximumScore();
266  24 continue;
267    }
268   
269    // Set similarity to minimum score if either one SS is not defined
270  0 else if (undefinedSS1 || undefinedSS2)
271    {
272  0 similarities[i][j] += ssRateMatrix.getMinimumScore();
273  0 continue;
274    }
275   
276  0 boolean gap2 = !seqsWithoutGapAtCol.contains(seqs[j]);
277   
278    // Variable to store secondary structure at the current column
279  0 char ss2 = '*';
280   
281  0 if (!gap2 && !undefinedSS2)
282    {
283  0 int seqPosition = seqs[j].findPosition(cpos);
284   
285  0 if (aa_j != null)
286  0 ss2 = AlignmentUtils.findSSAnnotationForGivenSeqposition(
287    aa_j, seqPosition);
288    }
289   
290  0 if ((!gap1 && !gap2) || params.includeGaps())
291    {
292    // Calculate similarity score based on the substitution matrix
293  0 double similarityScore = ssRateMatrix.getPairwiseScore(ss1,
294    ss2);
295  0 similarities[i][j] += similarityScore;
296    }
297    }
298    }
299    }
300    }
301   
302    /*
303    * normalise the similarity scores (summed over columns) by the
304    * number of visible columns used in the calculation
305    * and fill in the bottom half of the matrix
306    */
307    // TODO JAL-2424 cpwidth may be out by 1 - affects scores but not tree shape
308   
309  18 for (int i = 0; i < noseqs; i++)
310    {
311  18 for (int j = i + 1; j < noseqs; j++)
312    {
313  6 similarities[i][j] /= cpwidth;
314  6 similarities[j][i] = similarities[i][j];
315    }
316    }
317  6 return SimilarityScoreModel
318    .similarityToDistance(new Matrix(similarities));
319   
320    }
321   
322    /**
323    * Builds and returns a set containing sequences (SeqCigar) which do not have
324    * a gap at the given column position.
325    *
326    * @param seqs
327    * @param columnPosition
328    * (0..)
329    * @return
330    */
 
331  24 toggle private Set<SeqCigar> findSeqsWithoutGapAtColumn(SeqCigar[] seqs,
332    int columnPosition)
333    {
334  24 Set<SeqCigar> seqsWithoutGapAtCol = new HashSet<>();
335  24 for (SeqCigar seq : seqs)
336    {
337  48 int spos = seq.findPosition(columnPosition);
338  48 if (spos != -1)
339    {
340    /*
341    * position is not a gap
342    */
343  42 seqsWithoutGapAtCol.add(seq);
344    }
345    }
346  24 return seqsWithoutGapAtCol;
347    }
348   
 
349  167 toggle @Override
350    public String getName()
351    {
352  167 return NAME;
353    }
354   
 
355  0 toggle @Override
356    public String getDescription()
357    {
358  0 return description;
359    }
360   
 
361  4 toggle @Override
362    public boolean isDNA()
363    {
364  4 return false;
365    }
366   
 
367  2 toggle @Override
368    public boolean isProtein()
369    {
370  2 return false;
371    }
372   
 
373  6 toggle @Override
374    public boolean isSecondaryStructure()
375    {
376  6 return true;
377    }
378   
 
379  0 toggle @Override
380    public String toString()
381    {
382  0 return "Score between sequences based on similarity between binary "
383    + "vectors marking secondary structure displayed at each column";
384    }
385    }