Clover icon

Coverage Report

  1. Project Clover database Wed Dec 3 2025 16:47:11 GMT
  2. Package jalview.analysis.scoremodels

File SecondaryStructureDistanceModel.java

 

Coverage histogram

../../../img/srcFileCovDistChart9.png
13% of files have more coverage

Code metrics

44
102
12
1
395
246
45
0.44
8.5
12
3.75

Classes

Class Line # Actions
SecondaryStructureDistanceModel 47 102 45
0.8924051 (89.2%)
 

Contributing tests

This file is covered by 10 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.analysis.scoremodels;
22   
23    import jalview.analysis.AlignmentAnnotationUtils;
24    import jalview.analysis.AlignmentUtils;
25    import jalview.api.AlignmentViewPanel;
26    import jalview.api.FeatureRenderer;
27    import jalview.api.analysis.ScoreModelI;
28    import jalview.api.analysis.SimilarityParamsI;
29    import jalview.datamodel.AlignmentAnnotation;
30    import jalview.datamodel.AlignmentView;
31    import jalview.datamodel.SeqCigar;
32    import jalview.datamodel.SequenceI;
33    import jalview.math.Matrix;
34    import jalview.math.MatrixI;
35    import jalview.util.Constants;
36   
37    import java.util.ArrayList;
38    import java.util.HashMap;
39    import java.util.HashSet;
40    import java.util.List;
41    import java.util.Map;
42    import java.util.Set;
43   
44    /* This class contains methods to calculate distance score between
45    * secondary structure annotations of the sequences.
46    */
 
47    public class SecondaryStructureDistanceModel extends DistanceScoreModel
48    {
49    private static final String NAME = "Secondary Structure Similarity";
50   
51    private ScoreMatrix ssRateMatrix;
52   
53    private String description;
54   
55    FeatureRenderer fr;
56   
57    /**
58    * Constructor
59    */
 
60  104 toggle public SecondaryStructureDistanceModel()
61    {
62   
63    }
64   
 
65  8 toggle @Override
66    public ScoreModelI getInstance(AlignmentViewPanel view)
67    {
68  8 SecondaryStructureDistanceModel instance;
69  8 try
70    {
71  8 instance = this.getClass().getDeclaredConstructor().newInstance();
72  8 instance.configureFromAlignmentView(view);
73  8 return instance;
74    } catch (InstantiationException | IllegalAccessException e)
75    {
76  0 jalview.bin.Console.errPrintln("Error in " + getClass().getName()
77    + ".getInstance(): " + e.getMessage());
78  0 return null;
79    } catch (ReflectiveOperationException roe)
80    {
81  0 return null;
82    }
83    }
84   
 
85  8 toggle boolean configureFromAlignmentView(AlignmentViewPanel view)
86   
87    {
88  8 fr = view.cloneFeatureRenderer();
89  8 return true;
90    }
91   
92    ArrayList<AlignmentAnnotation> ssForSeqs = null;
93   
 
94  8 toggle @Override
95    public SequenceI[] expandSeqData(SequenceI[] sequences,
96    AlignmentView seqData, SimilarityParamsI scoreParams,
97    List<String> labels, ArrayList<AlignmentAnnotation> ssAnnotationForSeqs,
98    HashMap<Integer, String> annotationDetails)
99    {
100  8 ssForSeqs = new ArrayList<AlignmentAnnotation>();
101  8 List<SequenceI> newSequences = new ArrayList<SequenceI>();
102  8 List<SeqCigar> newCigs = new ArrayList<SeqCigar>();
103  8 int sq = 0;
104   
105  8 AlignmentAnnotation[] alignAnnotList = fr.getViewport().getAlignment()
106    .getAlignmentAnnotation();
107   
108  8 String ssSource = scoreParams.getSecondaryStructureSource();
109  8 if (ssSource == null || ssSource == "")
110    {
111  0 ssSource = Constants.SS_ALL_PROVIDERS;
112    }
113   
114    /*
115    * Add secondary structure annotations that are added to the annotation track
116    * to the map
117    */
118  8 Map<SequenceI, ArrayList<AlignmentAnnotation>> ssAlignmentAnnotationForSequences = AlignmentUtils
119    .getSequenceAssociatedAlignmentAnnotations(alignAnnotList,
120    ssSource);
121   
122  8 for (SeqCigar scig : seqData.getSequences())
123    {
124    // get the next sequence that should be bound to this scig: may be null
125  29 SequenceI alSeq = sequences[sq++];
126  29 List<AlignmentAnnotation> ssec = ssAlignmentAnnotationForSequences
127    .get(scig.getRefSeq());
128  29 if (ssec == null)
129    {
130    // not defined
131  28 newSequences.add(alSeq);
132  28 if (alSeq != null)
133    {
134    //labels.add("No Secondary Structure");
135  14 labels.add(Constants.STRUCTURE_PROVIDERS.get("None"));
136    }
137  28 SeqCigar newSeqCigar = scig; // new SeqCigar(scig);
138  28 newCigs.add(newSeqCigar);
139  28 ssForSeqs.add(null);
140    }
141    else
142    {
143  5 for (int i = 0; i < ssec.size(); i++)
144    {
145  4 if (alSeq != null)
146    {
147    // Add annotationDetails if the annotation has
148    // ANNOTATION_DETAILS property value (additional metadata)
149   
150  4 if (ssec.get(i).hasAnnotationDetailsProperty())
151    {
152    // using key = labels.size() gives the position of the node
153  0 annotationDetails.put(labels.size(), ssec.get(i).getAnnotationDetailsProperty());
154    }
155   
156  4 String provider = AlignmentAnnotationUtils
157    .extractSSSourceFromAnnotationDescription(ssec.get(i));
158  4 labels.add(provider);
159    }
160  4 newSequences.add(alSeq);
161  4 SeqCigar newSeqCigar = scig; // new SeqCigar(scig);
162  4 newCigs.add(newSeqCigar);
163  4 ssForSeqs.add(ssec.get(i));
164    }
165    }
166    }
167  8 ssAnnotationForSeqs.addAll(ssForSeqs);
168  8 seqData.setSequences(newCigs.toArray(new SeqCigar[0]));
169  8 return newSequences.toArray(new SequenceI[0]);
170   
171    }
172   
173    /**
174    * Calculates distance score [i][j] between each pair of protein sequences
175    * based on their secondary structure annotations (H, E, C). The final score
176    * is normalised by the number of alignment columns processed, providing an
177    * average similarity score.
178    * <p>
179    * The parameters argument can include settings for handling gap-residue
180    * aligned positions and may determine if the score calculation is based on
181    * the longer or shorter sequence in each pair. This can be important for
182    * handling partial alignments or sequences of significantly different
183    * lengths.
184    *
185    * @param seqData
186    * The aligned sequence data including secondary structure
187    * annotations.
188    * @param params
189    * Additional parameters for customising the scoring process, such as
190    * gap handling and sequence length consideration.
191    */
 
192  11 toggle @Override
193    public MatrixI findDistances(AlignmentView seqData,
194    SimilarityParamsI params)
195    {
196  11 if (ssForSeqs == null
197    || ssForSeqs.size() != seqData.getSequences().length)
198    {
199    // expandSeqData needs to be called to initialise the hash
200  7 SequenceI[] sequences = new SequenceI[seqData.getSequences().length];
201    // we throw away the new labels in this case..
202  7 expandSeqData(sequences, seqData, params, new ArrayList<String>(),
203    new ArrayList<AlignmentAnnotation>(), new HashMap<Integer, String>());
204    }
205  11 SeqCigar[] seqs = seqData.getSequences();
206  11 int noseqs = seqs.length; // no of sequences
207  11 int cpwidth = 0;
208  11 double[][] similarities = new double[noseqs][noseqs]; // matrix to store
209    // similarity score
210    // secondary structure source parameter selected by the user from the drop
211    // down.
212  11 String ssSource = params.getSecondaryStructureSource();
213  11 if (ssSource == null || ssSource == "")
214    {
215  0 ssSource = Constants.SS_ALL_PROVIDERS;
216    }
217  11 ssRateMatrix = ScoreModels.getInstance().getSecondaryStructureMatrix();
218   
219    // need to get real position for view position
220  11 int[] viscont = seqData.getVisibleContigs();
221   
222    /*
223    * scan each column, compute and add to each similarity[i, j]
224    * the number of secondary structure annotation that seqi
225    * and seqj do not share
226    */
227  22 for (int vc = 0; vc < viscont.length; vc += 2)
228    {
229    // Iterates for each column position
230  152 for (int cpos = viscont[vc]; cpos <= viscont[vc + 1]; cpos++)
231    {
232  141 cpwidth++; // used to normalise the similarity score
233   
234    /*
235    * get set of sequences without gap in the current column
236    */
237  141 Set<SeqCigar> seqsWithoutGapAtCol = findSeqsWithoutGapAtColumn(seqs,
238    cpos);
239   
240    /*
241    * calculate similarity score for each secondary structure annotation on i'th and j'th
242    * sequence and add this measure to the similarities matrix
243    * for [i, j] for j > i
244    */
245  1898 for (int i = 0; i < (noseqs - 1); i++)
246    {
247  1757 AlignmentAnnotation aa_i = ssForSeqs.get(i);
248  1757 boolean undefinedSS1 = aa_i == null;
249    // check if the sequence contains gap in the current column
250  1757 boolean gap1 = !seqsWithoutGapAtCol.contains(seqs[i]);
251    // secondary structure is fetched only if the current column is not
252    // gap for the sequence
253  1757 char ss1 = '*';
254  1757 if (!gap1 && !undefinedSS1)
255    {
256    // fetch the position in sequence for the column and finds the
257    // corresponding secondary structure annotation
258    // TO DO - consider based on priority and displayed
259  376 int seqPosition_i = seqs[i].findPosition(cpos);
260  376 if (aa_i != null)
261  376 ss1 = AlignmentUtils.findSSAnnotationForGivenSeqposition(aa_i,
262    seqPosition_i);
263    }
264    // Iterates for each sequences
265  17250 for (int j = i + 1; j < noseqs; j++)
266    {
267   
268    // check if ss is defined
269  15493 AlignmentAnnotation aa_j = ssForSeqs.get(j);
270  15493 boolean undefinedSS2 = aa_j == null;
271   
272    // Set similarity to max score if both SS are not defined
273  15493 if (undefinedSS1 && undefinedSS2)
274    {
275  9231 similarities[i][j] += ssRateMatrix.getMaximumScore();
276  9231 continue;
277    }
278   
279    // Set similarity to minimum score if either one SS is not defined
280  6262 else if (undefinedSS1 || undefinedSS2)
281    {
282  5656 similarities[i][j] += ssRateMatrix.getMinimumScore();
283  5656 continue;
284    }
285   
286  606 boolean gap2 = !seqsWithoutGapAtCol.contains(seqs[j]);
287   
288    // Variable to store secondary structure at the current column
289  606 char ss2 = '*';
290   
291  606 if (!gap2 && !undefinedSS2)
292    {
293  564 int seqPosition = seqs[j].findPosition(cpos);
294   
295  564 if (aa_j != null)
296  564 ss2 = AlignmentUtils.findSSAnnotationForGivenSeqposition(
297    aa_j, seqPosition);
298    }
299   
300  606 if ((!gap1 && !gap2) || params.includeGaps())
301    {
302    // Calculate similarity score based on the substitution matrix
303  606 double similarityScore = ssRateMatrix.getPairwiseScore(ss1,
304    ss2);
305  606 similarities[i][j] += similarityScore;
306    }
307    }
308    }
309    }
310    }
311   
312    /*
313    * normalise the similarity scores (summed over columns) by the
314    * number of visible columns used in the calculation
315    * and fill in the bottom half of the matrix
316    */
317    // TODO JAL-2424 cpwidth may be out by 1 - affects scores but not tree shape
318   
319  49 for (int i = 0; i < noseqs; i++)
320    {
321  201 for (int j = i + 1; j < noseqs; j++)
322    {
323  163 similarities[i][j] /= cpwidth;
324  163 similarities[j][i] = similarities[i][j];
325    }
326    }
327  11 return SimilarityScoreModel
328    .similarityToDistance(new Matrix(similarities));
329   
330    }
331   
332    /**
333    * Builds and returns a set containing sequences (SeqCigar) which do not have
334    * a gap at the given column position.
335    *
336    * @param seqs
337    * @param columnPosition
338    * (0..)
339    * @return
340    */
 
341  141 toggle private Set<SeqCigar> findSeqsWithoutGapAtColumn(SeqCigar[] seqs,
342    int columnPosition)
343    {
344  141 Set<SeqCigar> seqsWithoutGapAtCol = new HashSet<>();
345  141 for (SeqCigar seq : seqs)
346    {
347  1898 int spos = seq.findPosition(columnPosition);
348  1898 if (spos != -1)
349    {
350    /*
351    * position is not a gap
352    */
353  1587 seqsWithoutGapAtCol.add(seq);
354    }
355    }
356  141 return seqsWithoutGapAtCol;
357    }
358   
 
359  189 toggle @Override
360    public String getName()
361    {
362  189 return NAME;
363    }
364   
 
365  0 toggle @Override
366    public String getDescription()
367    {
368  0 return description;
369    }
370   
 
371  4 toggle @Override
372    public boolean isDNA()
373    {
374  4 return false;
375    }
376   
 
377  2 toggle @Override
378    public boolean isProtein()
379    {
380  2 return false;
381    }
382   
 
383  6 toggle @Override
384    public boolean isSecondaryStructure()
385    {
386  6 return true;
387    }
388   
 
389  0 toggle @Override
390    public String toString()
391    {
392  0 return "Score between sequences based on similarity between binary "
393    + "vectors marking secondary structure displayed at each column";
394    }
395    }