Clover icon

Coverage Report

  1. Project Clover database Mon Nov 11 2024 20:42:03 GMT
  2. Package jalview.analysis.scoremodels

File FeatureDistanceModel.java

 

Coverage histogram

../../../img/srcFileCovDistChart9.png
12% of files have more coverage

Code metrics

20
55
10
1
245
149
25
0.45
5.5
10
2.5

Classes

Class Line # Actions
FeatureDistanceModel 40 55 25
0.88235295 (88.2%)
 

Contributing tests

This file is covered by 7 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.analysis.scoremodels;
22   
23    import jalview.api.AlignmentViewPanel;
24    import jalview.api.FeatureRenderer;
25    import jalview.api.analysis.ScoreModelI;
26    import jalview.api.analysis.SimilarityParamsI;
27    import jalview.datamodel.AlignmentView;
28    import jalview.datamodel.SeqCigar;
29    import jalview.datamodel.SequenceFeature;
30    import jalview.math.Matrix;
31    import jalview.math.MatrixI;
32    import jalview.util.SetUtils;
33   
34    import java.util.HashMap;
35    import java.util.HashSet;
36    import java.util.List;
37    import java.util.Map;
38    import java.util.Set;
39   
 
40    public class FeatureDistanceModel extends DistanceScoreModel
41    {
42    private static final String NAME = "Sequence Feature Similarity";
43   
44    private String description;
45   
46    FeatureRenderer fr;
47   
48    /**
49    * Constructor
50    */
 
51  89 toggle public FeatureDistanceModel()
52    {
53    }
54   
 
55  5 toggle @Override
56    public ScoreModelI getInstance(AlignmentViewPanel view)
57    {
58  5 FeatureDistanceModel instance;
59  5 try
60    {
61  5 instance = this.getClass().getDeclaredConstructor().newInstance();
62  5 instance.configureFromAlignmentView(view);
63  5 return instance;
64    } catch (InstantiationException | IllegalAccessException e)
65    {
66  0 jalview.bin.Console.errPrintln("Error in " + getClass().getName()
67    + ".getInstance(): " + e.getMessage());
68  0 return null;
69    } catch (ReflectiveOperationException roe)
70    {
71  0 return null;
72    }
73    }
74   
 
75  5 toggle boolean configureFromAlignmentView(AlignmentViewPanel view)
76   
77    {
78  5 fr = view.cloneFeatureRenderer();
79  5 return true;
80    }
81   
82    /**
83    * Calculates a distance measure [i][j] between each pair of sequences as the
84    * average number of features they have but do not share. That is, find the
85    * features each sequence pair has at each column, ignore feature types they
86    * have in common, and count the rest. The totals are normalised by the number
87    * of columns processed.
88    * <p>
89    * The parameters argument provides settings for treatment of gap-residue
90    * aligned positions, and whether the score is over the longer or shorter of
91    * each pair of sequences
92    *
93    * @param seqData
94    * @param params
95    */
 
96  6 toggle @Override
97    public MatrixI findDistances(AlignmentView seqData,
98    SimilarityParamsI params)
99    {
100  6 SeqCigar[] seqs = seqData.getSequences();
101  6 int noseqs = seqs.length;
102  6 int cpwidth = 0;// = seqData.getWidth();
103  6 double[][] distances = new double[noseqs][noseqs];
104  6 List<String> dft = null;
105  6 if (fr != null)
106    {
107  6 dft = fr.getDisplayedFeatureTypes();
108    }
109  6 if (dft == null || dft.isEmpty())
110    {
111  0 return new Matrix(distances);
112    }
113   
114    // need to get real position for view position
115  6 int[] viscont = seqData.getVisibleContigs();
116   
117    /*
118    * scan each column, compute and add to each distance[i, j]
119    * the number of feature types that seqi and seqj do not share
120    */
121  12 for (int vc = 0; vc < viscont.length; vc += 2)
122    {
123  32 for (int cpos = viscont[vc]; cpos <= viscont[vc + 1]; cpos++)
124    {
125  26 cpwidth++;
126   
127    /*
128    * first record feature types in this column for each sequence
129    */
130  26 Map<SeqCigar, Set<String>> sfap = findFeatureTypesAtColumn(seqs,
131    cpos);
132   
133    /*
134    * count feature types on either i'th or j'th sequence but not both
135    * and add this 'distance' measure to the total for [i, j] for j > i
136    */
137  70 for (int i = 0; i < (noseqs - 1); i++)
138    {
139  115 for (int j = i + 1; j < noseqs; j++)
140    {
141  71 SeqCigar sc1 = seqs[i];
142  71 SeqCigar sc2 = seqs[j];
143  71 Set<String> set1 = sfap.get(sc1);
144  71 Set<String> set2 = sfap.get(sc2);
145  71 boolean gap1 = set1 == null;
146  71 boolean gap2 = set2 == null;
147   
148    /*
149    * gap-gap always scores zero
150    * residue-residue is always scored
151    * include gap-residue score if params say to do so
152    */
153  71 if ((!gap1 && !gap2) || params.includeGaps())
154    {
155  67 int seqDistance = SetUtils.countDisjunction(set1, set2);
156  67 distances[i][j] += seqDistance;
157    }
158    }
159    }
160    }
161    }
162   
163    /*
164    * normalise the distance scores (summed over columns) by the
165    * number of visible columns used in the calculation
166    * and fill in the bottom half of the matrix
167    */
168    // TODO JAL-2424 cpwidth may be out by 1 - affects scores but not tree shape
169  24 for (int i = 0; i < noseqs; i++)
170    {
171  39 for (int j = i + 1; j < noseqs; j++)
172    {
173  21 distances[i][j] /= cpwidth;
174  21 distances[j][i] = distances[i][j];
175    }
176    }
177  6 return new Matrix(distances);
178    }
179   
180    /**
181    * Builds and returns a map containing a (possibly empty) list (one per
182    * SeqCigar) of visible feature types at the given column position. The map
183    * does not include entries for features which straddle a gapped column
184    * positions.
185    *
186    * @param seqs
187    * @param columnPosition
188    * (0..)
189    * @return
190    */
 
191  26 toggle protected Map<SeqCigar, Set<String>> findFeatureTypesAtColumn(
192    SeqCigar[] seqs, int columnPosition)
193    {
194  26 Map<SeqCigar, Set<String>> sfap = new HashMap<>();
195  26 for (SeqCigar seq : seqs)
196    {
197  70 int spos = seq.findPosition(columnPosition);
198  70 if (spos != -1)
199    {
200    /*
201    * position is not a gap
202    */
203  60 Set<String> types = new HashSet<>();
204  60 List<SequenceFeature> sfs = fr
205    .findFeaturesAtResidue(seq.getRefSeq(), spos, spos);
206  60 for (SequenceFeature sf : sfs)
207    {
208  70 types.add(sf.getType());
209    }
210  60 sfap.put(seq, types);
211    }
212    }
213  26 return sfap;
214    }
215   
 
216  170 toggle @Override
217    public String getName()
218    {
219  170 return NAME;
220    }
221   
 
222  0 toggle @Override
223    public String getDescription()
224    {
225  0 return description;
226    }
227   
 
228  4 toggle @Override
229    public boolean isDNA()
230    {
231  4 return true;
232    }
233   
 
234  2 toggle @Override
235    public boolean isProtein()
236    {
237  2 return true;
238    }
239   
 
240  0 toggle @Override
241    public String toString()
242    {
243  0 return "Score between sequences based on hamming distance between binary vectors marking features displayed at each column";
244    }
245    }