Clover icon

jalviewX

  1. Project Clover database Wed Oct 31 2018 15:13:58 GMT
  2. Package jalview.analysis.scoremodels

File FeatureDistanceModel.java

 

Coverage histogram

../../../img/srcFileCovDistChart9.png
12% of files have more coverage

Code metrics

20
54
10
1
242
146
24
0.44
5.4
10
2.4

Classes

Class Line # Actions
FeatureDistanceModel 40 54 24 13
0.8452381 84.5%
 

Contributing tests

This file is covered by 6 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.analysis.scoremodels;
22   
23    import jalview.api.AlignmentViewPanel;
24    import jalview.api.FeatureRenderer;
25    import jalview.api.analysis.ScoreModelI;
26    import jalview.api.analysis.SimilarityParamsI;
27    import jalview.datamodel.AlignmentView;
28    import jalview.datamodel.SeqCigar;
29    import jalview.datamodel.SequenceFeature;
30    import jalview.math.Matrix;
31    import jalview.math.MatrixI;
32    import jalview.util.SetUtils;
33   
34    import java.util.HashMap;
35    import java.util.HashSet;
36    import java.util.List;
37    import java.util.Map;
38    import java.util.Set;
39   
 
40    public class FeatureDistanceModel extends DistanceScoreModel
41    {
42    private static final String NAME = "Sequence Feature Similarity";
43   
44    private String description;
45   
46    FeatureRenderer fr;
47   
48    /**
49    * Constructor
50    */
 
51  11 toggle public FeatureDistanceModel()
52    {
53    }
54   
 
55  5 toggle @Override
56    public ScoreModelI getInstance(AlignmentViewPanel view)
57    {
58  5 FeatureDistanceModel instance;
59  5 try
60    {
61  5 instance = this.getClass().newInstance();
62  5 instance.configureFromAlignmentView(view);
63  5 return instance;
64    } catch (InstantiationException | IllegalAccessException e)
65    {
66  0 System.err.println("Error in " + getClass().getName()
67    + ".getInstance(): " + e.getMessage());
68  0 return null;
69    }
70    }
71   
 
72  5 toggle boolean configureFromAlignmentView(AlignmentViewPanel view)
73   
74    {
75  5 fr = view.cloneFeatureRenderer();
76  5 return true;
77    }
78   
79    /**
80    * Calculates a distance measure [i][j] between each pair of sequences as the
81    * average number of features they have but do not share. That is, find the
82    * features each sequence pair has at each column, ignore feature types they
83    * have in common, and count the rest. The totals are normalised by the number
84    * of columns processed.
85    * <p>
86    * The parameters argument provides settings for treatment of gap-residue
87    * aligned positions, and whether the score is over the longer or shorter of
88    * each pair of sequences
89    *
90    * @param seqData
91    * @param params
92    */
 
93  6 toggle @Override
94    public MatrixI findDistances(AlignmentView seqData,
95    SimilarityParamsI params)
96    {
97  6 SeqCigar[] seqs = seqData.getSequences();
98  6 int noseqs = seqs.length;
99  6 int cpwidth = 0;// = seqData.getWidth();
100  6 double[][] distances = new double[noseqs][noseqs];
101  6 List<String> dft = null;
102  6 if (fr != null)
103    {
104  6 dft = fr.getDisplayedFeatureTypes();
105    }
106  6 if (dft == null || dft.isEmpty())
107    {
108  0 return new Matrix(distances);
109    }
110   
111    // need to get real position for view position
112  6 int[] viscont = seqData.getVisibleContigs();
113   
114    /*
115    * scan each column, compute and add to each distance[i, j]
116    * the number of feature types that seqi and seqj do not share
117    */
118  12 for (int vc = 0; vc < viscont.length; vc += 2)
119    {
120  32 for (int cpos = viscont[vc]; cpos <= viscont[vc + 1]; cpos++)
121    {
122  26 cpwidth++;
123   
124    /*
125    * first record feature types in this column for each sequence
126    */
127  26 Map<SeqCigar, Set<String>> sfap = findFeatureTypesAtColumn(seqs,
128    cpos);
129   
130    /*
131    * count feature types on either i'th or j'th sequence but not both
132    * and add this 'distance' measure to the total for [i, j] for j > i
133    */
134  70 for (int i = 0; i < (noseqs - 1); i++)
135    {
136  115 for (int j = i + 1; j < noseqs; j++)
137    {
138  71 SeqCigar sc1 = seqs[i];
139  71 SeqCigar sc2 = seqs[j];
140  71 Set<String> set1 = sfap.get(sc1);
141  71 Set<String> set2 = sfap.get(sc2);
142  71 boolean gap1 = set1 == null;
143  71 boolean gap2 = set2 == null;
144   
145    /*
146    * gap-gap always scores zero
147    * residue-residue is always scored
148    * include gap-residue score if params say to do so
149    */
150  71 if ((!gap1 && !gap2) || params.includeGaps())
151    {
152  67 int seqDistance = SetUtils.countDisjunction(set1, set2);
153  67 distances[i][j] += seqDistance;
154    }
155    }
156    }
157    }
158    }
159   
160    /*
161    * normalise the distance scores (summed over columns) by the
162    * number of visible columns used in the calculation
163    * and fill in the bottom half of the matrix
164    */
165    // TODO JAL-2424 cpwidth may be out by 1 - affects scores but not tree shape
166  24 for (int i = 0; i < noseqs; i++)
167    {
168  39 for (int j = i + 1; j < noseqs; j++)
169    {
170  21 distances[i][j] /= cpwidth;
171  21 distances[j][i] = distances[i][j];
172    }
173    }
174  6 return new Matrix(distances);
175    }
176   
177    /**
178    * Builds and returns a map containing a (possibly empty) list (one per
179    * SeqCigar) of visible feature types at the given column position. The map
180    * does not include entries for features which straddle a gapped column
181    * positions.
182    *
183    * @param seqs
184    * @param columnPosition
185    * (0..)
186    * @return
187    */
 
188  26 toggle protected Map<SeqCigar, Set<String>> findFeatureTypesAtColumn(
189    SeqCigar[] seqs, int columnPosition)
190    {
191  26 Map<SeqCigar, Set<String>> sfap = new HashMap<SeqCigar, Set<String>>();
192  26 for (SeqCigar seq : seqs)
193    {
194  70 int spos = seq.findPosition(columnPosition);
195  70 if (spos != -1)
196    {
197    /*
198    * position is not a gap
199    */
200  60 Set<String> types = new HashSet<String>();
201  60 List<SequenceFeature> sfs = fr.findFeaturesAtResidue(
202    seq.getRefSeq(), spos);
203  60 for (SequenceFeature sf : sfs)
204    {
205  70 types.add(sf.getType());
206    }
207  60 sfap.put(seq, types);
208    }
209    }
210  26 return sfap;
211    }
212   
 
213  8 toggle @Override
214    public String getName()
215    {
216  8 return NAME;
217    }
218   
 
219  0 toggle @Override
220    public String getDescription()
221    {
222  0 return description;
223    }
224   
 
225  0 toggle @Override
226    public boolean isDNA()
227    {
228  0 return true;
229    }
230   
 
231  0 toggle @Override
232    public boolean isProtein()
233    {
234  0 return true;
235    }
236   
 
237  0 toggle @Override
238    public String toString()
239    {
240  0 return "Score between sequences based on hamming distance between binary vectors marking features displayed at each column";
241    }
242    }