1 |
|
|
2 |
|
|
3 |
|
|
4 |
|
|
5 |
|
|
6 |
|
|
7 |
|
|
8 |
|
|
9 |
|
|
10 |
|
|
11 |
|
|
12 |
|
|
13 |
|
|
14 |
|
|
15 |
|
|
16 |
|
|
17 |
|
|
18 |
|
|
19 |
|
|
20 |
|
|
21 |
|
package jalview.analysis.scoremodels; |
22 |
|
|
23 |
|
import jalview.analysis.AlignmentUtils; |
24 |
|
import jalview.api.AlignmentViewPanel; |
25 |
|
import jalview.api.FeatureRenderer; |
26 |
|
import jalview.api.analysis.ScoreModelI; |
27 |
|
import jalview.api.analysis.SimilarityParamsI; |
28 |
|
import jalview.datamodel.AlignmentAnnotation; |
29 |
|
import jalview.datamodel.AlignmentView; |
30 |
|
import jalview.datamodel.SeqCigar; |
31 |
|
import jalview.datamodel.SequenceI; |
32 |
|
import jalview.math.Matrix; |
33 |
|
import jalview.math.MatrixI; |
34 |
|
import jalview.util.Constants; |
35 |
|
import jalview.util.MessageManager; |
36 |
|
|
37 |
|
import java.util.ArrayList; |
38 |
|
import java.util.HashMap; |
39 |
|
import java.util.HashSet; |
40 |
|
import java.util.List; |
41 |
|
import java.util.Map; |
42 |
|
import java.util.Set; |
43 |
|
|
44 |
|
|
45 |
|
|
46 |
|
|
|
|
| 5.9% |
Uncovered Elements: 144 (153) |
Complexity: 44 |
Complexity Density: 0.44 |
|
47 |
|
public class SecondaryStructureDistanceModel extends DistanceScoreModel |
48 |
|
{ |
49 |
|
private static final String NAME = "Secondary Structure Similarity"; |
50 |
|
|
51 |
|
private ScoreMatrix ssRateMatrix; |
52 |
|
|
53 |
|
private String description; |
54 |
|
|
55 |
|
FeatureRenderer fr; |
56 |
|
|
57 |
|
|
58 |
|
|
59 |
|
|
|
|
| - |
Uncovered Elements: 0 (0) |
Complexity: 1 |
Complexity Density: - |
|
60 |
21 |
public SecondaryStructureDistanceModel()... |
61 |
|
{ |
62 |
|
|
63 |
|
} |
64 |
|
|
|
|
| 0% |
Uncovered Elements: 8 (8) |
Complexity: 3 |
Complexity Density: 0.38 |
|
65 |
0 |
@Override... |
66 |
|
public ScoreModelI getInstance(AlignmentViewPanel view) |
67 |
|
{ |
68 |
0 |
SecondaryStructureDistanceModel instance; |
69 |
0 |
try |
70 |
|
{ |
71 |
0 |
instance = this.getClass().getDeclaredConstructor().newInstance(); |
72 |
0 |
instance.configureFromAlignmentView(view); |
73 |
0 |
return instance; |
74 |
|
} catch (InstantiationException | IllegalAccessException e) |
75 |
|
{ |
76 |
0 |
jalview.bin.Console.errPrintln("Error in " + getClass().getName() |
77 |
|
+ ".getInstance(): " + e.getMessage()); |
78 |
0 |
return null; |
79 |
|
} catch (ReflectiveOperationException roe) |
80 |
|
{ |
81 |
0 |
return null; |
82 |
|
} |
83 |
|
} |
84 |
|
|
|
|
| 0% |
Uncovered Elements: 2 (2) |
Complexity: 1 |
Complexity Density: 0.5 |
|
85 |
0 |
boolean configureFromAlignmentView(AlignmentViewPanel view)... |
86 |
|
|
87 |
|
{ |
88 |
0 |
fr = view.cloneFeatureRenderer(); |
89 |
0 |
return true; |
90 |
|
} |
91 |
|
|
92 |
|
ArrayList<AlignmentAnnotation> ssForSeqs = null; |
93 |
|
|
|
|
| 0% |
Uncovered Elements: 39 (39) |
Complexity: 7 |
Complexity Density: 0.24 |
|
94 |
0 |
@Override... |
95 |
|
public SequenceI[] expandSeqData(SequenceI[] sequences, |
96 |
|
AlignmentView seqData, SimilarityParamsI scoreParams, |
97 |
|
List<String> labels) |
98 |
|
{ |
99 |
0 |
ssForSeqs = new ArrayList<AlignmentAnnotation>(); |
100 |
0 |
List<SequenceI> newSequences = new ArrayList<SequenceI>(); |
101 |
0 |
List<SeqCigar> newCigs = new ArrayList<SeqCigar>(); |
102 |
0 |
int sq = 0; |
103 |
|
|
104 |
0 |
AlignmentAnnotation[] alignAnnotList = fr.getViewport().getAlignment() |
105 |
|
.getAlignmentAnnotation(); |
106 |
|
|
107 |
0 |
String ssSource = scoreParams.getSecondaryStructureSource(); |
108 |
0 |
if (ssSource == null || ssSource == "") |
109 |
|
{ |
110 |
0 |
ssSource = Constants.SS_ALL_PROVIDERS; |
111 |
|
} |
112 |
|
|
113 |
|
|
114 |
|
|
115 |
|
|
116 |
|
|
117 |
0 |
Map<SequenceI, ArrayList<AlignmentAnnotation>> ssAlignmentAnnotationForSequences = AlignmentUtils |
118 |
|
.getSequenceAssociatedAlignmentAnnotations(alignAnnotList, |
119 |
|
ssSource); |
120 |
|
|
121 |
0 |
for (SeqCigar scig : seqData.getSequences()) |
122 |
|
{ |
123 |
|
|
124 |
0 |
SequenceI alSeq = sequences[sq++]; |
125 |
0 |
List<AlignmentAnnotation> ssec = ssAlignmentAnnotationForSequences |
126 |
|
.get(scig.getRefSeq()); |
127 |
0 |
if (ssec == null) |
128 |
|
{ |
129 |
|
|
130 |
0 |
newSequences.add(alSeq); |
131 |
0 |
if (alSeq != null) |
132 |
|
{ |
133 |
|
|
134 |
0 |
labels.add(Constants.STRUCTURE_PROVIDERS.get("None")); |
135 |
|
} |
136 |
0 |
SeqCigar newSeqCigar = scig; |
137 |
0 |
newCigs.add(newSeqCigar); |
138 |
0 |
ssForSeqs.add(null); |
139 |
|
} |
140 |
|
else |
141 |
|
{ |
142 |
0 |
for (int i = 0; i < ssec.size(); i++) |
143 |
|
{ |
144 |
0 |
if (alSeq != null) |
145 |
|
{ |
146 |
0 |
String provider = AlignmentUtils.extractSSSourceFromAnnotationDescription(ssec.get(i)); |
147 |
|
|
148 |
0 |
labels.add(provider); |
149 |
|
} |
150 |
|
|
151 |
0 |
newSequences.add(alSeq); |
152 |
0 |
SeqCigar newSeqCigar = scig; |
153 |
0 |
newCigs.add(newSeqCigar); |
154 |
0 |
ssForSeqs.add(ssec.get(i)); |
155 |
|
} |
156 |
|
} |
157 |
|
} |
158 |
|
|
159 |
0 |
seqData.setSequences(newCigs.toArray(new SeqCigar[0])); |
160 |
0 |
return newSequences.toArray(new SequenceI[0]); |
161 |
|
|
162 |
|
} |
163 |
|
|
164 |
|
|
165 |
|
|
166 |
|
|
167 |
|
|
168 |
|
|
169 |
|
|
170 |
|
|
171 |
|
|
172 |
|
|
173 |
|
|
174 |
|
|
175 |
|
|
176 |
|
@param |
177 |
|
|
178 |
|
|
179 |
|
@param |
180 |
|
|
181 |
|
|
182 |
|
|
|
|
| 0% |
Uncovered Elements: 78 (78) |
Complexity: 24 |
Complexity Density: 0.5 |
|
183 |
0 |
@Override... |
184 |
|
public MatrixI findDistances(AlignmentView seqData, |
185 |
|
SimilarityParamsI params) |
186 |
|
{ |
187 |
0 |
if (ssForSeqs == null |
188 |
|
|| ssForSeqs.size() != seqData.getSequences().length) |
189 |
|
{ |
190 |
|
|
191 |
0 |
SequenceI[] sequences = new SequenceI[seqData.getSequences().length]; |
192 |
|
|
193 |
0 |
expandSeqData(sequences, seqData, params, new ArrayList<String>()); |
194 |
|
} |
195 |
0 |
SeqCigar[] seqs = seqData.getSequences(); |
196 |
0 |
int noseqs = seqs.length; |
197 |
0 |
int cpwidth = 0; |
198 |
0 |
double[][] similarities = new double[noseqs][noseqs]; |
199 |
|
|
200 |
|
|
201 |
|
|
202 |
0 |
String ssSource = params.getSecondaryStructureSource(); |
203 |
0 |
if (ssSource == null || ssSource == "") |
204 |
|
{ |
205 |
0 |
ssSource = Constants.SS_ALL_PROVIDERS; |
206 |
|
} |
207 |
0 |
ssRateMatrix = ScoreModels.getInstance().getSecondaryStructureMatrix(); |
208 |
|
|
209 |
|
|
210 |
0 |
int[] viscont = seqData.getVisibleContigs(); |
211 |
|
|
212 |
|
|
213 |
|
|
214 |
|
|
215 |
|
|
216 |
|
|
217 |
0 |
for (int vc = 0; vc < viscont.length; vc += 2) |
218 |
|
{ |
219 |
|
|
220 |
0 |
for (int cpos = viscont[vc]; cpos <= viscont[vc + 1]; cpos++) |
221 |
|
{ |
222 |
0 |
cpwidth++; |
223 |
|
|
224 |
|
|
225 |
|
|
226 |
|
|
227 |
0 |
Set<SeqCigar> seqsWithoutGapAtCol = findSeqsWithoutGapAtColumn(seqs, |
228 |
|
cpos); |
229 |
|
|
230 |
|
|
231 |
|
|
232 |
|
|
233 |
|
|
234 |
|
|
235 |
0 |
for (int i = 0; i < (noseqs - 1); i++) |
236 |
|
{ |
237 |
0 |
AlignmentAnnotation aa_i = ssForSeqs.get(i); |
238 |
0 |
boolean undefinedSS1 = aa_i == null; |
239 |
|
|
240 |
0 |
boolean gap1 = !seqsWithoutGapAtCol.contains(seqs[i]); |
241 |
|
|
242 |
|
|
243 |
0 |
char ss1 = '*'; |
244 |
0 |
if (!gap1 && !undefinedSS1) |
245 |
|
{ |
246 |
|
|
247 |
|
|
248 |
|
|
249 |
0 |
int seqPosition_i = seqs[i].findPosition(cpos); |
250 |
0 |
if (aa_i != null) |
251 |
0 |
ss1 = AlignmentUtils.findSSAnnotationForGivenSeqposition(aa_i, |
252 |
|
seqPosition_i); |
253 |
|
} |
254 |
|
|
255 |
0 |
for (int j = i + 1; j < noseqs; j++) |
256 |
|
{ |
257 |
|
|
258 |
|
|
259 |
0 |
AlignmentAnnotation aa_j = ssForSeqs.get(j); |
260 |
0 |
boolean undefinedSS2 = aa_j == null; |
261 |
|
|
262 |
|
|
263 |
0 |
if (undefinedSS1 && undefinedSS2) |
264 |
|
{ |
265 |
0 |
similarities[i][j] += ssRateMatrix.getMaximumScore(); |
266 |
0 |
continue; |
267 |
|
} |
268 |
|
|
269 |
|
|
270 |
0 |
else if (undefinedSS1 || undefinedSS2) |
271 |
|
{ |
272 |
0 |
similarities[i][j] += ssRateMatrix.getMinimumScore(); |
273 |
0 |
continue; |
274 |
|
} |
275 |
|
|
276 |
0 |
boolean gap2 = !seqsWithoutGapAtCol.contains(seqs[j]); |
277 |
|
|
278 |
|
|
279 |
0 |
char ss2 = '*'; |
280 |
|
|
281 |
0 |
if (!gap2 && !undefinedSS2) |
282 |
|
{ |
283 |
0 |
int seqPosition = seqs[j].findPosition(cpos); |
284 |
|
|
285 |
0 |
if (aa_j != null) |
286 |
0 |
ss2 = AlignmentUtils.findSSAnnotationForGivenSeqposition( |
287 |
|
aa_j, seqPosition); |
288 |
|
} |
289 |
|
|
290 |
0 |
if ((!gap1 && !gap2) || params.includeGaps()) |
291 |
|
{ |
292 |
|
|
293 |
0 |
double similarityScore = ssRateMatrix.getPairwiseScore(ss1, |
294 |
|
ss2); |
295 |
0 |
similarities[i][j] += similarityScore; |
296 |
|
} |
297 |
|
} |
298 |
|
} |
299 |
|
} |
300 |
|
} |
301 |
|
|
302 |
|
|
303 |
|
|
304 |
|
|
305 |
|
|
306 |
|
|
307 |
|
|
308 |
|
|
309 |
0 |
for (int i = 0; i < noseqs; i++) |
310 |
|
{ |
311 |
0 |
for (int j = i + 1; j < noseqs; j++) |
312 |
|
{ |
313 |
0 |
similarities[i][j] /= cpwidth; |
314 |
0 |
similarities[j][i] = similarities[i][j]; |
315 |
|
} |
316 |
|
} |
317 |
0 |
return SimilarityScoreModel |
318 |
|
.similarityToDistance(new Matrix(similarities)); |
319 |
|
|
320 |
|
} |
321 |
|
|
322 |
|
|
323 |
|
|
324 |
|
|
325 |
|
|
326 |
|
@param |
327 |
|
@param |
328 |
|
|
329 |
|
@return |
330 |
|
|
|
|
| 0% |
Uncovered Elements: 8 (8) |
Complexity: 2 |
Complexity Density: 0.33 |
|
331 |
0 |
private Set<SeqCigar> findSeqsWithoutGapAtColumn(SeqCigar[] seqs,... |
332 |
|
int columnPosition) |
333 |
|
{ |
334 |
0 |
Set<SeqCigar> seqsWithoutGapAtCol = new HashSet<>(); |
335 |
0 |
for (SeqCigar seq : seqs) |
336 |
|
{ |
337 |
0 |
int spos = seq.findPosition(columnPosition); |
338 |
0 |
if (spos != -1) |
339 |
|
{ |
340 |
|
|
341 |
|
|
342 |
|
|
343 |
0 |
seqsWithoutGapAtCol.add(seq); |
344 |
|
} |
345 |
|
} |
346 |
0 |
return seqsWithoutGapAtCol; |
347 |
|
} |
348 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
349 |
44 |
@Override... |
350 |
|
public String getName() |
351 |
|
{ |
352 |
44 |
return NAME; |
353 |
|
} |
354 |
|
|
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
355 |
0 |
@Override... |
356 |
|
public String getDescription() |
357 |
|
{ |
358 |
0 |
return description; |
359 |
|
} |
360 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
361 |
4 |
@Override... |
362 |
|
public boolean isDNA() |
363 |
|
{ |
364 |
4 |
return false; |
365 |
|
} |
366 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
367 |
2 |
@Override... |
368 |
|
public boolean isProtein() |
369 |
|
{ |
370 |
2 |
return true; |
371 |
|
} |
372 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
373 |
2 |
@Override... |
374 |
|
public boolean isSecondaryStructure() |
375 |
|
{ |
376 |
2 |
return true; |
377 |
|
} |
378 |
|
|
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
379 |
0 |
@Override... |
380 |
|
public String toString() |
381 |
|
{ |
382 |
0 |
return "Score between sequences based on similarity between binary " |
383 |
|
+ "vectors marking secondary structure displayed at each column"; |
384 |
|
} |
385 |
|
} |