Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
StructureFrequency | 41 | 173 | 58 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | package jalview.analysis; | |
22 | ||
23 | import jalview.datamodel.AlignmentAnnotation; | |
24 | import jalview.datamodel.Annotation; | |
25 | import jalview.datamodel.SequenceFeature; | |
26 | import jalview.datamodel.SequenceI; | |
27 | import jalview.util.Comparison; | |
28 | import jalview.util.Format; | |
29 | ||
30 | import java.util.Hashtable; | |
31 | ||
32 | /** | |
33 | * Takes in a vector or array of sequences and column start and column end and | |
34 | * returns a new Hashtable[] of size maxSeqLength, if Hashtable not supplied. | |
35 | * This class is used extensively in calculating alignment colourschemes that | |
36 | * depend on the amount of conservation in each alignment column. | |
37 | * | |
38 | * @author $author$ | |
39 | * @version $Revision$ | |
40 | */ | |
41 | public class StructureFrequency | |
42 | { | |
43 | public static final int STRUCTURE_PROFILE_LENGTH = 74; | |
44 | ||
45 | // No need to store 1000s of strings which are not | |
46 | // visible to the user. | |
47 | public static final String MAXCOUNT = "C"; | |
48 | ||
49 | public static final String MAXRESIDUE = "R"; | |
50 | ||
51 | public static final String PID_GAPS = "G"; | |
52 | ||
53 | public static final String PID_NOGAPS = "N"; | |
54 | ||
55 | public static final String PROFILE = "P"; | |
56 | ||
57 | public static final String PAIRPROFILE = "B"; | |
58 | ||
59 | /** | |
60 | * Returns the 3' position of a base pair | |
61 | * | |
62 | * @param pairs | |
63 | * Secondary structure annotation | |
64 | * @param indice | |
65 | * 5' position of a base pair | |
66 | * @return 3' position of a base pair | |
67 | */ | |
68 | 72 | public static int findPair(SequenceFeature[] pairs, int indice) |
69 | { | |
70 | ||
71 | 684 | for (int i = 0; i < pairs.length; i++) |
72 | { | |
73 | 684 | if (pairs[i].getBegin() == indice) |
74 | ||
75 | { | |
76 | ||
77 | 72 | return pairs[i].getEnd(); |
78 | ||
79 | } | |
80 | } | |
81 | 0 | return -1; |
82 | } | |
83 | ||
84 | /** | |
85 | * Method to calculate a 'base pair consensus row', very similar to nucleotide | |
86 | * consensus but takes into account a given structure | |
87 | * | |
88 | * @param sequences | |
89 | * @param start | |
90 | * @param end | |
91 | * @param result | |
92 | * @param profile | |
93 | * @param rnaStruc | |
94 | */ | |
95 | 4 | public static final void calculate(SequenceI[] sequences, int start, |
96 | int end, Hashtable<String, Object>[] result, boolean profile, | |
97 | AlignmentAnnotation rnaStruc) | |
98 | { | |
99 | ||
100 | 4 | Hashtable<String, Object> residueHash; |
101 | 4 | String maxResidue; |
102 | 4 | char[] struc = rnaStruc.getRNAStruc().toCharArray(); |
103 | ||
104 | 4 | SequenceFeature[] rna = rnaStruc._rnasecstr; |
105 | 4 | char c, s, cEnd; |
106 | 4 | int bpEnd = -1; |
107 | 4 | int jSize = sequences.length; |
108 | 4 | int[] values; |
109 | 4 | int[][] pairs; |
110 | 4 | float percentage; |
111 | ||
112 | 376 | for (int i = start; i < end; i++) // foreach column |
113 | { | |
114 | 372 | int canonicalOrWobblePairCount = 0, canonical = 0; |
115 | 372 | int otherPairCount = 0; |
116 | 372 | int nongap = 0; |
117 | 372 | maxResidue = "-"; |
118 | 372 | values = new int[255]; |
119 | 372 | pairs = new int[255][255]; |
120 | 372 | bpEnd = -1; |
121 | 372 | if (i < struc.length) |
122 | { | |
123 | 372 | s = struc[i]; |
124 | } | |
125 | else | |
126 | { | |
127 | 0 | s = '-'; |
128 | } | |
129 | 372 | if (s == '.' || s == ' ') |
130 | { | |
131 | 228 | s = '-'; |
132 | } | |
133 | ||
134 | 372 | if (!Rna.isOpeningParenthesis(s)) |
135 | { | |
136 | 300 | if (s == '-') |
137 | { | |
138 | 228 | values['-']++; |
139 | } | |
140 | } | |
141 | else | |
142 | { | |
143 | 72 | bpEnd = findPair(rna, i); |
144 | ||
145 | 72 | if (bpEnd > -1) |
146 | { | |
147 | 4464 | for (int j = 0; j < jSize; j++) // foreach row |
148 | { | |
149 | 4392 | if (sequences[j] == null) |
150 | { | |
151 | 0 | jalview.bin.Console.errPrintln( |
152 | "WARNING: Consensus skipping null sequence - possible race condition."); | |
153 | 0 | continue; |
154 | } | |
155 | ||
156 | 4392 | c = sequences[j].getCharAt(i); |
157 | 4392 | cEnd = sequences[j].getCharAt(bpEnd); |
158 | ||
159 | 4392 | if (Comparison.isGap(c) || Comparison.isGap(cEnd)) |
160 | { | |
161 | 8 | values['-']++; |
162 | 8 | continue; |
163 | } | |
164 | 4384 | nongap++; |
165 | /* | |
166 | * ensure upper-case for counting purposes | |
167 | */ | |
168 | 4384 | if ('a' <= c && 'z' >= c) |
169 | { | |
170 | 0 | c += 'A' - 'a'; |
171 | } | |
172 | 4384 | if ('a' <= cEnd && 'z' >= cEnd) |
173 | { | |
174 | 0 | cEnd += 'A' - 'a'; |
175 | } | |
176 | 4384 | if (Rna.isCanonicalOrWobblePair(c, cEnd)) |
177 | { | |
178 | 3892 | canonicalOrWobblePairCount++; |
179 | 3892 | if (Rna.isCanonicalPair(c, cEnd)) |
180 | { | |
181 | 3452 | canonical++; |
182 | } | |
183 | } | |
184 | else | |
185 | { | |
186 | 492 | otherPairCount++; |
187 | } | |
188 | 4384 | pairs[c][cEnd]++; |
189 | } | |
190 | } | |
191 | } | |
192 | ||
193 | 372 | residueHash = new Hashtable<>(); |
194 | 372 | if (profile) |
195 | { | |
196 | // TODO 1-dim array with jsize in [0], nongapped in [1]; or Pojo | |
197 | 372 | residueHash.put(PROFILE, |
198 | new int[][] | |
199 | { values, new int[] { jSize, (jSize - values['-']) } }); | |
200 | ||
201 | 372 | residueHash.put(PAIRPROFILE, pairs); |
202 | } | |
203 | 372 | values['('] = canonicalOrWobblePairCount; |
204 | 372 | values['['] = canonical; |
205 | 372 | values['{'] = otherPairCount; |
206 | /* | |
207 | * the count is the number of valid pairs (as a percentage, determines | |
208 | * the relative size of the profile logo) | |
209 | */ | |
210 | 372 | int count = canonicalOrWobblePairCount; |
211 | ||
212 | /* | |
213 | * display '(' if most pairs are canonical, or as | |
214 | * '[' if there are more wobble pairs. | |
215 | */ | |
216 | 372 | if (canonicalOrWobblePairCount > 0 || otherPairCount > 0) |
217 | { | |
218 | 72 | if (canonicalOrWobblePairCount >= otherPairCount) |
219 | { | |
220 | 72 | maxResidue = (canonicalOrWobblePairCount - canonical) < canonical |
221 | ? "(" | |
222 | : "["; | |
223 | } | |
224 | else | |
225 | { | |
226 | 0 | maxResidue = "{"; |
227 | } | |
228 | } | |
229 | 372 | residueHash.put(MAXCOUNT, Integer.valueOf(count)); |
230 | 372 | residueHash.put(MAXRESIDUE, maxResidue); |
231 | ||
232 | 372 | percentage = ((float) count * 100) / jSize; |
233 | 372 | residueHash.put(PID_GAPS, Float.valueOf(percentage)); |
234 | ||
235 | 372 | percentage = ((float) count * 100) / nongap; |
236 | 372 | residueHash.put(PID_NOGAPS, Float.valueOf(percentage)); |
237 | ||
238 | 372 | if (result[i] == null) |
239 | { | |
240 | 300 | result[i] = residueHash; |
241 | } | |
242 | 372 | if (bpEnd > 0) |
243 | { | |
244 | 72 | values[')'] = values['(']; |
245 | 72 | values[']'] = values['[']; |
246 | 72 | values['}'] = values['{']; |
247 | 72 | values['('] = 0; |
248 | 72 | values['['] = 0; |
249 | 72 | values['{'] = 0; |
250 | 72 | maxResidue = maxResidue.equals("(") ? ")" |
251 | 0 | : maxResidue.equals("[") ? "]" : "}"; |
252 | ||
253 | 72 | residueHash = new Hashtable<>(); |
254 | 72 | if (profile) |
255 | { | |
256 | 72 | residueHash.put(PROFILE, |
257 | new int[][] | |
258 | { values, new int[] { jSize, (jSize - values['-']) } }); | |
259 | ||
260 | 72 | residueHash.put(PAIRPROFILE, pairs); |
261 | } | |
262 | ||
263 | 72 | residueHash.put(MAXCOUNT, Integer.valueOf(count)); |
264 | 72 | residueHash.put(MAXRESIDUE, maxResidue); |
265 | ||
266 | 72 | percentage = ((float) count * 100) / jSize; |
267 | 72 | residueHash.put(PID_GAPS, Float.valueOf(percentage)); |
268 | ||
269 | 72 | percentage = ((float) count * 100) / nongap; |
270 | 72 | residueHash.put(PID_NOGAPS, Float.valueOf(percentage)); |
271 | ||
272 | 72 | result[bpEnd] = residueHash; |
273 | } | |
274 | } | |
275 | } | |
276 | ||
277 | /** | |
278 | * Compute all or part of the annotation row from the given consensus | |
279 | * hashtable | |
280 | * | |
281 | * @param consensus | |
282 | * - pre-allocated annotation row | |
283 | * @param hconsensus | |
284 | * @param iStart | |
285 | * @param width | |
286 | * @param ignoreGapsInConsensusCalculation | |
287 | * @param includeAllConsSymbols | |
288 | */ | |
289 | 4 | public static void completeConsensus(AlignmentAnnotation consensus, |
290 | Hashtable<String, Object>[] hconsensus, int iStart, int width, | |
291 | boolean ignoreGapsInConsensusCalculation, | |
292 | boolean includeAllConsSymbols, long nseq) | |
293 | { | |
294 | 4 | float tval, value; |
295 | 4 | if (consensus == null || consensus.annotations == null |
296 | || consensus.annotations.length < width) | |
297 | { | |
298 | // called with a bad alignment annotation row - wait for it to be | |
299 | // initialised properly | |
300 | 0 | return; |
301 | } | |
302 | 4 | String fmtstr = "%3.1f"; |
303 | 4 | int precision = 2; |
304 | 4 | while (nseq > 100) |
305 | { | |
306 | 0 | precision++; |
307 | 0 | nseq /= 10; |
308 | } | |
309 | 4 | if (precision > 2) |
310 | { | |
311 | 0 | fmtstr = "%" + (2 + precision) + "." + precision + "f"; |
312 | } | |
313 | 4 | Format fmt = new Format(fmtstr); |
314 | ||
315 | 376 | for (int i = iStart; i < width; i++) |
316 | { | |
317 | 372 | Hashtable<String, Object> hci; |
318 | ? | if (i >= hconsensus.length || ((hci = hconsensus[i]) == null)) |
319 | { | |
320 | // happens if sequences calculated over were shorter than alignment | |
321 | // width | |
322 | 0 | consensus.annotations[i] = null; |
323 | 0 | continue; |
324 | } | |
325 | 372 | value = 0; |
326 | 372 | Float fv; |
327 | 372 | if (ignoreGapsInConsensusCalculation) |
328 | { | |
329 | 0 | fv = (Float) hci.get(StructureFrequency.PID_NOGAPS); |
330 | } | |
331 | else | |
332 | { | |
333 | 372 | fv = (Float) hci.get(StructureFrequency.PID_GAPS); |
334 | } | |
335 | 372 | if (fv == null) |
336 | { | |
337 | 0 | consensus.annotations[i] = null; |
338 | // data has changed below us .. give up and | |
339 | 0 | continue; |
340 | } | |
341 | 372 | value = fv.floatValue(); |
342 | 372 | String maxRes = hci.get(StructureFrequency.MAXRESIDUE).toString(); |
343 | 372 | String mouseOver = hci.get(StructureFrequency.MAXRESIDUE) + " "; |
344 | 372 | if (maxRes.length() > 1) |
345 | { | |
346 | 0 | mouseOver = "[" + maxRes + "] "; |
347 | 0 | maxRes = "+"; |
348 | } | |
349 | 372 | int[][] profile = (int[][]) hci.get(StructureFrequency.PROFILE); |
350 | 372 | int[][] pairs = (int[][]) hci.get(StructureFrequency.PAIRPROFILE); |
351 | ||
352 | 372 | if (pairs != null && includeAllConsSymbols) // Just responsible for the |
353 | // tooltip | |
354 | // TODO Update tooltips for Structure row | |
355 | { | |
356 | 0 | mouseOver = ""; |
357 | ||
358 | /* | |
359 | * TODO It's not sure what is the purpose of the alphabet and wheter it | |
360 | * is useful for structure? | |
361 | * | |
362 | * if (alphabet != null) { for (int c = 0; c < alphabet.length; c++) { | |
363 | * tval = ((float) profile[0][alphabet[c]]) 100f / (float) | |
364 | * profile[1][ignoreGapsInConsensusCalculation ? 1 : 0]; mouseOver += | |
365 | * ((c == 0) ? "" : "; ") + alphabet[c] + " " + ((int) tval) + "%"; } } | |
366 | * else { | |
367 | */ | |
368 | 0 | int[][] ca = new int[625][]; |
369 | 0 | float[] vl = new float[625]; |
370 | 0 | int x = 0; |
371 | 0 | for (int c = 65; c < 90; c++) |
372 | { | |
373 | 0 | for (int d = 65; d < 90; d++) |
374 | { | |
375 | 0 | ca[x] = new int[] { c, d }; |
376 | 0 | vl[x] = pairs[c][d]; |
377 | 0 | x++; |
378 | } | |
379 | } | |
380 | 0 | jalview.util.QuickSort.sort(vl, ca); |
381 | 0 | int p = 0; |
382 | ||
383 | /* | |
384 | * profile[1] is {total, ungappedTotal} | |
385 | */ | |
386 | 0 | final int divisor = profile[1][ignoreGapsInConsensusCalculation ? 1 |
387 | : 0]; | |
388 | 0 | for (int c = 624; c > 0; c--) |
389 | { | |
390 | 0 | if (vl[c] > 0) |
391 | { | |
392 | 0 | tval = (vl[c] * 100f / divisor); |
393 | 0 | mouseOver += ((p == 0) ? "" : "; ") + (char) ca[c][0] |
394 | + (char) ca[c][1] + " " + fmt.form(tval) + "%"; | |
395 | 0 | p++; |
396 | ||
397 | } | |
398 | } | |
399 | ||
400 | // } | |
401 | } | |
402 | else | |
403 | { | |
404 | 372 | mouseOver += (fmt.form(value) + "%"); |
405 | } | |
406 | 372 | consensus.annotations[i] = new Annotation(maxRes, mouseOver, ' ', |
407 | value); | |
408 | } | |
409 | } | |
410 | ||
411 | /** | |
412 | * get the sorted base-pair profile for the given position of the consensus | |
413 | * | |
414 | * @param hconsensus | |
415 | * @return profile of the given column | |
416 | */ | |
417 | 0 | public static int[] extractProfile(Hashtable<String, Object> hconsensus, |
418 | boolean ignoreGapsInConsensusCalculation) | |
419 | { | |
420 | 0 | int[] rtnval = new int[STRUCTURE_PROFILE_LENGTH]; // 2*(5*5)+2 |
421 | 0 | int[][] profile = (int[][]) hconsensus.get(StructureFrequency.PROFILE); |
422 | 0 | int[][] pairs = (int[][]) hconsensus |
423 | .get(StructureFrequency.PAIRPROFILE); | |
424 | ||
425 | 0 | if (profile == null) |
426 | { | |
427 | 0 | return null; |
428 | } | |
429 | ||
430 | // TODO fix the object length, also do it in completeConsensus | |
431 | // Object[] ca = new Object[625]; | |
432 | 0 | int[][] ca = new int[625][]; |
433 | 0 | float[] vl = new float[625]; |
434 | 0 | int x = 0; |
435 | 0 | for (int c = 65; c < 90; c++) |
436 | { | |
437 | 0 | for (int d = 65; d < 90; d++) |
438 | { | |
439 | 0 | ca[x] = new int[] { c, d }; |
440 | 0 | vl[x] = pairs[c][d]; |
441 | 0 | x++; |
442 | } | |
443 | } | |
444 | 0 | jalview.util.QuickSort.sort(vl, ca); |
445 | ||
446 | 0 | int valuesCount = 0; |
447 | 0 | rtnval[1] = 0; |
448 | 0 | int offset = 2; |
449 | 0 | final int divisor = profile[1][ignoreGapsInConsensusCalculation ? 1 |
450 | : 0]; | |
451 | 0 | for (int c = 624; c > 0; c--) |
452 | { | |
453 | 0 | if (vl[c] > 0) |
454 | { | |
455 | 0 | rtnval[offset++] = ca[c][0]; |
456 | 0 | rtnval[offset++] = ca[c][1]; |
457 | 0 | rtnval[offset] = (int) (vl[c] * 100f / divisor); |
458 | 0 | rtnval[1] += rtnval[offset++]; |
459 | 0 | valuesCount++; |
460 | } | |
461 | } | |
462 | 0 | rtnval[0] = valuesCount; |
463 | ||
464 | // insert profile type code in position 0 | |
465 | 0 | int[] result = new int[rtnval.length + 1]; |
466 | 0 | result[0] = AlignmentAnnotation.STRUCTURE_PROFILE; |
467 | 0 | System.arraycopy(rtnval, 0, result, 1, rtnval.length); |
468 | 0 | return result; |
469 | } | |
470 | } |