Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
Conservation | 48 | 250 | 99 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | package jalview.analysis; | |
22 | ||
23 | import java.util.Locale; | |
24 | ||
25 | import jalview.analysis.scoremodels.ScoreMatrix; | |
26 | import jalview.analysis.scoremodels.ScoreModels; | |
27 | import jalview.datamodel.AlignmentAnnotation; | |
28 | import jalview.datamodel.Annotation; | |
29 | import jalview.datamodel.ResidueCount; | |
30 | import jalview.datamodel.ResidueCount.SymbolCounts; | |
31 | import jalview.datamodel.Sequence; | |
32 | import jalview.datamodel.SequenceI; | |
33 | import jalview.schemes.ResidueProperties; | |
34 | import jalview.util.Comparison; | |
35 | import jalview.util.Format; | |
36 | ||
37 | import java.awt.Color; | |
38 | import java.util.List; | |
39 | import java.util.Map; | |
40 | import java.util.Map.Entry; | |
41 | import java.util.SortedMap; | |
42 | import java.util.TreeMap; | |
43 | import java.util.Vector; | |
44 | ||
45 | /** | |
46 | * Calculates conservation values for a given set of sequences | |
47 | */ | |
48 | public class Conservation | |
49 | { | |
50 | /* | |
51 | * need to have a minimum of 3% of sequences with a residue | |
52 | * for it to be included in the conservation calculation | |
53 | */ | |
54 | private static final int THRESHOLD_PERCENT = 3; | |
55 | ||
56 | private static final int TOUPPERCASE = 'a' - 'A'; | |
57 | ||
58 | private static final int GAP_INDEX = -1; | |
59 | ||
60 | private static final Format FORMAT_3DP = new Format("%2.5f"); | |
61 | ||
62 | SequenceI[] sequences; | |
63 | ||
64 | int start; | |
65 | ||
66 | int end; | |
67 | ||
68 | /* | |
69 | * a list whose i'th element is an array whose first entry is the checksum | |
70 | * of the i'th sequence, followed by residues encoded to score matrix index | |
71 | */ | |
72 | Vector<int[]> seqNums; | |
73 | ||
74 | int maxLength = 0; // used by quality calcs | |
75 | ||
76 | boolean seqNumsChanged = false; // updated after any change via calcSeqNum; | |
77 | ||
78 | /* | |
79 | * a map per column with {property, conservation} where conservation value is | |
80 | * 1 (property is conserved), 0 (absence of property is conserved) or -1 | |
81 | * (property is not conserved i.e. column has residues with and without it) | |
82 | */ | |
83 | Map<String, Integer>[] total; | |
84 | ||
85 | /* | |
86 | * if true then conservation calculation will map all symbols to canonical aa | |
87 | * numbering rather than consider conservation of that symbol | |
88 | */ | |
89 | boolean canonicaliseAa = true; | |
90 | ||
91 | private Vector<Double> quality; | |
92 | ||
93 | private double qualityMinimum; | |
94 | ||
95 | private double qualityMaximum; | |
96 | ||
97 | private Sequence consSequence; | |
98 | ||
99 | /* | |
100 | * percentage of residues in a column to qualify for counting conservation | |
101 | */ | |
102 | private int threshold; | |
103 | ||
104 | private String name = ""; | |
105 | ||
106 | /* | |
107 | * an array, for each column, of counts of symbols (by score matrix index) | |
108 | */ | |
109 | private int[][] cons2; | |
110 | ||
111 | /* | |
112 | * gap counts for each column | |
113 | */ | |
114 | private int[] cons2GapCounts; | |
115 | ||
116 | private String[] consSymbs; | |
117 | ||
118 | /** | |
119 | * Constructor using default threshold of 3% | |
120 | * | |
121 | * @param name | |
122 | * Name of conservation | |
123 | * @param sequences | |
124 | * sequences to be used in calculation | |
125 | * @param start | |
126 | * start residue position | |
127 | * @param end | |
128 | * end residue position | |
129 | */ | |
130 | 1029 | public Conservation(String name, List<SequenceI> sequences, int start, |
131 | int end) | |
132 | { | |
133 | 1029 | this(name, THRESHOLD_PERCENT, sequences, start, end); |
134 | } | |
135 | ||
136 | /** | |
137 | * Constructor | |
138 | * | |
139 | * @param name | |
140 | * Name of conservation | |
141 | * @param threshold | |
142 | * percentage of sequences at or below which property conservation is | |
143 | * ignored | |
144 | * @param sequences | |
145 | * sequences to be used in calculation | |
146 | * @param start | |
147 | * start column position | |
148 | * @param end | |
149 | * end column position | |
150 | */ | |
151 | 1034 | public Conservation(String name, int threshold, List<SequenceI> sequences, |
152 | int start, int end) | |
153 | { | |
154 | 1034 | this.name = name; |
155 | 1034 | this.threshold = threshold; |
156 | 1033 | this.start = start; |
157 | 1034 | this.end = end; |
158 | ||
159 | 1033 | maxLength = end - start + 1; // default width includes bounds of |
160 | // calculation | |
161 | ||
162 | 1034 | int s, sSize = sequences.size(); |
163 | 1034 | SequenceI[] sarray = new SequenceI[sSize]; |
164 | 1033 | this.sequences = sarray; |
165 | 1034 | try |
166 | { | |
167 | 12617 | for (s = 0; s < sSize; s++) |
168 | { | |
169 | 11584 | sarray[s] = sequences.get(s); |
170 | 11584 | if (sarray[s].getLength() > maxLength) |
171 | { | |
172 | 6 | maxLength = sarray[s].getLength(); |
173 | } | |
174 | } | |
175 | } catch (ArrayIndexOutOfBoundsException ex) | |
176 | { | |
177 | // bail - another thread has modified the sequence array, so the current | |
178 | // calculation is probably invalid. | |
179 | 0 | this.sequences = new SequenceI[0]; |
180 | 0 | maxLength = 0; |
181 | } | |
182 | } | |
183 | ||
184 | /** | |
185 | * Translate sequence i into score matrix indices and store it in the i'th | |
186 | * position of the seqNums array. | |
187 | * | |
188 | * @param i | |
189 | * @param sm | |
190 | */ | |
191 | 11510 | private void calcSeqNum(int i, ScoreMatrix sm) |
192 | { | |
193 | 11510 | int sSize = sequences.length; |
194 | ||
195 | 11510 | if ((i > -1) && (i < sSize)) |
196 | { | |
197 | 11510 | String sq = sequences[i].getSequenceAsString(); |
198 | ||
199 | 11510 | if (seqNums.size() <= i) |
200 | { | |
201 | 11510 | seqNums.addElement(new int[sq.length() + 1]); |
202 | } | |
203 | ||
204 | /* | |
205 | * the first entry in the array is the sequence's hashcode, | |
206 | * following entries are matrix indices of sequence characters | |
207 | */ | |
208 | 11510 | if (sq.hashCode() != seqNums.elementAt(i)[0]) |
209 | { | |
210 | 11510 | int j; |
211 | 11510 | int len; |
212 | 11510 | seqNumsChanged = true; |
213 | 11510 | len = sq.length(); |
214 | ||
215 | 11510 | if (maxLength < len) |
216 | { | |
217 | 0 | maxLength = len; |
218 | } | |
219 | ||
220 | 11510 | int[] sqnum = new int[len + 1]; // better to always make a new array - |
221 | // sequence can change its length | |
222 | 11510 | sqnum[0] = sq.hashCode(); |
223 | ||
224 | 1908144 | for (j = 1; j <= len; j++) |
225 | { | |
226 | // sqnum[j] = ResidueProperties.aaIndex[sq.charAt(j - 1)]; | |
227 | 1895809 | char residue = sq.charAt(j - 1); |
228 | 1896865 | if (Comparison.isGap(residue)) |
229 | { | |
230 | 423373 | sqnum[j] = GAP_INDEX; |
231 | } | |
232 | else | |
233 | { | |
234 | 1474636 | sqnum[j] = sm.getMatrixIndex(residue); |
235 | 1474462 | if (sqnum[j] == -1) |
236 | { | |
237 | 41 | sqnum[j] = GAP_INDEX; |
238 | } | |
239 | } | |
240 | } | |
241 | ||
242 | 11510 | seqNums.setElementAt(sqnum, i); |
243 | } | |
244 | else | |
245 | { | |
246 | 0 | jalview.bin.Console.outPrintln("SEQUENCE HAS BEEN DELETED!!!"); |
247 | } | |
248 | } | |
249 | else | |
250 | { | |
251 | // JBPNote INFO level debug | |
252 | 0 | jalview.bin.Console.errPrintln( |
253 | "ERROR: calcSeqNum called with out of range sequence index for Alignment\n"); | |
254 | } | |
255 | } | |
256 | ||
257 | /** | |
258 | * Calculates the conservation values for given set of sequences | |
259 | */ | |
260 | 1030 | public void calculate() |
261 | { | |
262 | 1030 | int height = sequences.length; |
263 | ||
264 | 1029 | total = new Map[maxLength]; |
265 | ||
266 | 145910 | for (int column = start; column <= end; column++) |
267 | { | |
268 | 144884 | ResidueCount values = countResidues(column); |
269 | ||
270 | /* | |
271 | * percentage count at or below which we ignore residues | |
272 | */ | |
273 | 144878 | int thresh = (threshold * height) / 100; |
274 | ||
275 | /* | |
276 | * check observed residues in column and record whether each | |
277 | * physico-chemical property is conserved (+1), absence conserved (0), | |
278 | * or not conserved (-1) | |
279 | * Using TreeMap means properties are displayed in alphabetical order | |
280 | */ | |
281 | 144879 | SortedMap<String, Integer> resultHash = new TreeMap<>(); |
282 | 144889 | SymbolCounts symbolCounts = values.getSymbolCounts(); |
283 | 144872 | char[] symbols = symbolCounts.symbols; |
284 | 144885 | int[] counts = symbolCounts.values; |
285 | 467275 | for (int j = 0; j < symbols.length; j++) |
286 | { | |
287 | 322411 | char c = symbols[j]; |
288 | 322427 | if (counts[j] > thresh) |
289 | { | |
290 | 322413 | recordConservation(resultHash, String.valueOf(c)); |
291 | } | |
292 | } | |
293 | 144886 | if (values.getGapCount() > thresh) |
294 | { | |
295 | 84372 | recordConservation(resultHash, "-"); |
296 | } | |
297 | ||
298 | 144883 | if (total.length > 0) |
299 | { | |
300 | 144889 | total[column - start] = resultHash; |
301 | } | |
302 | } | |
303 | } | |
304 | ||
305 | /** | |
306 | * Updates the conservation results for an observed residue | |
307 | * | |
308 | * @param resultMap | |
309 | * a map of {property, conservation} where conservation value is +1 | |
310 | * (all residues have the property), 0 (no residue has the property) | |
311 | * or -1 (some do, some don't) | |
312 | * @param res | |
313 | */ | |
314 | 406709 | protected static void recordConservation(Map<String, Integer> resultMap, |
315 | String res) | |
316 | { | |
317 | 406712 | res = res.toUpperCase(Locale.ROOT); |
318 | 406727 | for (Entry<String, Map<String, Integer>> property : ResidueProperties.propHash |
319 | .entrySet()) | |
320 | { | |
321 | 4057230 | String propertyName = property.getKey(); |
322 | 4057779 | Integer residuePropertyValue = property.getValue().get(res); |
323 | ||
324 | 4065228 | if (!resultMap.containsKey(propertyName)) |
325 | { | |
326 | /* | |
327 | * first time we've seen this residue - note whether it has this property | |
328 | */ | |
329 | 1448316 | if (residuePropertyValue != null) |
330 | { | |
331 | 1448140 | resultMap.put(propertyName, residuePropertyValue); |
332 | } | |
333 | else | |
334 | { | |
335 | /* | |
336 | * unrecognised residue - use default value for property | |
337 | */ | |
338 | 140 | resultMap.put(propertyName, property.getValue().get("-")); |
339 | } | |
340 | } | |
341 | else | |
342 | { | |
343 | 2616484 | Integer currentResult = resultMap.get(propertyName); |
344 | 2617467 | if (currentResult.intValue() != -1 |
345 | && !currentResult.equals(residuePropertyValue)) | |
346 | { | |
347 | /* | |
348 | * property is unconserved - residues seen both with and without it | |
349 | */ | |
350 | 761076 | resultMap.put(propertyName, Integer.valueOf(-1)); |
351 | } | |
352 | } | |
353 | } | |
354 | } | |
355 | ||
356 | /** | |
357 | * Counts residues (upper-cased) and gaps in the given column | |
358 | * | |
359 | * @param column | |
360 | * @return | |
361 | */ | |
362 | 144879 | protected ResidueCount countResidues(int column) |
363 | { | |
364 | 144879 | ResidueCount values = new ResidueCount(false); |
365 | ||
366 | 2064268 | for (int row = 0; row < sequences.length; row++) |
367 | { | |
368 | 1919176 | if (sequences[row].getLength() > column) |
369 | { | |
370 | 1904568 | char c = sequences[row].getCharAt(column); |
371 | 1904329 | if (canonicaliseAa) |
372 | { | |
373 | 1904150 | int index = ResidueProperties.aaIndex[c]; |
374 | 1904446 | c = index > 20 ? '-' : ResidueProperties.aa[index].charAt(0); |
375 | } | |
376 | else | |
377 | { | |
378 | 0 | c = toUpperCase(c); |
379 | } | |
380 | 1904305 | if (Comparison.isGap(c)) |
381 | { | |
382 | 424567 | values.addGap(); |
383 | } | |
384 | else | |
385 | { | |
386 | 1479841 | values.add(c); |
387 | } | |
388 | } | |
389 | else | |
390 | { | |
391 | 14540 | values.addGap(); |
392 | } | |
393 | } | |
394 | 144877 | return values; |
395 | } | |
396 | ||
397 | /** | |
398 | * Counts conservation and gaps for a column of the alignment | |
399 | * | |
400 | * @return { 1 if fully conserved, else 0, gap count } | |
401 | */ | |
402 | 144872 | public int[] countConservationAndGaps(int column) |
403 | { | |
404 | 144873 | int gapCount = 0; |
405 | 144874 | boolean fullyConserved = true; |
406 | 144873 | int iSize = sequences.length; |
407 | ||
408 | 144870 | if (iSize == 0) |
409 | { | |
410 | 0 | return new int[] { 0, 0 }; |
411 | } | |
412 | ||
413 | 144869 | char lastRes = '0'; |
414 | 2061518 | for (int i = 0; i < iSize; i++) |
415 | { | |
416 | 1917024 | if (column >= sequences[i].getLength()) |
417 | { | |
418 | 14540 | gapCount++; |
419 | 14540 | continue; |
420 | } | |
421 | ||
422 | 1902531 | char c = sequences[i].getCharAt(column); // gaps do not have upper/lower |
423 | // case | |
424 | ||
425 | 1903762 | if (Comparison.isGap((c))) |
426 | { | |
427 | 424481 | gapCount++; |
428 | } | |
429 | else | |
430 | { | |
431 | 1479280 | c = toUpperCase(c); |
432 | 1479708 | if (lastRes == '0') |
433 | { | |
434 | 137324 | lastRes = c; |
435 | } | |
436 | 1480568 | if (c != lastRes) |
437 | { | |
438 | 465102 | fullyConserved = false; |
439 | } | |
440 | } | |
441 | } | |
442 | ||
443 | 144857 | int[] r = new int[] { fullyConserved ? 1 : 0, gapCount }; |
444 | 144840 | return r; |
445 | } | |
446 | ||
447 | /** | |
448 | * Returns the upper-cased character if between 'a' and 'z', else the | |
449 | * unchanged value | |
450 | * | |
451 | * @param c | |
452 | * @return | |
453 | */ | |
454 | 1479342 | char toUpperCase(char c) |
455 | { | |
456 | 1479444 | if ('a' <= c && c <= 'z') |
457 | { | |
458 | 6813 | c -= TOUPPERCASE; |
459 | } | |
460 | 1479569 | return c; |
461 | } | |
462 | ||
463 | /** | |
464 | * Calculates the conservation sequence | |
465 | * | |
466 | * @param positiveOnly | |
467 | * if true, calculate positive conservation; else calculate both | |
468 | * positive and negative conservation | |
469 | * @param maxPercentageGaps | |
470 | * the percentage of gaps in a column, at or above which no | |
471 | * conservation is asserted | |
472 | */ | |
473 | 1028 | public void verdict(boolean positiveOnly, float maxPercentageGaps) |
474 | { | |
475 | // TODO call this at the end of calculate(), should not be a public method | |
476 | ||
477 | 1028 | StringBuilder consString = new StringBuilder(end); |
478 | ||
479 | // NOTE THIS SHOULD CHECK IF THE CONSEQUENCE ALREADY | |
480 | // EXISTS AND NOT OVERWRITE WITH '-', BUT THIS CASE | |
481 | // DOES NOT EXIST IN JALVIEW 2.1.2 | |
482 | 1118 | for (int i = 0; i < start; i++) |
483 | { | |
484 | 90 | consString.append('-'); |
485 | } | |
486 | 1028 | consSymbs = new String[end - start + 1]; |
487 | 145892 | for (int i = start; i <= end; i++) |
488 | { | |
489 | 144868 | int[] gapcons = countConservationAndGaps(i); |
490 | 144852 | boolean fullyConserved = gapcons[0] == 1; |
491 | 144867 | int totGaps = gapcons[1]; |
492 | 144878 | float pgaps = (totGaps * 100f) / sequences.length; |
493 | ||
494 | 144873 | if (maxPercentageGaps > pgaps) |
495 | { | |
496 | 102266 | Map<String, Integer> resultHash = total[i - start]; |
497 | 102265 | int count = 0; |
498 | 102262 | StringBuilder positives = new StringBuilder(64); |
499 | 102261 | StringBuilder negatives = new StringBuilder(32); |
500 | 102263 | for (String type : resultHash.keySet()) |
501 | { | |
502 | 1021511 | int result = resultHash.get(type).intValue(); |
503 | 1022382 | if (result == -1) |
504 | { | |
505 | /* | |
506 | * not conserved (present or absent) | |
507 | */ | |
508 | 469592 | continue; |
509 | } | |
510 | 552705 | count++; |
511 | 552816 | if (result == 1) |
512 | { | |
513 | /* | |
514 | * positively conserved property (all residues have it) | |
515 | */ | |
516 | 173612 | positives.append(positives.length() == 0 ? "" : " "); |
517 | 173607 | positives.append(type); |
518 | } | |
519 | 552758 | if (result == 0 && !positiveOnly) |
520 | { | |
521 | /* | |
522 | * absense of property is conserved (all residues lack it) | |
523 | */ | |
524 | 379209 | negatives.append(negatives.length() == 0 ? "" : " "); |
525 | 379212 | negatives.append("!").append(type); |
526 | } | |
527 | } | |
528 | 102264 | if (negatives.length() > 0) |
529 | { | |
530 | 60330 | positives.append(" ").append(negatives); |
531 | } | |
532 | 102266 | consSymbs[i - start] = positives.toString(); |
533 | ||
534 | 102270 | if (count < 10) |
535 | { | |
536 | 69052 | consString.append(count); // Conserved props!=Identity |
537 | } | |
538 | else | |
539 | { | |
540 | 33216 | consString.append(fullyConserved ? "*" : "+"); |
541 | } | |
542 | } | |
543 | else | |
544 | { | |
545 | 42610 | consString.append('-'); |
546 | } | |
547 | } | |
548 | ||
549 | 1028 | consSequence = new Sequence(name, consString.toString(), start, end); |
550 | } | |
551 | ||
552 | /** | |
553 | * | |
554 | * | |
555 | * @return Conservation sequence | |
556 | */ | |
557 | 1047 | public SequenceI getConsSequence() |
558 | { | |
559 | 1047 | return consSequence; |
560 | } | |
561 | ||
562 | // From Alignment.java in jalview118 | |
563 | 1016 | public void findQuality() |
564 | { | |
565 | 1016 | findQuality(0, maxLength - 1, ScoreModels.getInstance().getBlosum62()); |
566 | } | |
567 | ||
568 | /** | |
569 | * DOCUMENT ME! | |
570 | * | |
571 | * @param sm | |
572 | */ | |
573 | 1016 | private void percentIdentity(ScoreMatrix sm) |
574 | { | |
575 | 1016 | seqNums = new Vector<>(); |
576 | 1016 | int i = 0, iSize = sequences.length; |
577 | // Do we need to calculate this again? | |
578 | 12526 | for (i = 0; i < iSize; i++) |
579 | { | |
580 | 11510 | calcSeqNum(i, sm); |
581 | } | |
582 | ||
583 | 1016 | if ((cons2 == null) || seqNumsChanged) |
584 | { | |
585 | // FIXME remove magic number 24 without changing calc | |
586 | // sm.getSize() returns 25 so doesn't quite do it... | |
587 | 1016 | cons2 = new int[maxLength][24]; |
588 | 1016 | cons2GapCounts = new int[maxLength]; |
589 | ||
590 | 1016 | int j = 0; |
591 | ||
592 | 12526 | while (j < sequences.length) |
593 | { | |
594 | 11510 | int[] sqnum = seqNums.elementAt(j); |
595 | ||
596 | 1907701 | for (i = 1; i < sqnum.length; i++) |
597 | { | |
598 | 1895977 | int index = sqnum[i]; |
599 | 1897275 | if (index == GAP_INDEX) |
600 | { | |
601 | 423441 | cons2GapCounts[i - 1]++; |
602 | } | |
603 | else | |
604 | { | |
605 | 1473827 | cons2[i - 1][index]++; |
606 | } | |
607 | } | |
608 | ||
609 | // TODO should this start from sqnum.length? | |
610 | 26005 | for (i = sqnum.length - 1; i < maxLength; i++) |
611 | { | |
612 | 14495 | cons2GapCounts[i]++; |
613 | } | |
614 | 11510 | j++; |
615 | } | |
616 | } | |
617 | } | |
618 | ||
619 | /** | |
620 | * Calculates the quality of the set of sequences over the given inclusive | |
621 | * column range, using the specified substitution score matrix | |
622 | * | |
623 | * @param startCol | |
624 | * @param endCol | |
625 | * @param scoreMatrix | |
626 | */ | |
627 | 1016 | protected void findQuality(int startCol, int endCol, |
628 | ScoreMatrix scoreMatrix) | |
629 | { | |
630 | 1016 | quality = new Vector<>(); |
631 | ||
632 | 1016 | double max = -Double.MAX_VALUE; |
633 | 1016 | float[][] scores = scoreMatrix.getMatrix(); |
634 | ||
635 | 1016 | percentIdentity(scoreMatrix); |
636 | ||
637 | 1016 | int size = seqNums.size(); |
638 | 1016 | int[] lengths = new int[size]; |
639 | ||
640 | 12526 | for (int l = 0; l < size; l++) |
641 | { | |
642 | 11510 | lengths[l] = seqNums.elementAt(l).length - 1; |
643 | } | |
644 | ||
645 | 1016 | final int symbolCount = scoreMatrix.getSize(); |
646 | ||
647 | 145014 | for (int j = startCol; j <= endCol; j++) |
648 | { | |
649 | 143999 | double bigtot = 0; |
650 | ||
651 | // First Xr = depends on column only | |
652 | 144000 | double[] x = new double[symbolCount]; |
653 | ||
654 | 3592572 | for (int ii = 0; ii < symbolCount; ii++) |
655 | { | |
656 | 3449214 | x[ii] = 0; |
657 | ||
658 | /* | |
659 | * todo JAL-728 currently assuming last symbol in matrix is * for gap | |
660 | * (which we ignore as counted separately); true for BLOSUM62 but may | |
661 | * not be once alternative matrices are supported | |
662 | */ | |
663 | 81673758 | for (int i2 = 0; i2 < symbolCount - 1; i2++) |
664 | { | |
665 | 78193696 | x[ii] += (((double) cons2[j][i2] * scores[ii][i2]) + 4D); |
666 | } | |
667 | 3450202 | x[ii] += 4D + cons2GapCounts[j] * scoreMatrix.getMinimumScore(); |
668 | ||
669 | 3450649 | x[ii] /= size; |
670 | } | |
671 | ||
672 | // Now calculate D for each position and sum | |
673 | 2055234 | for (int k = 0; k < size; k++) |
674 | { | |
675 | 1911305 | double tot = 0; |
676 | 1911572 | double[] xx = new double[symbolCount]; |
677 | // sequence character index, or implied gap if sequence too short | |
678 | 1912176 | int seqNum = (j < lengths[k]) ? seqNums.elementAt(k)[j + 1] |
679 | : GAP_INDEX; | |
680 | ||
681 | 45194120 | for (int i = 0; i < symbolCount - 1; i++) |
682 | { | |
683 | 43407983 | double sr = 4D; |
684 | 43338237 | if (seqNum == GAP_INDEX) |
685 | { | |
686 | 10055329 | sr += scoreMatrix.getMinimumScore(); |
687 | } | |
688 | else | |
689 | { | |
690 | 33486548 | sr += scores[i][seqNum]; |
691 | } | |
692 | ||
693 | 43493526 | xx[i] = x[i] - sr; |
694 | ||
695 | 43494121 | tot += (xx[i] * xx[i]); |
696 | } | |
697 | ||
698 | 1911198 | bigtot += Math.sqrt(tot); |
699 | } | |
700 | ||
701 | 143995 | max = Math.max(max, bigtot); |
702 | ||
703 | 143994 | quality.addElement(Double.valueOf(bigtot)); |
704 | } | |
705 | ||
706 | 1016 | double newmax = -Double.MAX_VALUE; |
707 | ||
708 | 145015 | for (int j = startCol; j <= endCol; j++) |
709 | { | |
710 | 143998 | double tmp = quality.elementAt(j).doubleValue(); |
711 | // tmp = ((max - tmp) * (size - cons2[j][23])) / size; | |
712 | 143995 | tmp = ((max - tmp) * (size - cons2GapCounts[j])) / size; |
713 | ||
714 | // jalview.bin.Console.outPrintln(tmp+ " " + j); | |
715 | 143994 | quality.setElementAt(Double.valueOf(tmp), j); |
716 | ||
717 | 144000 | if (tmp > newmax) |
718 | { | |
719 | 2789 | newmax = tmp; |
720 | } | |
721 | } | |
722 | ||
723 | 1016 | qualityMinimum = 0D; |
724 | 1016 | qualityMaximum = newmax; |
725 | } | |
726 | ||
727 | /** | |
728 | * Complete the given consensus and quuality annotation rows. Note: currently | |
729 | * this method will reallocate the given annotation row if it is different to | |
730 | * the calculated width, otherwise will leave its length unchanged. | |
731 | * | |
732 | * @param conservation | |
733 | * conservation annotation row | |
734 | * @param quality2 | |
735 | * (optional - may be null) | |
736 | * @param istart | |
737 | * first column for conservation | |
738 | * @param alWidth | |
739 | * extent of conservation | |
740 | */ | |
741 | 1016 | public void completeAnnotations(AlignmentAnnotation conservation, |
742 | AlignmentAnnotation quality2, int istart, int alWidth) | |
743 | { | |
744 | 1016 | SequenceI cons = getConsSequence(); |
745 | ||
746 | /* | |
747 | * colour scale for Conservation and Quality; | |
748 | */ | |
749 | 1016 | float minR = 0.3f; |
750 | 1016 | float minG = 0.0f; |
751 | 1016 | float minB = 0f; |
752 | 1016 | float maxR = 1.0f - minR; |
753 | 1016 | float maxG = 0.9f - minG; |
754 | 1016 | float maxB = 0f - minB; |
755 | ||
756 | 1016 | float min = 0f; |
757 | 1016 | float max = 11f; |
758 | 1016 | float qmin = 0f; |
759 | 1016 | float qmax = 0f; |
760 | ||
761 | 1016 | if (conservation != null && conservation.annotations != null |
762 | && conservation.annotations.length != alWidth) | |
763 | { | |
764 | 368 | conservation.annotations = new Annotation[alWidth]; |
765 | } | |
766 | ||
767 | 1016 | if (quality2 != null) |
768 | { | |
769 | 1016 | quality2.graphMax = (float) qualityMaximum; |
770 | 1016 | if (quality2.annotations != null |
771 | && quality2.annotations.length != alWidth) | |
772 | { | |
773 | 369 | quality2.annotations = new Annotation[alWidth]; |
774 | } | |
775 | 1016 | qmin = (float) qualityMinimum; |
776 | 1016 | qmax = (float) qualityMaximum; |
777 | } | |
778 | ||
779 | 145026 | for (int i = istart; i < alWidth; i++) |
780 | { | |
781 | 144012 | float value = 0; |
782 | ||
783 | 144013 | char c = cons.getCharAt(i); |
784 | ||
785 | 144013 | if (Character.isDigit(c)) |
786 | { | |
787 | 68764 | value = c - '0'; |
788 | } | |
789 | 75250 | else if (c == '*') |
790 | { | |
791 | 32827 | value = 11; |
792 | } | |
793 | 42423 | else if (c == '+') |
794 | { | |
795 | 91 | value = 10; |
796 | } | |
797 | ||
798 | 144013 | if (conservation != null) |
799 | { | |
800 | 143856 | float vprop = value - min; |
801 | 143854 | vprop /= max; |
802 | 143857 | int consp = i - start; |
803 | 143856 | String conssym = (value > 0 && consp > -1 |
804 | && consp < consSymbs.length) ? consSymbs[consp] : ""; | |
805 | 143855 | conservation.annotations[i] = new Annotation(String.valueOf(c), |
806 | conssym, ' ', value, new Color(minR + (maxR * vprop), | |
807 | minG + (maxG * vprop), minB + (maxB * vprop))); | |
808 | } | |
809 | ||
810 | // Quality calc | |
811 | 144013 | if (quality2 != null) |
812 | { | |
813 | 144011 | value = quality.elementAt(i).floatValue(); |
814 | 144008 | float vprop = value - qmin; |
815 | 144011 | vprop /= qmax; |
816 | 144010 | String description = FORMAT_3DP.form(value); |
817 | 144012 | quality2.annotations[i] = new Annotation(" ", description, ' ', |
818 | value, new Color(minR + (maxR * vprop), | |
819 | minG + (maxG * vprop), minB + (maxB * vprop))); | |
820 | } | |
821 | } | |
822 | } | |
823 | ||
824 | /** | |
825 | * construct and call the calculation methods on a new Conservation object | |
826 | * | |
827 | * @param name | |
828 | * - name of conservation | |
829 | * @param seqs | |
830 | * @param start | |
831 | * first column in calculation window | |
832 | * @param end | |
833 | * last column in calculation window | |
834 | * @param positiveOnly | |
835 | * calculate positive (true) or positive and negative (false) | |
836 | * conservation | |
837 | * @param maxPercentGaps | |
838 | * percentage of gaps tolerated in column | |
839 | * @param calcQuality | |
840 | * flag indicating if alignment quality should be calculated | |
841 | * @return Conservation object ready for use in visualization | |
842 | */ | |
843 | 1019 | public static Conservation calculateConservation(String name, |
844 | List<SequenceI> seqs, int start, int end, boolean positiveOnly, | |
845 | int maxPercentGaps, boolean calcQuality) | |
846 | { | |
847 | 1019 | Conservation cons = new Conservation(name, seqs, start, end); |
848 | 1019 | cons.calculate(); |
849 | 1019 | cons.verdict(positiveOnly, maxPercentGaps); |
850 | ||
851 | 1019 | if (calcQuality) |
852 | { | |
853 | 1016 | cons.findQuality(); |
854 | } | |
855 | ||
856 | 1019 | return cons; |
857 | } | |
858 | ||
859 | /** | |
860 | * Returns the computed tooltip (annotation description) for a given column. | |
861 | * The tip is empty if the conservation score is zero, otherwise holds the | |
862 | * conserved properties (and, optionally, properties whose absence is | |
863 | * conserved). | |
864 | * | |
865 | * @param column | |
866 | * @return | |
867 | */ | |
868 | 7 | String getTooltip(int column) |
869 | { | |
870 | 7 | SequenceI cons = getConsSequence(); |
871 | 7 | char val = column < cons.getLength() ? cons.getCharAt(column) : '-'; |
872 | 7 | boolean hasConservation = val != '-' && val != '0'; |
873 | 7 | int consp = column - start; |
874 | 7 | String tip = (hasConservation && consp > -1 && consp < consSymbs.length) |
875 | ? consSymbs[consp] | |
876 | : ""; | |
877 | 7 | return tip; |
878 | } | |
879 | } |