Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
AlignmentSorter | 55 | 239 | 100 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | package jalview.analysis; | |
22 | ||
23 | import jalview.analysis.scoremodels.PIDModel; | |
24 | import jalview.analysis.scoremodels.SimilarityParams; | |
25 | import jalview.datamodel.AlignmentAnnotation; | |
26 | import jalview.datamodel.AlignmentI; | |
27 | import jalview.datamodel.AlignmentOrder; | |
28 | import jalview.datamodel.BinaryNode; | |
29 | import jalview.datamodel.SequenceFeature; | |
30 | import jalview.datamodel.SequenceGroup; | |
31 | import jalview.datamodel.SequenceI; | |
32 | import jalview.datamodel.SequenceNode; | |
33 | import jalview.util.QuickSort; | |
34 | ||
35 | import java.util.ArrayList; | |
36 | import java.util.Collections; | |
37 | import java.util.Iterator; | |
38 | import java.util.List; | |
39 | ||
40 | /** | |
41 | * Routines for manipulating the order of a multiple sequence alignment TODO: | |
42 | * this class retains some global states concerning sort-order which should be | |
43 | * made attributes for the caller's alignment visualization. TODO: refactor to | |
44 | * allow a subset of selected sequences to be sorted within the context of a | |
45 | * whole alignment. Sort method template is: SequenceI[] tobesorted, [ input | |
46 | * data mapping to each tobesorted element to use ], Alignment context of | |
47 | * tobesorted that are to be re-ordered, boolean sortinplace, [special data - ie | |
48 | * seuqence to be sorted w.r.t.]) sortinplace implies that the sorted vector | |
49 | * resulting from applying the operation to tobesorted should be mapped back to | |
50 | * the original positions in alignment. Otherwise, normal behaviour is to re | |
51 | * order alignment so that tobesorted is sorted and grouped together starting | |
52 | * from the first tobesorted position in the alignment. e.g. (a,tb2,b,tb1,c,tb3 | |
53 | * becomes a,tb1,tb2,tb3,b,c) | |
54 | */ | |
55 | public class AlignmentSorter | |
56 | { | |
57 | /* | |
58 | * todo: refactor searches to follow a basic pattern: (search property, last | |
59 | * search state, current sort direction) | |
60 | */ | |
61 | static boolean sortIdAscending = true; | |
62 | ||
63 | static int lastGroupHash = 0; | |
64 | ||
65 | static boolean sortGroupAscending = true; | |
66 | ||
67 | static AlignmentOrder lastOrder = null; | |
68 | ||
69 | static boolean sortOrderAscending = true; | |
70 | ||
71 | static TreeModel lastTree = null; | |
72 | ||
73 | static boolean sortTreeAscending = true; | |
74 | ||
75 | /* | |
76 | * last Annotation Label used for sort by Annotation score | |
77 | */ | |
78 | private static String lastSortByAnnotation; | |
79 | ||
80 | /* | |
81 | * string hash of last arguments to sortByFeature | |
82 | * (sort order toggles if this is unchanged between sorts) | |
83 | */ | |
84 | private static String sortByFeatureCriteria; | |
85 | ||
86 | private static boolean sortByFeatureAscending = true; | |
87 | ||
88 | private static boolean sortLengthAscending; | |
89 | ||
90 | /** | |
91 | * Sorts sequences in the alignment by Percentage Identity with the given | |
92 | * reference sequence, sorting the highest identity to the top | |
93 | * | |
94 | * @param align | |
95 | * AlignmentI | |
96 | * @param s | |
97 | * SequenceI | |
98 | * @param end | |
99 | */ | |
100 | 0 | public static void sortByPID(AlignmentI align, SequenceI s) |
101 | { | |
102 | 0 | int nSeq = align.getHeight(); |
103 | ||
104 | 0 | float[] scores = new float[nSeq]; |
105 | 0 | SequenceI[] seqs = new SequenceI[nSeq]; |
106 | 0 | String refSeq = s.getSequenceAsString(); |
107 | ||
108 | 0 | SimilarityParams pidParams = new SimilarityParams(true, true, true, |
109 | true); | |
110 | 0 | for (int i = 0; i < nSeq; i++) |
111 | { | |
112 | 0 | scores[i] = (float) PIDModel.computePID( |
113 | align.getSequenceAt(i).getSequenceAsString(), refSeq, | |
114 | pidParams); | |
115 | 0 | seqs[i] = align.getSequenceAt(i); |
116 | } | |
117 | ||
118 | 0 | QuickSort.sort(scores, seqs); |
119 | ||
120 | 0 | setReverseOrder(align, seqs); |
121 | } | |
122 | ||
123 | /** | |
124 | * Reverse the order of the sort | |
125 | * | |
126 | * @param align | |
127 | * DOCUMENT ME! | |
128 | * @param seqs | |
129 | * DOCUMENT ME! | |
130 | */ | |
131 | 2 | private static void setReverseOrder(AlignmentI align, SequenceI[] seqs) |
132 | { | |
133 | 2 | int nSeq = seqs.length; |
134 | ||
135 | 2 | int len = 0; |
136 | ||
137 | 2 | if ((nSeq % 2) == 0) |
138 | { | |
139 | 0 | len = nSeq / 2; |
140 | } | |
141 | else | |
142 | { | |
143 | 2 | len = (nSeq + 1) / 2; |
144 | } | |
145 | ||
146 | // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work | |
147 | 2 | List<SequenceI> asq = align.getSequences(); |
148 | 2 | synchronized (asq) |
149 | { | |
150 | 36 | for (int i = 0; i < len; i++) |
151 | { | |
152 | // SequenceI tmp = seqs[i]; | |
153 | 34 | asq.set(i, seqs[nSeq - i - 1]); |
154 | 34 | asq.set(nSeq - i - 1, seqs[i]); |
155 | } | |
156 | } | |
157 | } | |
158 | ||
159 | /** | |
160 | * Sets the Alignment object with the given sequences | |
161 | * | |
162 | * @param align | |
163 | * Alignment object to be updated | |
164 | * @param tmp | |
165 | * sequences as a vector | |
166 | */ | |
167 | 3 | private static void setOrder(AlignmentI align, List<SequenceI> tmp) |
168 | { | |
169 | 3 | setOrder(align, vectorSubsetToArray(tmp, align.getSequences())); |
170 | } | |
171 | ||
172 | /** | |
173 | * Sets the Alignment object with the given sequences | |
174 | * | |
175 | * @param align | |
176 | * DOCUMENT ME! | |
177 | * @param seqs | |
178 | * sequences as an array | |
179 | */ | |
180 | 9 | public static void setOrder(AlignmentI align, SequenceI[] seqs) |
181 | { | |
182 | // NOTE: DO NOT USE align.setSequenceAt() here - it will NOT work | |
183 | 9 | List<SequenceI> algn = align.getSequences(); |
184 | 9 | synchronized (algn) |
185 | { | |
186 | 9 | List<SequenceI> tmp = new ArrayList<>(); |
187 | ||
188 | 125 | for (int i = 0; i < seqs.length; i++) |
189 | { | |
190 | 116 | if (algn.contains(seqs[i])) |
191 | { | |
192 | 116 | tmp.add(seqs[i]); |
193 | } | |
194 | } | |
195 | ||
196 | 9 | algn.clear(); |
197 | // User may have hidden seqs, then clicked undo or redo | |
198 | 125 | for (int i = 0; i < tmp.size(); i++) |
199 | { | |
200 | 116 | algn.add(tmp.get(i)); |
201 | } | |
202 | } | |
203 | } | |
204 | ||
205 | /** | |
206 | * Sorts by ID. Numbers are sorted before letters. | |
207 | * | |
208 | * @param align | |
209 | * The alignment object to sort | |
210 | */ | |
211 | 0 | public static void sortByID(AlignmentI align) |
212 | { | |
213 | 0 | int nSeq = align.getHeight(); |
214 | ||
215 | 0 | String[] ids = new String[nSeq]; |
216 | 0 | SequenceI[] seqs = new SequenceI[nSeq]; |
217 | ||
218 | 0 | for (int i = 0; i < nSeq; i++) |
219 | { | |
220 | 0 | ids[i] = align.getSequenceAt(i).getName(); |
221 | 0 | seqs[i] = align.getSequenceAt(i); |
222 | } | |
223 | ||
224 | 0 | QuickSort.sort(ids, seqs); |
225 | ||
226 | 0 | if (sortIdAscending) |
227 | { | |
228 | 0 | setReverseOrder(align, seqs); |
229 | } | |
230 | else | |
231 | { | |
232 | 0 | setOrder(align, seqs); |
233 | } | |
234 | ||
235 | 0 | sortIdAscending = !sortIdAscending; |
236 | } | |
237 | ||
238 | /** | |
239 | * Sorts by sequence length | |
240 | * | |
241 | * @param align | |
242 | * The alignment object to sort | |
243 | */ | |
244 | 0 | public static void sortByLength(AlignmentI align) |
245 | { | |
246 | 0 | int nSeq = align.getHeight(); |
247 | ||
248 | 0 | float[] length = new float[nSeq]; |
249 | 0 | SequenceI[] seqs = new SequenceI[nSeq]; |
250 | ||
251 | 0 | for (int i = 0; i < nSeq; i++) |
252 | { | |
253 | 0 | seqs[i] = align.getSequenceAt(i); |
254 | 0 | length[i] = (seqs[i].getEnd() - seqs[i].getStart()); |
255 | } | |
256 | ||
257 | 0 | QuickSort.sort(length, seqs); |
258 | ||
259 | 0 | if (sortLengthAscending) |
260 | { | |
261 | 0 | setReverseOrder(align, seqs); |
262 | } | |
263 | else | |
264 | { | |
265 | 0 | setOrder(align, seqs); |
266 | } | |
267 | ||
268 | 0 | sortLengthAscending = !sortLengthAscending; |
269 | } | |
270 | ||
271 | /** | |
272 | * Sorts the alignment by size of group. <br> | |
273 | * Maintains the order of sequences in each group by order in given alignment | |
274 | * object. | |
275 | * | |
276 | * @param align | |
277 | * sorts the given alignment object by group | |
278 | */ | |
279 | 4 | public static void sortByGroup(AlignmentI align) |
280 | { | |
281 | // MAINTAINS ORIGNAL SEQUENCE ORDER, | |
282 | // ORDERS BY GROUP SIZE | |
283 | 4 | List<SequenceGroup> groups = new ArrayList<>(); |
284 | ||
285 | 4 | if (groups.hashCode() != lastGroupHash) |
286 | { | |
287 | 1 | sortGroupAscending = true; |
288 | 1 | lastGroupHash = groups.hashCode(); |
289 | } | |
290 | else | |
291 | { | |
292 | 3 | sortGroupAscending = !sortGroupAscending; |
293 | } | |
294 | ||
295 | // SORTS GROUPS BY SIZE | |
296 | // //////////////////// | |
297 | 4 | for (SequenceGroup sg : align.getGroups()) |
298 | { | |
299 | 44 | for (int j = 0; j < groups.size(); j++) |
300 | { | |
301 | 32 | SequenceGroup sg2 = groups.get(j); |
302 | ||
303 | 32 | if (sg.getSize() > sg2.getSize()) |
304 | { | |
305 | 8 | groups.add(j, sg); |
306 | ||
307 | 8 | break; |
308 | } | |
309 | } | |
310 | ||
311 | 20 | if (!groups.contains(sg)) |
312 | { | |
313 | 12 | groups.add(sg); |
314 | } | |
315 | } | |
316 | ||
317 | // NOW ADD SEQUENCES MAINTAINING ALIGNMENT ORDER | |
318 | // ///////////////////////////////////////////// | |
319 | 4 | List<SequenceI> seqs = new ArrayList<>(); |
320 | ||
321 | 24 | for (int i = 0; i < groups.size(); i++) |
322 | { | |
323 | 20 | SequenceGroup sg = groups.get(i); |
324 | 20 | SequenceI[] orderedseqs = sg.getSequencesInOrder(align); |
325 | ||
326 | 152 | for (int j = 0; j < orderedseqs.length; j++) |
327 | { | |
328 | 132 | seqs.add(orderedseqs[j]); |
329 | } | |
330 | } | |
331 | ||
332 | 4 | if (sortGroupAscending) |
333 | { | |
334 | 2 | setOrder(align, seqs); |
335 | } | |
336 | else | |
337 | { | |
338 | 2 | setReverseOrder(align, |
339 | vectorSubsetToArray(seqs, align.getSequences())); | |
340 | } | |
341 | } | |
342 | ||
343 | /** | |
344 | * Select sequences in order from tmp that is present in mask, and any | |
345 | * remaining sequences in mask not in tmp | |
346 | * | |
347 | * @param tmp | |
348 | * thread safe collection of sequences | |
349 | * @param mask | |
350 | * thread safe collection of sequences | |
351 | * | |
352 | * @return intersect(tmp,mask)+intersect(complement(tmp),mask) | |
353 | */ | |
354 | 5 | private static SequenceI[] vectorSubsetToArray(List<SequenceI> tmp, |
355 | List<SequenceI> mask) | |
356 | { | |
357 | // or? | |
358 | // tmp2 = tmp.retainAll(mask); | |
359 | // return tmp2.addAll(mask.removeAll(tmp2)) | |
360 | ||
361 | 5 | ArrayList<SequenceI> seqs = new ArrayList<>(); |
362 | 5 | int i, idx; |
363 | 5 | boolean[] tmask = new boolean[mask.size()]; |
364 | ||
365 | 152 | for (i = 0; i < mask.size(); i++) |
366 | { | |
367 | 147 | tmask[i] = true; |
368 | } | |
369 | ||
370 | 152 | for (i = 0; i < tmp.size(); i++) |
371 | { | |
372 | 147 | SequenceI sq = tmp.get(i); |
373 | 147 | idx = mask.indexOf(sq); |
374 | 147 | if (idx > -1 && tmask[idx]) |
375 | { | |
376 | 147 | tmask[idx] = false; |
377 | 147 | seqs.add(sq); |
378 | } | |
379 | } | |
380 | ||
381 | 152 | for (i = 0; i < tmask.length; i++) |
382 | { | |
383 | 147 | if (tmask[i]) |
384 | { | |
385 | 0 | seqs.add(mask.get(i)); |
386 | } | |
387 | } | |
388 | ||
389 | 5 | return seqs.toArray(new SequenceI[seqs.size()]); |
390 | } | |
391 | ||
392 | /** | |
393 | * Sorts by a given AlignmentOrder object | |
394 | * | |
395 | * @param align | |
396 | * Alignment to order | |
397 | * @param order | |
398 | * specified order for alignment | |
399 | */ | |
400 | 0 | public static void sortBy(AlignmentI align, AlignmentOrder order) |
401 | { | |
402 | // Get an ordered vector of sequences which may also be present in align | |
403 | 0 | List<SequenceI> tmp = order.getOrder(); |
404 | ||
405 | 0 | if (lastOrder == order) |
406 | { | |
407 | 0 | sortOrderAscending = !sortOrderAscending; |
408 | } | |
409 | else | |
410 | { | |
411 | 0 | sortOrderAscending = true; |
412 | } | |
413 | ||
414 | 0 | if (sortOrderAscending) |
415 | { | |
416 | 0 | setOrder(align, tmp); |
417 | } | |
418 | else | |
419 | { | |
420 | 0 | setReverseOrder(align, |
421 | vectorSubsetToArray(tmp, align.getSequences())); | |
422 | } | |
423 | } | |
424 | ||
425 | /** | |
426 | * DOCUMENT ME! | |
427 | * | |
428 | * @param align | |
429 | * alignment to order | |
430 | * @param tree | |
431 | * tree which has | |
432 | * | |
433 | * @return DOCUMENT ME! | |
434 | */ | |
435 | 1 | private static List<SequenceI> getOrderByTree(AlignmentI align, |
436 | TreeModel tree) | |
437 | { | |
438 | 1 | int nSeq = align.getHeight(); |
439 | ||
440 | 1 | List<SequenceI> tmp = new ArrayList<>(); |
441 | ||
442 | 1 | tmp = _sortByTree(tree.getTopNode(), tmp, align.getSequences()); |
443 | ||
444 | 1 | if (tmp.size() != nSeq) |
445 | { | |
446 | // TODO: JBPNote - decide if this is always an error | |
447 | // (eg. not when a tree is associated to another alignment which has more | |
448 | // sequences) | |
449 | 0 | if (tmp.size() != nSeq) |
450 | { | |
451 | 0 | addStrays(align, tmp); |
452 | } | |
453 | ||
454 | 0 | if (tmp.size() != nSeq) |
455 | { | |
456 | 0 | jalview.bin.Console.errPrintln("WARNING: tmp.size()=" + tmp.size() |
457 | + " != nseq=" + nSeq | |
458 | + " in getOrderByTree - tree contains sequences not in alignment"); | |
459 | } | |
460 | } | |
461 | ||
462 | 1 | return tmp; |
463 | } | |
464 | ||
465 | /** | |
466 | * Sorts the alignment by a given tree | |
467 | * | |
468 | * @param align | |
469 | * alignment to order | |
470 | * @param tree | |
471 | * tree which has | |
472 | */ | |
473 | 1 | public static void sortByTree(AlignmentI align, TreeModel tree) |
474 | { | |
475 | 1 | List<SequenceI> tmp = getOrderByTree(align, tree); |
476 | ||
477 | // tmp should properly permute align with tree. | |
478 | 1 | if (lastTree != tree) |
479 | { | |
480 | 1 | sortTreeAscending = true; |
481 | 1 | lastTree = tree; |
482 | } | |
483 | else | |
484 | { | |
485 | 0 | sortTreeAscending = !sortTreeAscending; |
486 | } | |
487 | ||
488 | 1 | if (sortTreeAscending) |
489 | { | |
490 | 1 | setOrder(align, tmp); |
491 | } | |
492 | else | |
493 | { | |
494 | 0 | setReverseOrder(align, |
495 | vectorSubsetToArray(tmp, align.getSequences())); | |
496 | } | |
497 | } | |
498 | ||
499 | /** | |
500 | * DOCUMENT ME! | |
501 | * | |
502 | * @param align | |
503 | * DOCUMENT ME! | |
504 | * @param tmp | |
505 | * DOCUMENT ME! | |
506 | */ | |
507 | 0 | private static void addStrays(AlignmentI align, List<SequenceI> tmp) |
508 | { | |
509 | 0 | int nSeq = align.getHeight(); |
510 | ||
511 | 0 | for (int i = 0; i < nSeq; i++) |
512 | { | |
513 | 0 | if (!tmp.contains(align.getSequenceAt(i))) |
514 | { | |
515 | 0 | tmp.add(align.getSequenceAt(i)); |
516 | } | |
517 | } | |
518 | ||
519 | 0 | if (nSeq != tmp.size()) |
520 | { | |
521 | 0 | System.err |
522 | .println("ERROR: Size still not right even after addStrays"); | |
523 | } | |
524 | } | |
525 | ||
526 | /** | |
527 | * DOCUMENT ME! | |
528 | * | |
529 | * @param node | |
530 | * DOCUMENT ME! | |
531 | * @param tmp | |
532 | * DOCUMENT ME! | |
533 | * @param seqset | |
534 | * DOCUMENT ME! | |
535 | * | |
536 | * @return DOCUMENT ME! | |
537 | */ | |
538 | 29 | private static List<SequenceI> _sortByTree(BinaryNode node, |
539 | List<SequenceI> tmp, List<SequenceI> seqset) | |
540 | { | |
541 | 29 | if (node == null) |
542 | { | |
543 | 0 | return tmp; |
544 | } | |
545 | ||
546 | 29 | BinaryNode left = (BinaryNode) node.left(); |
547 | 29 | BinaryNode right = (BinaryNode) node.right(); |
548 | ||
549 | 29 | if ((left == null) && (right == null)) |
550 | { | |
551 | 15 | if (!(node instanceof SequenceNode |
552 | && ((SequenceNode) node).isPlaceholder()) | |
553 | && (node.element() != null)) | |
554 | { | |
555 | 15 | if (node.element() instanceof SequenceI) |
556 | { | |
557 | 15 | if (!tmp.contains(node.element())) // && (seqset==null || |
558 | // seqset.size()==0 || | |
559 | // seqset.contains(tmp))) | |
560 | { | |
561 | 15 | tmp.add((SequenceI) node.element()); |
562 | } | |
563 | } | |
564 | } | |
565 | ||
566 | 15 | return tmp; |
567 | } | |
568 | else | |
569 | { | |
570 | 14 | _sortByTree(left, tmp, seqset); |
571 | 14 | _sortByTree(right, tmp, seqset); |
572 | } | |
573 | ||
574 | 14 | return tmp; |
575 | } | |
576 | ||
577 | // Ordering Objects | |
578 | // Alignment.sortBy(OrderObj) - sequence of sequence pointer refs in | |
579 | // appropriate order | |
580 | // | |
581 | ||
582 | /** | |
583 | * recover the order of sequences given by the safe numbering scheme introducd | |
584 | * SeqsetUtils.uniquify. | |
585 | */ | |
586 | 0 | public static void recoverOrder(SequenceI[] alignment) |
587 | { | |
588 | 0 | float[] ids = new float[alignment.length]; |
589 | ||
590 | 0 | for (int i = 0; i < alignment.length; i++) |
591 | { | |
592 | 0 | ids[i] = (Float.valueOf(alignment[i].getName().substring(8))) |
593 | .floatValue(); | |
594 | } | |
595 | ||
596 | 0 | jalview.util.QuickSort.sort(ids, alignment); |
597 | } | |
598 | ||
599 | /** | |
600 | * Sort sequence in order of increasing score attribute for annotation with a | |
601 | * particular scoreLabel. Or reverse if same label was used previously | |
602 | * | |
603 | * @param scoreLabel | |
604 | * exact label for sequence associated AlignmentAnnotation scores to | |
605 | * use for sorting. | |
606 | * @param alignment | |
607 | * sequences to be sorted | |
608 | */ | |
609 | 0 | public static void sortByAnnotationScore(String scoreLabel, |
610 | AlignmentI alignment) | |
611 | { | |
612 | 0 | SequenceI[] seqs = alignment.getSequencesArray(); |
613 | 0 | boolean[] hasScore = new boolean[seqs.length]; // per sequence score |
614 | // presence | |
615 | 0 | int hasScores = 0; // number of scores present on set |
616 | 0 | double[] scores = new double[seqs.length]; |
617 | 0 | double min = 0, max = 0; |
618 | 0 | for (int i = 0; i < seqs.length; i++) |
619 | { | |
620 | 0 | AlignmentAnnotation[] scoreAnn = seqs[i].getAnnotation(scoreLabel); |
621 | 0 | if (scoreAnn != null) |
622 | { | |
623 | 0 | hasScores++; |
624 | 0 | hasScore[i] = true; |
625 | 0 | scores[i] = scoreAnn[0].getScore(); // take the first instance of this |
626 | // score. | |
627 | 0 | if (hasScores == 1) |
628 | { | |
629 | 0 | max = min = scores[i]; |
630 | } | |
631 | else | |
632 | { | |
633 | 0 | if (max < scores[i]) |
634 | { | |
635 | 0 | max = scores[i]; |
636 | } | |
637 | 0 | if (min > scores[i]) |
638 | { | |
639 | 0 | min = scores[i]; |
640 | } | |
641 | } | |
642 | } | |
643 | else | |
644 | { | |
645 | 0 | hasScore[i] = false; |
646 | } | |
647 | } | |
648 | 0 | if (hasScores == 0) |
649 | { | |
650 | 0 | return; // do nothing - no scores present to sort by. |
651 | } | |
652 | 0 | if (hasScores < seqs.length) |
653 | { | |
654 | 0 | for (int i = 0; i < seqs.length; i++) |
655 | { | |
656 | 0 | if (!hasScore[i]) |
657 | { | |
658 | 0 | scores[i] = (max + i + 1.0); |
659 | } | |
660 | } | |
661 | } | |
662 | ||
663 | 0 | jalview.util.QuickSort.sort(scores, seqs); |
664 | 0 | if (lastSortByAnnotation != scoreLabel) |
665 | { | |
666 | 0 | lastSortByAnnotation = scoreLabel; |
667 | 0 | setOrder(alignment, seqs); |
668 | } | |
669 | else | |
670 | { | |
671 | 0 | setReverseOrder(alignment, seqs); |
672 | } | |
673 | } | |
674 | ||
675 | /** | |
676 | * types of feature ordering: Sort by score : average score - or total score - | |
677 | * over all features in region Sort by feature label text: (or if null - | |
678 | * feature type text) - numerical or alphabetical Sort by feature density: | |
679 | * based on counts - ignoring individual text or scores for each feature | |
680 | */ | |
681 | public static String FEATURE_SCORE = "average_score"; | |
682 | ||
683 | public static String FEATURE_LABEL = "text"; | |
684 | ||
685 | public static String FEATURE_DENSITY = "density"; | |
686 | ||
687 | /** | |
688 | * Sort sequences by feature score or density, optionally restricted by | |
689 | * feature types, feature groups, or alignment start/end positions. | |
690 | * <p> | |
691 | * If the sort is repeated for the same combination of types and groups, sort | |
692 | * order is reversed. | |
693 | * | |
694 | * @param featureTypes | |
695 | * a list of feature types to include (or null for all) | |
696 | * @param groups | |
697 | * a list of feature groups to include (or null for all) | |
698 | * @param startCol | |
699 | * start column position to include (base zero) | |
700 | * @param endCol | |
701 | * end column position to include (base zero) | |
702 | * @param alignment | |
703 | * the alignment to be sorted | |
704 | * @param method | |
705 | * either "average_score" or "density" ("text" not yet implemented) | |
706 | */ | |
707 | 6 | public static void sortByFeature(List<String> featureTypes, |
708 | List<String> groups, final int startCol, final int endCol, | |
709 | AlignmentI alignment, String method) | |
710 | { | |
711 | 6 | if (method != FEATURE_SCORE && method != FEATURE_LABEL |
712 | && method != FEATURE_DENSITY) | |
713 | { | |
714 | 0 | String msg = String.format( |
715 | "Implementation Error - sortByFeature method must be either '%s' or '%s'", | |
716 | FEATURE_SCORE, FEATURE_DENSITY); | |
717 | 0 | jalview.bin.Console.errPrintln(msg); |
718 | 0 | return; |
719 | } | |
720 | ||
721 | 6 | flipFeatureSortIfUnchanged(method, featureTypes, groups, startCol, |
722 | endCol); | |
723 | ||
724 | 6 | SequenceI[] seqs = alignment.getSequencesArray(); |
725 | ||
726 | 6 | boolean[] hasScore = new boolean[seqs.length]; // per sequence score |
727 | // presence | |
728 | 6 | int hasScores = 0; // number of scores present on set |
729 | 6 | double[] scores = new double[seqs.length]; |
730 | 6 | int[] seqScores = new int[seqs.length]; |
731 | 6 | Object[][] feats = new Object[seqs.length][]; |
732 | 6 | double min = 0d; |
733 | 6 | double max = 0d; |
734 | ||
735 | 30 | for (int i = 0; i < seqs.length; i++) |
736 | { | |
737 | /* | |
738 | * get sequence residues overlapping column region | |
739 | * and features for residue positions and specified types | |
740 | */ | |
741 | 24 | String[] types = featureTypes == null ? null |
742 | : featureTypes.toArray(new String[featureTypes.size()]); | |
743 | 24 | List<SequenceFeature> sfs = seqs[i].findFeatures(startCol + 1, |
744 | endCol + 1, types); | |
745 | ||
746 | 24 | seqScores[i] = 0; |
747 | 24 | scores[i] = 0.0; |
748 | ||
749 | 24 | Iterator<SequenceFeature> it = sfs.listIterator(); |
750 | 52 | while (it.hasNext()) |
751 | { | |
752 | 28 | SequenceFeature sf = it.next(); |
753 | ||
754 | /* | |
755 | * accept all features with null or empty group, otherwise | |
756 | * check group is one of the currently visible groups | |
757 | */ | |
758 | 28 | String featureGroup = sf.getFeatureGroup(); |
759 | 28 | if (groups != null && featureGroup != null |
760 | && !"".equals(featureGroup) | |
761 | && !groups.contains(featureGroup)) | |
762 | { | |
763 | 1 | it.remove(); |
764 | } | |
765 | else | |
766 | { | |
767 | 27 | float score = sf.getScore(); |
768 | 27 | if (FEATURE_SCORE.equals(method) && !Float.isNaN(score)) |
769 | { | |
770 | 21 | if (seqScores[i] == 0) |
771 | { | |
772 | 15 | hasScores++; |
773 | } | |
774 | 21 | seqScores[i]++; |
775 | 21 | hasScore[i] = true; |
776 | 21 | scores[i] += score; |
777 | // take the first instance of this score // ?? | |
778 | } | |
779 | } | |
780 | } | |
781 | ||
782 | 24 | feats[i] = sfs.toArray(new SequenceFeature[sfs.size()]); |
783 | 24 | if (!sfs.isEmpty()) |
784 | { | |
785 | 18 | if (method == FEATURE_LABEL) |
786 | { | |
787 | // order the labels by alphabet (not yet implemented) | |
788 | 0 | String[] labs = new String[sfs.size()]; |
789 | 0 | for (int l = 0; l < sfs.size(); l++) |
790 | { | |
791 | 0 | SequenceFeature sf = sfs.get(l); |
792 | 0 | String description = sf.getDescription(); |
793 | 0 | labs[l] = (description != null ? description : sf.getType()); |
794 | } | |
795 | 0 | QuickSort.sort(labs, feats[i]); |
796 | } | |
797 | } | |
798 | 24 | if (hasScore[i]) |
799 | { | |
800 | // compute average score | |
801 | 15 | scores[i] /= seqScores[i]; |
802 | // update the score bounds. | |
803 | 15 | if (hasScores == 1) |
804 | { | |
805 | 5 | min = scores[i]; |
806 | 5 | max = min; |
807 | } | |
808 | else | |
809 | { | |
810 | 10 | max = Math.max(max, scores[i]); |
811 | 10 | min = Math.min(min, scores[i]); |
812 | } | |
813 | } | |
814 | } | |
815 | ||
816 | 6 | if (FEATURE_SCORE.equals(method)) |
817 | { | |
818 | 6 | if (hasScores == 0) |
819 | { | |
820 | 1 | return; // do nothing - no scores present to sort by. |
821 | } | |
822 | // pad score matrix | |
823 | 5 | if (hasScores < seqs.length) |
824 | { | |
825 | 25 | for (int i = 0; i < seqs.length; i++) |
826 | { | |
827 | 20 | if (!hasScore[i]) |
828 | { | |
829 | 5 | scores[i] = (max + 1 + i); |
830 | } | |
831 | else | |
832 | { | |
833 | // int nf = (feats[i] == null) ? 0 | |
834 | // : ((SequenceFeature[]) feats[i]).length; | |
835 | // // jalview.bin.Console.errPrintln("Sorting on Score: seq " + | |
836 | // seqs[i].getName() | |
837 | // + " Feats: " + nf + " Score : " + scores[i]); | |
838 | } | |
839 | } | |
840 | } | |
841 | 5 | QuickSort.sortByDouble(scores, seqs, sortByFeatureAscending); |
842 | } | |
843 | 0 | else if (FEATURE_DENSITY.equals(method)) |
844 | { | |
845 | 0 | for (int i = 0; i < seqs.length; i++) |
846 | { | |
847 | 0 | int featureCount = feats[i] == null ? 0 |
848 | : ((SequenceFeature[]) feats[i]).length; | |
849 | 0 | scores[i] = featureCount; |
850 | // jalview.bin.Console.errPrintln("Sorting on Density: seq | |
851 | // "+seqs[i].getName()+ | |
852 | // " Feats: "+featureCount+" Score : "+scores[i]); | |
853 | } | |
854 | 0 | QuickSort.sortByDouble(scores, seqs, sortByFeatureAscending); |
855 | } | |
856 | ||
857 | 5 | setOrder(alignment, seqs); |
858 | } | |
859 | ||
860 | /** | |
861 | * Builds a string hash of criteria for sorting, and if unchanged from last | |
862 | * time, reverse the sort order | |
863 | * | |
864 | * @param method | |
865 | * @param featureTypes | |
866 | * @param groups | |
867 | * @param startCol | |
868 | * @param endCol | |
869 | */ | |
870 | 6 | protected static void flipFeatureSortIfUnchanged(String method, |
871 | List<String> featureTypes, List<String> groups, | |
872 | final int startCol, final int endCol) | |
873 | { | |
874 | 6 | StringBuilder sb = new StringBuilder(64); |
875 | 6 | sb.append(startCol).append(method).append(endCol); |
876 | 6 | if (featureTypes != null) |
877 | { | |
878 | 2 | Collections.sort(featureTypes); |
879 | 2 | sb.append(featureTypes.toString()); |
880 | } | |
881 | 6 | if (groups != null) |
882 | { | |
883 | 1 | Collections.sort(groups); |
884 | 1 | sb.append(groups.toString()); |
885 | } | |
886 | 6 | String scoreCriteria = sb.toString(); |
887 | ||
888 | /* | |
889 | * if resorting on the same criteria, toggle sort order | |
890 | */ | |
891 | 6 | if (sortByFeatureCriteria == null |
892 | || !scoreCriteria.equals(sortByFeatureCriteria)) | |
893 | { | |
894 | 4 | sortByFeatureAscending = true; |
895 | } | |
896 | else | |
897 | { | |
898 | 2 | sortByFeatureAscending = !sortByFeatureAscending; |
899 | } | |
900 | 6 | sortByFeatureCriteria = scoreCriteria; |
901 | } | |
902 | ||
903 | } |