Class | Line # | Actions | |||
---|---|---|---|---|---|
Grouping | 40 | 63 | 14 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | package jalview.analysis; | |
22 | ||
23 | import jalview.datamodel.ColumnSelection; | |
24 | import jalview.datamodel.SequenceGroup; | |
25 | import jalview.datamodel.SequenceI; | |
26 | ||
27 | import java.util.ArrayList; | |
28 | import java.util.HashMap; | |
29 | import java.util.List; | |
30 | import java.util.Map; | |
31 | import java.util.Vector; | |
32 | ||
33 | /** | |
34 | * various methods for defining groups on an alignment based on some other | |
35 | * properties | |
36 | * | |
37 | * @author JimP | |
38 | * | |
39 | */ | |
40 | public class Grouping | |
41 | { | |
42 | /** | |
43 | * Divide the given sequences based on the equivalence of their corresponding | |
44 | * selectedChars string. If exgroups is provided, existing groups will be | |
45 | * subdivided. | |
46 | * | |
47 | * @param sequences | |
48 | * @param selectedChars | |
49 | * @param list | |
50 | * @return | |
51 | */ | |
52 | 0 | public static SequenceGroup[] makeGroupsFrom(SequenceI[] sequences, |
53 | String[] selectedChars, List<SequenceGroup> list) | |
54 | { | |
55 | // TODO: determine how to get/recover input data for group generation | |
56 | 0 | Map<String, List<SequenceI>> gps = new HashMap<String, List<SequenceI>>(); |
57 | 0 | int width = 0, i; |
58 | 0 | Map<String, SequenceGroup> pgroup = new HashMap<String, SequenceGroup>(); |
59 | 0 | if (list != null) |
60 | { | |
61 | 0 | for (SequenceGroup sg : list) |
62 | { | |
63 | 0 | for (SequenceI sq : sg.getSequences(null)) |
64 | { | |
65 | 0 | pgroup.put(sq.toString(), sg); |
66 | } | |
67 | } | |
68 | } | |
69 | 0 | for (i = 0; i < sequences.length; i++) |
70 | { | |
71 | 0 | String schar = selectedChars[i]; |
72 | 0 | SequenceGroup pgp = pgroup.get(((Object) sequences[i]).toString()); |
73 | 0 | if (pgp != null) |
74 | { | |
75 | 0 | schar = pgp.getName() + ":" + schar; |
76 | } | |
77 | 0 | List<SequenceI> svec = gps.get(schar); |
78 | 0 | if (svec == null) |
79 | { | |
80 | 0 | svec = new ArrayList<SequenceI>(); |
81 | 0 | gps.put(schar, svec); |
82 | } | |
83 | 0 | if (width < sequences[i].getLength()) |
84 | { | |
85 | 0 | width = sequences[i].getLength(); |
86 | } | |
87 | 0 | svec.add(sequences[i]); |
88 | } | |
89 | // make some groups | |
90 | 0 | SequenceGroup[] groups = new SequenceGroup[gps.size()]; |
91 | 0 | i = 0; |
92 | 0 | for (String key : gps.keySet()) |
93 | { | |
94 | 0 | SequenceGroup group = new SequenceGroup(gps.get(key), |
95 | "Subseq: " + key, null, true, true, false, 0, width - 1); | |
96 | ||
97 | 0 | groups[i++] = group; |
98 | } | |
99 | 0 | gps.clear(); |
100 | 0 | pgroup.clear(); |
101 | 0 | return groups; |
102 | } | |
103 | ||
104 | /** | |
105 | * Divide the given sequences based on the equivalence of characters at | |
106 | * selected columns If exgroups is provided, existing groups will be | |
107 | * subdivided. | |
108 | * | |
109 | * @param sequences | |
110 | * @param columnSelection | |
111 | * @param list | |
112 | * @return | |
113 | */ | |
114 | 0 | public static SequenceGroup[] makeGroupsFromCols(SequenceI[] sequences, |
115 | ColumnSelection cs, List<SequenceGroup> list) | |
116 | { | |
117 | // TODO: determine how to get/recover input data for group generation | |
118 | 0 | Map<String, List<SequenceI>> gps = new HashMap<String, List<SequenceI>>(); |
119 | 0 | Map<String, SequenceGroup> pgroup = new HashMap<String, SequenceGroup>(); |
120 | 0 | if (list != null) |
121 | { | |
122 | 0 | for (SequenceGroup sg : list) |
123 | { | |
124 | 0 | for (SequenceI sq : sg.getSequences(null)) |
125 | { | |
126 | 0 | pgroup.put(sq.toString(), sg); |
127 | } | |
128 | } | |
129 | } | |
130 | ||
131 | /* | |
132 | * get selected columns (in the order they were selected); | |
133 | * note this could include right-to-left ranges | |
134 | */ | |
135 | 0 | int[] spos = new int[cs.getSelected().size()]; |
136 | 0 | int width = -1; |
137 | 0 | int i = 0; |
138 | 0 | for (Integer pos : cs.getSelected()) |
139 | { | |
140 | 0 | spos[i++] = pos.intValue(); |
141 | } | |
142 | ||
143 | 0 | for (i = 0; i < sequences.length; i++) |
144 | { | |
145 | 0 | int slen = sequences[i].getLength(); |
146 | 0 | if (width < slen) |
147 | { | |
148 | 0 | width = slen; |
149 | } | |
150 | ||
151 | 0 | SequenceGroup pgp = pgroup.get(((Object) sequences[i]).toString()); |
152 | 0 | StringBuilder schar = new StringBuilder(); |
153 | 0 | if (pgp != null) |
154 | { | |
155 | 0 | schar.append(pgp.getName() + ":"); |
156 | } | |
157 | 0 | for (int p : spos) |
158 | { | |
159 | 0 | if (p >= slen) |
160 | { | |
161 | 0 | schar.append("~"); |
162 | } | |
163 | else | |
164 | { | |
165 | 0 | schar.append(sequences[i].getCharAt(p)); |
166 | } | |
167 | } | |
168 | 0 | List<SequenceI> svec = gps.get(schar.toString()); |
169 | 0 | if (svec == null) |
170 | { | |
171 | 0 | svec = new ArrayList<SequenceI>(); |
172 | 0 | gps.put(schar.toString(), svec); |
173 | } | |
174 | 0 | svec.add(sequences[i]); |
175 | } | |
176 | // make some groups | |
177 | 0 | SequenceGroup[] groups = new SequenceGroup[gps.size()]; |
178 | 0 | i = 0; |
179 | 0 | for (String key : gps.keySet()) |
180 | { | |
181 | 0 | SequenceGroup group = new SequenceGroup(gps.get(key), |
182 | "Subseq: " + key, null, true, true, false, 0, width - 1); | |
183 | ||
184 | 0 | groups[i++] = group; |
185 | } | |
186 | 0 | gps.clear(); |
187 | 0 | pgroup.clear(); |
188 | 0 | return groups; |
189 | } | |
190 | ||
191 | /** | |
192 | * subdivide the given sequences based on the distribution of features | |
193 | * | |
194 | * @param featureLabels | |
195 | * - null or one or more feature types to filter on. | |
196 | * @param groupLabels | |
197 | * - null or set of groups to filter features on | |
198 | * @param start | |
199 | * - range for feature filter | |
200 | * @param stop | |
201 | * - range for feature filter | |
202 | * @param sequences | |
203 | * - sequences to be divided | |
204 | * @param exgroups | |
205 | * - existing groups to be subdivided | |
206 | * @param method | |
207 | * - density, description, score | |
208 | */ | |
209 | 0 | public static void divideByFeature(String[] featureLabels, |
210 | String[] groupLabels, int start, int stop, SequenceI[] sequences, | |
211 | Vector exgroups, String method) | |
212 | { | |
213 | // TODO implement divideByFeature | |
214 | /* | |
215 | * if (method!=AlignmentSorter.FEATURE_SCORE && | |
216 | * method!=AlignmentSorter.FEATURE_LABEL && | |
217 | * method!=AlignmentSorter.FEATURE_DENSITY) { throw newError( | |
218 | * "Implementation Error - sortByFeature method must be one of FEATURE_SCORE, FEATURE_LABEL or FEATURE_DENSITY." | |
219 | * ); } boolean ignoreScore=method!=AlignmentSorter.FEATURE_SCORE; | |
220 | * StringBuffer scoreLabel = new StringBuffer(); | |
221 | * scoreLabel.append(start+stop+method); // This doesn't work yet - we'd | |
222 | * like to have a canonical ordering that can be preserved from call to call | |
223 | * for (int i=0;featureLabels!=null && i<featureLabels.length; i++) { | |
224 | * scoreLabel.append(featureLabels[i]==null ? "null" : featureLabels[i]); } | |
225 | * for (int i=0;groupLabels!=null && i<groupLabels.length; i++) { | |
226 | * scoreLabel.append(groupLabels[i]==null ? "null" : groupLabels[i]); } | |
227 | * SequenceI[] seqs = alignment.getSequencesArray(); | |
228 | * | |
229 | * boolean[] hasScore = new boolean[seqs.length]; // per sequence score // | |
230 | * presence int hasScores = 0; // number of scores present on set double[] | |
231 | * scores = new double[seqs.length]; int[] seqScores = new int[seqs.length]; | |
232 | * Object[] feats = new Object[seqs.length]; double min = 0, max = 0; for | |
233 | * (int i = 0; i < seqs.length; i++) { SequenceFeature[] sf = | |
234 | * seqs[i].getSequenceFeatures(); if (sf==null && | |
235 | * seqs[i].getDatasetSequence()!=null) { sf = | |
236 | * seqs[i].getDatasetSequence().getSequenceFeatures(); } if (sf==null) { sf | |
237 | * = new SequenceFeature[0]; } else { SequenceFeature[] tmp = new | |
238 | * SequenceFeature[sf.length]; for (int s=0; s<tmp.length;s++) { tmp[s] = | |
239 | * sf[s]; } sf = tmp; } int sstart = (start==-1) ? start : | |
240 | * seqs[i].findPosition(start); int sstop = (stop==-1) ? stop : | |
241 | * seqs[i].findPosition(stop); seqScores[i]=0; scores[i]=0.0; int | |
242 | * n=sf.length; for (int f=0;f<sf.length;f++) { // filter for selection | |
243 | * criteria if ( // ignore features outwith alignment start-stop positions. | |
244 | * (sf[f].end < sstart || sf[f].begin > sstop) || // or ignore based on | |
245 | * selection criteria (featureLabels != null && | |
246 | * !AlignmentSorter.containsIgnoreCase(sf[f].type, featureLabels)) || | |
247 | * (groupLabels != null // problem here: we cannot eliminate null feature | |
248 | * group features && (sf[f].getFeatureGroup() != null && | |
249 | * !AlignmentSorter.containsIgnoreCase(sf[f].getFeatureGroup(), | |
250 | * groupLabels)))) { // forget about this feature sf[f] = null; n--; } else | |
251 | * { // or, also take a look at the scores if necessary. if (!ignoreScore && | |
252 | * sf[f].getScore()!=Float.NaN) { if (seqScores[i]==0) { hasScores++; } | |
253 | * seqScores[i]++; hasScore[i] = true; scores[i] += sf[f].getScore(); // | |
254 | * take the first instance of this // score. } } } SequenceFeature[] fs; | |
255 | * feats[i] = fs = new SequenceFeature[n]; if (n>0) { n=0; for (int | |
256 | * f=0;f<sf.length;f++) { if (sf[f]!=null) { ((SequenceFeature[]) | |
257 | * feats[i])[n++] = sf[f]; } } if (method==FEATURE_LABEL) { // order the | |
258 | * labels by alphabet String[] labs = new String[fs.length]; for (int | |
259 | * l=0;l<labs.length; l++) { labs[l] = (fs[l].getDescription()!=null ? | |
260 | * fs[l].getDescription() : fs[l].getType()); } | |
261 | * jalview.util.QuickSort.sort(labs, ((Object[]) feats[i])); } } if | |
262 | * (hasScore[i]) { // compute average score scores[i]/=seqScores[i]; // | |
263 | * update the score bounds. if (hasScores == 1) { max = min = scores[i]; } | |
264 | * else { if (max < scores[i]) { max = scores[i]; } if (min > scores[i]) { | |
265 | * min = scores[i]; } } } } | |
266 | * | |
267 | * if (method==FEATURE_SCORE) { if (hasScores == 0) { return; // do nothing | |
268 | * - no scores present to sort by. } // pad score matrix if (hasScores < | |
269 | * seqs.length) { for (int i = 0; i < seqs.length; i++) { if (!hasScore[i]) | |
270 | * { scores[i] = (max + i); } else { int nf=(feats[i]==null) ? 0 | |
271 | * :((SequenceFeature[]) feats[i]).length; | |
272 | * jalview.bin.Console.errPrintln("Sorting on Score: seq "+seqs[i].getName()+ | |
273 | * " Feats: "+nf+" Score : "+scores[i]); } } } | |
274 | * | |
275 | * jalview.util.QuickSort.sort(scores, seqs); } else if | |
276 | * (method==FEATURE_DENSITY) { | |
277 | * | |
278 | * // break ties between equivalent numbers for adjacent sequences by adding | |
279 | * 1/Nseq*i on the original order double fr = 0.9/(1.0*seqs.length); for | |
280 | * (int i=0;i<seqs.length; i++) { double nf; scores[i] = | |
281 | * (0.05+fr*i)+(nf=((feats[i]==null) ? 0.0 :1.0*((SequenceFeature[]) | |
282 | * feats[i]).length)); | |
283 | * jalview.bin.Console.errPrintln("Sorting on Density: seq "+seqs[i].getName()+ | |
284 | * " Feats: "+nf+" Score : "+scores[i]); } | |
285 | * jalview.util.QuickSort.sort(scores, seqs); } else { if | |
286 | * (method==FEATURE_LABEL) { throw new Error("Not yet implemented."); } } if | |
287 | * (lastSortByFeatureScore ==null || | |
288 | * scoreLabel.equals(lastSortByFeatureScore)) { setOrder(alignment, seqs); } | |
289 | * else { setReverseOrder(alignment, seqs); } lastSortByFeatureScore = | |
290 | * scoreLabel.toString(); | |
291 | */ | |
292 | } | |
293 | ||
294 | } |