Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
SeqsetUtils | 33 | 87 | 39 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | package jalview.analysis; | |
22 | ||
23 | import jalview.datamodel.PDBEntry; | |
24 | import jalview.datamodel.Sequence; | |
25 | import jalview.datamodel.SequenceFeature; | |
26 | import jalview.datamodel.SequenceI; | |
27 | ||
28 | import java.util.Enumeration; | |
29 | import java.util.Hashtable; | |
30 | import java.util.List; | |
31 | import java.util.Vector; | |
32 | ||
33 | public class SeqsetUtils | |
34 | { | |
35 | ||
36 | /** | |
37 | * Store essential properties of a sequence in a hashtable for later recovery | |
38 | * Keys are Name, Start, End, SeqFeatures, PdbId | |
39 | * | |
40 | * @param seq | |
41 | * SequenceI | |
42 | * @return Hashtable | |
43 | */ | |
44 | 237 | public static Hashtable SeqCharacterHash(SequenceI seq) |
45 | { | |
46 | 237 | Hashtable sqinfo = new Hashtable(); |
47 | 237 | sqinfo.put("Name", seq.getName()); |
48 | 237 | sqinfo.put("Start", Integer.valueOf(seq.getStart())); |
49 | 237 | sqinfo.put("End", Integer.valueOf(seq.getEnd())); |
50 | 237 | if (seq.getDescription() != null) |
51 | { | |
52 | 34 | sqinfo.put("Description", seq.getDescription()); |
53 | } | |
54 | ||
55 | 237 | Vector<SequenceFeature> sfeat = new Vector<SequenceFeature>(); |
56 | 237 | List<SequenceFeature> sfs = seq.getFeatures().getAllFeatures(); |
57 | 237 | sfeat.addAll(sfs); |
58 | ||
59 | 237 | if (seq.getDatasetSequence() == null) |
60 | { | |
61 | 17 | sqinfo.put("SeqFeatures", sfeat); |
62 | 17 | sqinfo.put("PdbId", |
63 | 17 | (seq.getAllPDBEntries() != null) ? seq.getAllPDBEntries() |
64 | : new Vector<PDBEntry>()); | |
65 | } | |
66 | else | |
67 | { | |
68 | 220 | sqinfo.put("datasetSequence", |
69 | 220 | (seq.getDatasetSequence() != null) ? seq.getDatasetSequence() |
70 | : new Sequence("THISISAPLACEHOLDER", "")); | |
71 | } | |
72 | 237 | return sqinfo; |
73 | } | |
74 | ||
75 | /** | |
76 | * Recover essential properties of a sequence from a hashtable TODO: replace | |
77 | * these methods with something more elegant. | |
78 | * | |
79 | * @param sq | |
80 | * SequenceI | |
81 | * @param sqinfo | |
82 | * Hashtable | |
83 | * @return boolean true if name was not updated from sqinfo Name entry | |
84 | */ | |
85 | 174 | public static boolean SeqCharacterUnhash(SequenceI sq, Hashtable sqinfo) |
86 | { | |
87 | 174 | boolean namePresent = true; |
88 | 174 | if (sqinfo == null) |
89 | { | |
90 | 0 | return false; |
91 | } | |
92 | 174 | String oldname = (String) sqinfo.get("Name"); |
93 | 174 | Integer start = (Integer) sqinfo.get("Start"); |
94 | 174 | Integer end = (Integer) sqinfo.get("End"); |
95 | 174 | Vector<SequenceFeature> sfeatures = (Vector<SequenceFeature>) sqinfo |
96 | .get("SeqFeatures"); | |
97 | 174 | Vector<PDBEntry> pdbid = (Vector<PDBEntry>) sqinfo.get("PdbId"); |
98 | 174 | String description = (String) sqinfo.get("Description"); |
99 | 174 | Sequence seqds = (Sequence) sqinfo.get("datasetSequence"); |
100 | 174 | if (oldname == null) |
101 | { | |
102 | 0 | namePresent = false; |
103 | } | |
104 | else | |
105 | { | |
106 | 174 | sq.setName(oldname); |
107 | } | |
108 | 174 | if (pdbid != null && pdbid.size() > 0) |
109 | { | |
110 | 0 | sq.setPDBId(pdbid); |
111 | } | |
112 | ||
113 | 174 | if ((start != null) && (end != null)) |
114 | { | |
115 | 174 | sq.setStart(start.intValue()); |
116 | 174 | sq.setEnd(end.intValue()); |
117 | } | |
118 | ||
119 | 174 | if (sfeatures != null && !sfeatures.isEmpty()) |
120 | { | |
121 | 0 | sq.setSequenceFeatures(sfeatures); |
122 | } | |
123 | 174 | if (description != null) |
124 | { | |
125 | 2 | sq.setDescription(description); |
126 | } | |
127 | 174 | if ((seqds != null) && !(seqds.getName().equals("THISISAPLACEHOLDER") |
128 | && seqds.getLength() == 0)) | |
129 | { | |
130 | 172 | if (sfeatures != null) |
131 | { | |
132 | 0 | jalview.bin.Console.errPrintln( |
133 | "Implementation error: setting dataset sequence for a sequence which has sequence features.\n\tDataset sequence features will not be visible."); | |
134 | } | |
135 | 172 | sq.setDatasetSequence(seqds); |
136 | } | |
137 | ||
138 | 174 | return namePresent; |
139 | } | |
140 | ||
141 | /** | |
142 | * Form of the unique name used in uniquify for the i'th sequence in an | |
143 | * ordered vector of sequences. | |
144 | * | |
145 | * @param i | |
146 | * int | |
147 | * @return String | |
148 | */ | |
149 | 101 | public static String unique_name(int i) |
150 | { | |
151 | 101 | return new String("Sequence" + i); |
152 | } | |
153 | ||
154 | /** | |
155 | * Generates a hash of SeqCharacterHash properties for each sequence in a | |
156 | * sequence set, and optionally renames the sequences to an unambiguous 'safe' | |
157 | * name. | |
158 | * | |
159 | * @param sequences | |
160 | * SequenceI[] | |
161 | * @param write_names | |
162 | * boolean set this to rename each of the sequences to its | |
163 | * unique_name(index) name | |
164 | * @return Hashtable to be passed to | |
165 | * @see deuniquify to recover original names (and properties) for renamed | |
166 | * sequences | |
167 | */ | |
168 | 4 | public static Hashtable uniquify(SequenceI[] sequences, |
169 | boolean write_names) | |
170 | { | |
171 | // Generate a safely named sequence set and a hash to recover the sequence | |
172 | // names | |
173 | 4 | Hashtable map = new Hashtable(); |
174 | // String[] un_names = new String[sequences.length]; | |
175 | ||
176 | 105 | for (int i = 0; i < sequences.length; i++) |
177 | { | |
178 | 101 | String safename = unique_name(i); |
179 | 101 | map.put(safename, SeqCharacterHash(sequences[i])); |
180 | ||
181 | 101 | if (write_names) |
182 | { | |
183 | 101 | sequences[i].setName(safename); |
184 | } | |
185 | } | |
186 | ||
187 | 4 | return map; |
188 | } | |
189 | ||
190 | /** | |
191 | * recover unsafe sequence names and original properties for a sequence set | |
192 | * using a map generated by | |
193 | * | |
194 | * @see uniquify(sequences,true) | |
195 | * @param map | |
196 | * Hashtable | |
197 | * @param sequences | |
198 | * SequenceI[] | |
199 | * @return boolean | |
200 | */ | |
201 | 1 | public static boolean deuniquify(Hashtable map, SequenceI[] sequences) |
202 | { | |
203 | 1 | return deuniquify(map, sequences, true); |
204 | } | |
205 | ||
206 | /** | |
207 | * recover unsafe sequence names and original properties for a sequence set | |
208 | * using a map generated by | |
209 | * | |
210 | * @see uniquify(sequences,true) | |
211 | * @param map | |
212 | * Hashtable | |
213 | * @param sequences | |
214 | * SequenceI[] | |
215 | * @param quiet | |
216 | * when false, don't complain about sequences without any data in the | |
217 | * map. | |
218 | * @return boolean | |
219 | */ | |
220 | 1 | public static boolean deuniquify(Hashtable map, SequenceI[] sequences, |
221 | boolean quiet) | |
222 | { | |
223 | 1 | jalview.analysis.SequenceIdMatcher matcher = new SequenceIdMatcher( |
224 | sequences); | |
225 | 1 | SequenceI msq = null; |
226 | 1 | Enumeration keys = map.keys(); |
227 | 1 | Vector unmatched = new Vector(); |
228 | 3 | for (int i = 0, j = sequences.length; i < j; i++) |
229 | { | |
230 | 2 | unmatched.addElement(sequences[i]); |
231 | } | |
232 | 3 | while (keys.hasMoreElements()) |
233 | { | |
234 | 2 | Object key = keys.nextElement(); |
235 | 2 | if (key instanceof String) |
236 | { | |
237 | ? | if ((msq = matcher.findIdMatch((String) key)) != null) |
238 | { | |
239 | 2 | Hashtable sqinfo = (Hashtable) map.get(key); |
240 | 2 | unmatched.removeElement(msq); |
241 | 2 | SeqCharacterUnhash(msq, sqinfo); |
242 | } | |
243 | else | |
244 | { | |
245 | 0 | if (!quiet) |
246 | { | |
247 | 0 | jalview.bin.Console.errPrintln("Can't find '" + ((String) key) |
248 | + "' in uniquified alignment"); | |
249 | } | |
250 | } | |
251 | } | |
252 | } | |
253 | 1 | if (unmatched.size() > 0 && !quiet) |
254 | { | |
255 | 0 | jalview.bin.Console.errPrintln("Did not find matches for :"); |
256 | 0 | for (Enumeration i = unmatched.elements(); i |
257 | .hasMoreElements(); System.out | |
258 | .println(((SequenceI) i.nextElement()).getName())) | |
259 | { | |
260 | 0 | ; |
261 | } | |
262 | 0 | return false; |
263 | } | |
264 | ||
265 | 1 | return true; |
266 | } | |
267 | ||
268 | /** | |
269 | * returns a subset of the sequenceI seuqences, including only those that | |
270 | * contain at least one residue. | |
271 | * | |
272 | * @param sequences | |
273 | * SequenceI[] | |
274 | * @return SequenceI[] | |
275 | */ | |
276 | 0 | public static SequenceI[] getNonEmptySequenceSet(SequenceI[] sequences) |
277 | { | |
278 | // Identify first row of alignment with residues for prediction | |
279 | 0 | boolean ungapped[] = new boolean[sequences.length]; |
280 | 0 | int msflen = 0; |
281 | 0 | for (int i = 0, j = sequences.length; i < j; i++) |
282 | { | |
283 | 0 | String tempseq = jalview.analysis.AlignSeq.extractGaps( |
284 | jalview.util.Comparison.GapChars, | |
285 | sequences[i].getSequenceAsString()); | |
286 | ||
287 | 0 | if (tempseq.length() == 0) |
288 | { | |
289 | 0 | ungapped[i] = false; |
290 | } | |
291 | else | |
292 | { | |
293 | 0 | ungapped[i] = true; |
294 | 0 | msflen++; |
295 | } | |
296 | } | |
297 | 0 | if (msflen == 0) |
298 | { | |
299 | 0 | return null; // no minimal set |
300 | } | |
301 | // compose minimal set | |
302 | 0 | SequenceI[] mset = new SequenceI[msflen]; |
303 | 0 | for (int i = 0, j = sequences.length, k = 0; i < j; i++) |
304 | { | |
305 | 0 | if (ungapped[i]) |
306 | { | |
307 | 0 | mset[k++] = sequences[i]; |
308 | } | |
309 | } | |
310 | 0 | ungapped = null; |
311 | 0 | return mset; |
312 | } | |
313 | } |