Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
JPredFile | 56 | 115 | 44 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | /** | |
22 | * JPredFile.java | |
23 | * JalviewX / Vamsas Project | |
24 | * JPred.seq.concise reader | |
25 | */ | |
26 | package jalview.io; | |
27 | ||
28 | import java.util.Locale; | |
29 | ||
30 | import jalview.datamodel.Alignment; | |
31 | import jalview.datamodel.AlignmentAnnotation; | |
32 | import jalview.datamodel.Sequence; | |
33 | import jalview.datamodel.SequenceI; | |
34 | import jalview.util.MessageManager; | |
35 | ||
36 | import java.io.IOException; | |
37 | import java.util.Hashtable; | |
38 | import java.util.StringTokenizer; | |
39 | import java.util.Vector; | |
40 | ||
41 | /** | |
42 | * Parser for the JPred/JNet concise format. This is a series of CSV lines, each | |
43 | * line is either a sequence (QUERY), a sequence profile (align;), or jnet | |
44 | * prediction annotation (anything else). Automagic translation happens for | |
45 | * annotation called 'JNETPRED' (translated to Secondary Structure Prediction), | |
46 | * or 'JNETCONF' (translates to 'Prediction Confidence'). Numeric scores are | |
47 | * differentiated from symbolic by being parseable into a float vector. They are | |
48 | * put in Scores. Symscores gets the others. JNetAnnotationMaker translates the | |
49 | * data parsed by this object into annotation on an alignment. It is | |
50 | * automatically called but can be used to transfer the annotation onto a | |
51 | * sequence in another alignment (and insert gaps where necessary) | |
52 | * | |
53 | * @author jprocter | |
54 | * @version $Revision$ | |
55 | */ | |
56 | public class JPredFile extends AlignFile | |
57 | { | |
58 | Vector ids; | |
59 | ||
60 | Vector conf; | |
61 | ||
62 | Hashtable Scores; // Hash of names and score vectors | |
63 | ||
64 | Hashtable Symscores; // indexes of symbol annotation properties in sequenceI | |
65 | ||
66 | // vector | |
67 | ||
68 | private int QuerySeqPosition; | |
69 | ||
70 | /** | |
71 | * Creates a new JPredFile object. | |
72 | * | |
73 | * BH allows File or String | |
74 | * | |
75 | * @param inFile | |
76 | * DOCUMENT ME! | |
77 | * @param sourceType | |
78 | * DOCUMENT ME! | |
79 | * | |
80 | * @throws IOException | |
81 | * DOCUMENT ME! | |
82 | */ | |
83 | 0 | public JPredFile(Object inFile, DataSourceType sourceType) |
84 | throws IOException | |
85 | { | |
86 | 0 | super(inFile, sourceType); |
87 | } | |
88 | ||
89 | 0 | public JPredFile(FileParse source) throws IOException |
90 | { | |
91 | 0 | super(source); |
92 | } | |
93 | ||
94 | /** | |
95 | * DOCUMENT ME! | |
96 | * | |
97 | * @param QuerySeqPosition | |
98 | * DOCUMENT ME! | |
99 | */ | |
100 | 0 | public void setQuerySeqPosition(int QuerySeqPosition) |
101 | { | |
102 | 0 | this.QuerySeqPosition = QuerySeqPosition; |
103 | } | |
104 | ||
105 | /** | |
106 | * DOCUMENT ME! | |
107 | * | |
108 | * @return DOCUMENT ME! | |
109 | */ | |
110 | 0 | public int getQuerySeqPosition() |
111 | { | |
112 | 0 | return QuerySeqPosition; |
113 | } | |
114 | ||
115 | /** | |
116 | * DOCUMENT ME! | |
117 | * | |
118 | * @return DOCUMENT ME! | |
119 | */ | |
120 | 0 | public Hashtable getScores() |
121 | { | |
122 | 0 | return Scores; |
123 | } | |
124 | ||
125 | /** | |
126 | * DOCUMENT ME! | |
127 | * | |
128 | * @return DOCUMENT ME! | |
129 | */ | |
130 | 0 | public Hashtable getSymscores() |
131 | { | |
132 | 0 | return Symscores; |
133 | } | |
134 | ||
135 | /** | |
136 | * DOCUMENT ME! | |
137 | */ | |
138 | 0 | @Override |
139 | public void initData() | |
140 | { | |
141 | 0 | super.initData(); |
142 | 0 | Scores = new Hashtable(); |
143 | 0 | ids = null; |
144 | 0 | conf = null; |
145 | } | |
146 | ||
147 | /** | |
148 | * parse a JPred concise file into a sequence-alignment like object. | |
149 | */ | |
150 | 0 | @Override |
151 | public void parse() throws IOException | |
152 | { | |
153 | // JBPNote log.jalview.bin.Console.outPrintln("all read in "); | |
154 | 0 | String line; |
155 | 0 | QuerySeqPosition = -1; |
156 | 0 | noSeqs = 0; |
157 | ||
158 | 0 | Vector seq_entries = new Vector(); |
159 | 0 | Vector ids = new Vector(); |
160 | 0 | Hashtable Symscores = new Hashtable(); |
161 | ||
162 | 0 | while ((line = nextLine()) != null) |
163 | { | |
164 | // Concise format allows no comments or non comma-formatted data | |
165 | 0 | StringTokenizer str = new StringTokenizer(line, ":"); |
166 | 0 | String id = ""; |
167 | ||
168 | 0 | if (!str.hasMoreTokens()) |
169 | { | |
170 | 0 | continue; |
171 | } | |
172 | ||
173 | 0 | id = str.nextToken(); |
174 | ||
175 | 0 | String seqsym = str.nextToken(); |
176 | 0 | StringTokenizer symbols = new StringTokenizer(seqsym, ","); |
177 | ||
178 | // decide if we have more than just alphanumeric symbols | |
179 | 0 | int numSymbols = symbols.countTokens(); |
180 | ||
181 | 0 | if (numSymbols == 0) |
182 | { | |
183 | 0 | continue; |
184 | } | |
185 | ||
186 | 0 | if (seqsym.length() != (2 * numSymbols)) |
187 | { | |
188 | // Set of scalars for some property | |
189 | 0 | if (Scores.containsKey(id)) |
190 | { | |
191 | 0 | int i = 1; |
192 | ||
193 | 0 | while (Scores.containsKey(id + "_" + i)) |
194 | { | |
195 | 0 | i++; |
196 | } | |
197 | ||
198 | 0 | id = id + "_" + i; |
199 | } | |
200 | ||
201 | 0 | Vector scores = new Vector(); |
202 | ||
203 | // Typecheck from first entry | |
204 | 0 | int i = 0; |
205 | 0 | String ascore = "dead"; |
206 | ||
207 | 0 | try |
208 | { | |
209 | // store elements as floats... | |
210 | 0 | while (symbols.hasMoreTokens()) |
211 | { | |
212 | 0 | ascore = symbols.nextToken(); |
213 | ||
214 | 0 | Float score = Float.valueOf(ascore); |
215 | 0 | scores.addElement(score); |
216 | } | |
217 | ||
218 | 0 | Scores.put(id, scores); |
219 | } catch (Exception e) | |
220 | { | |
221 | // or just keep them as strings | |
222 | 0 | i = scores.size(); |
223 | ||
224 | 0 | for (int j = 0; j < i; j++) |
225 | { | |
226 | 0 | scores.setElementAt(((Float) scores.elementAt(j)).toString(), |
227 | j); | |
228 | } | |
229 | ||
230 | 0 | scores.addElement(ascore); |
231 | ||
232 | 0 | while (symbols.hasMoreTokens()) |
233 | { | |
234 | 0 | ascore = symbols.nextToken(); |
235 | 0 | scores.addElement(ascore); |
236 | } | |
237 | ||
238 | 0 | Scores.put(id, scores); |
239 | } | |
240 | } | |
241 | 0 | else if (id.equals("jnetconf")) |
242 | { | |
243 | // log.debug jalview.bin.Console.outPrintln("here"); | |
244 | 0 | id = "Prediction Confidence"; |
245 | 0 | this.conf = new Vector(numSymbols); |
246 | ||
247 | 0 | for (int i = 0; i < numSymbols; i++) |
248 | { | |
249 | 0 | conf.setElementAt(symbols.nextToken(), i); |
250 | } | |
251 | } | |
252 | else | |
253 | { | |
254 | // Sequence or a prediction string (rendered as sequence) | |
255 | 0 | StringBuffer newseq = new StringBuffer(); |
256 | ||
257 | 0 | for (int i = 0; i < numSymbols; i++) |
258 | { | |
259 | 0 | newseq.append(symbols.nextToken()); |
260 | } | |
261 | ||
262 | 0 | if (id.indexOf(";") > -1) |
263 | { | |
264 | 0 | seq_entries.addElement(newseq); |
265 | ||
266 | 0 | int i = 1; |
267 | 0 | String name = id.substring(id.indexOf(";") + 1); |
268 | ||
269 | 0 | while (ids.lastIndexOf(name) > -1) |
270 | { | |
271 | 0 | name = id.substring(id.indexOf(";") + 1) + "_" + ++i; |
272 | } | |
273 | ||
274 | 0 | if (QuerySeqPosition == -1) |
275 | { | |
276 | 0 | QuerySeqPosition = ids.size(); |
277 | } | |
278 | 0 | ids.addElement(name); |
279 | 0 | noSeqs++; |
280 | } | |
281 | else | |
282 | { | |
283 | 0 | if (id.equals("JNETPRED")) |
284 | { | |
285 | 0 | id = "Predicted Secondary Structure"; |
286 | } | |
287 | ||
288 | 0 | seq_entries.addElement(newseq.toString()); |
289 | 0 | ids.addElement(id); |
290 | 0 | Symscores.put(id, Integer.valueOf(ids.size() - 1)); |
291 | } | |
292 | } | |
293 | } | |
294 | /* | |
295 | * leave it to the parser user to actually check this. if (noSeqs < 1) { | |
296 | * throw new IOException( "JpredFile Parser: No sequence in the | |
297 | * prediction!"); } | |
298 | */ | |
299 | ||
300 | 0 | maxLength = seq_entries.elementAt(0).toString().length(); |
301 | ||
302 | 0 | for (int i = 0; i < ids.size(); i++) |
303 | { | |
304 | // Add all sequence like objects | |
305 | 0 | Sequence newSeq = new Sequence(ids.elementAt(i).toString(), |
306 | seq_entries.elementAt(i).toString(), 1, | |
307 | seq_entries.elementAt(i).toString().length()); | |
308 | ||
309 | 0 | if (maxLength != seq_entries.elementAt(i).toString().length()) |
310 | { | |
311 | 0 | throw new IOException(MessageManager.formatMessage( |
312 | "exception.jpredconcide_entry_has_unexpected_number_of_columns", | |
313 | new String[] | |
314 | { ids.elementAt(i).toString() })); | |
315 | } | |
316 | ||
317 | 0 | if ((newSeq.getName().startsWith("QUERY") |
318 | || newSeq.getName().startsWith("align;")) | |
319 | && (QuerySeqPosition == -1)) | |
320 | { | |
321 | 0 | QuerySeqPosition = seqs.size(); |
322 | } | |
323 | ||
324 | 0 | seqs.addElement(newSeq); |
325 | } | |
326 | 0 | if (seqs.size() > 0 && QuerySeqPosition > -1) |
327 | { | |
328 | // try to make annotation for a prediction only input (default if no | |
329 | // alignment is given and prediction contains a QUERY or align;sequence_id | |
330 | // line) | |
331 | 0 | Alignment tal = new Alignment(this.getSeqsAsArray()); |
332 | 0 | try |
333 | { | |
334 | 0 | JnetAnnotationMaker.add_annotation(this, tal, QuerySeqPosition, |
335 | true); | |
336 | } catch (Exception e) | |
337 | { | |
338 | 0 | tal = null; |
339 | 0 | IOException ex = new IOException(MessageManager.formatMessage( |
340 | "exception.couldnt_parse_concise_annotation_for_prediction", | |
341 | new String[] | |
342 | { e.getMessage() })); | |
343 | 0 | e.printStackTrace(); // java 1.1 does not have : |
344 | // ex.setStackTrace(e.getStackTrace()); | |
345 | 0 | throw ex; |
346 | } | |
347 | 0 | this.annotations = new Vector(); |
348 | 0 | AlignmentAnnotation[] aan = tal.getAlignmentAnnotation(); |
349 | 0 | for (int aai = 0; aan != null && aai < aan.length; aai++) |
350 | { | |
351 | 0 | annotations.addElement(aan[aai]); |
352 | } | |
353 | } | |
354 | } | |
355 | ||
356 | /** | |
357 | ||
358 | * | |
359 | * @return String | |
360 | */ | |
361 | 0 | @Override |
362 | public String print(SequenceI[] sqs, boolean jvsuffix) | |
363 | { | |
364 | 0 | return "Not Supported"; |
365 | } | |
366 | ||
367 | /** | |
368 | * | |
369 | * @param args | |
370 | * @j2sIgnore | |
371 | */ | |
372 | 0 | public static void main(String[] args) |
373 | { | |
374 | 0 | try |
375 | { | |
376 | 0 | JPredFile jpred = new JPredFile(args[0], DataSourceType.FILE); |
377 | ||
378 | 0 | for (int i = 0; i < jpred.seqs.size(); i++) |
379 | { | |
380 | 0 | jalview.bin.Console |
381 | .outPrintln(((Sequence) jpred.seqs.elementAt(i)).getName() | |
382 | + "\n" + ((Sequence) jpred.seqs.elementAt(i)) | |
383 | .getSequenceAsString() | |
384 | + "\n"); | |
385 | } | |
386 | } catch (java.io.IOException e) | |
387 | { | |
388 | 0 | jalview.bin.Console.errPrintln("Exception " + e); |
389 | // e.printStackTrace(); not java 1.1 compatible! | |
390 | } | |
391 | } | |
392 | ||
393 | Vector annotSeqs = null; | |
394 | ||
395 | /** | |
396 | * removeNonSequences | |
397 | */ | |
398 | 0 | public void removeNonSequences() |
399 | { | |
400 | 0 | if (annotSeqs != null) |
401 | { | |
402 | 0 | return; |
403 | } | |
404 | 0 | annotSeqs = new Vector(); |
405 | 0 | Vector newseqs = new Vector(); |
406 | 0 | int i = 0; |
407 | 0 | int j = seqs.size(); |
408 | 0 | for (; i < QuerySeqPosition; i++) |
409 | { | |
410 | 0 | annotSeqs.addElement(seqs.elementAt(i)); |
411 | } | |
412 | // check that no stray annotations have been added at the end. | |
413 | { | |
414 | 0 | SequenceI sq = seqs.elementAt(j - 1); |
415 | 0 | if (sq.getName().toUpperCase(Locale.ROOT).startsWith("JPRED")) |
416 | { | |
417 | 0 | annotSeqs.addElement(sq); |
418 | 0 | seqs.removeElementAt(--j); |
419 | } | |
420 | } | |
421 | 0 | for (; i < j; i++) |
422 | { | |
423 | 0 | newseqs.addElement(seqs.elementAt(i)); |
424 | } | |
425 | ||
426 | 0 | seqs.removeAllElements(); |
427 | 0 | seqs = newseqs; |
428 | } | |
429 | } | |
430 | ||
431 | /* | |
432 | * StringBuffer out = new StringBuffer(); | |
433 | * | |
434 | * out.append("START PRED\n"); for (int i = 0; i < s[0].sequence.length(); i++) | |
435 | * { out.append(s[0].sequence.substring(i, i + 1) + " "); | |
436 | * out.append(s[1].sequence.substring(i, i + 1) + " "); | |
437 | * out.append(s[1].score[0].elementAt(i) + " "); | |
438 | * out.append(s[1].score[1].elementAt(i) + " "); | |
439 | * out.append(s[1].score[2].elementAt(i) + " "); | |
440 | * out.append(s[1].score[3].elementAt(i) + " "); | |
441 | * | |
442 | * out.append("\n"); } out.append("END PRED\n"); return out.toString(); } | |
443 | * | |
444 | * public static void main(String[] args) { try { BLCFile blc = new | |
445 | * BLCFile(args[0], "File"); DrawableSequence[] s = new | |
446 | * DrawableSequence[blc.seqs.size()]; for (int i = 0; i < blc.seqs.size(); i++) | |
447 | * { s[i] = new DrawableSequence( (Sequence) blc.seqs.elementAt(i)); } String | |
448 | * out = BLCFile.print(s); | |
449 | * | |
450 | * AlignFrame af = new AlignFrame(null, s); af.resize(700, 500); af.show(); | |
451 | * jalview.bin.Console.outPrintln(out); } catch (java.io.IOException e) { | |
452 | * jalview.bin.Console.outPrintln("Exception " + e); } } } | |
453 | */ |