Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
SimpleBlastFile | 38 | 103 | 48 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | package jalview.io; | |
22 | ||
23 | import jalview.datamodel.Sequence; | |
24 | import jalview.datamodel.SequenceI; | |
25 | ||
26 | import java.io.IOException; | |
27 | import java.util.Enumeration; | |
28 | import java.util.Hashtable; | |
29 | import java.util.Vector; | |
30 | ||
31 | /** | |
32 | * parse a simple blast report. Attempt to cope with query anchored and pairwise | |
33 | * alignments only. | |
34 | * | |
35 | * @author Jim Procter | |
36 | */ | |
37 | ||
38 | public class SimpleBlastFile extends AlignFile | |
39 | { | |
40 | /** | |
41 | * header and footer info goes into alignment annotation. | |
42 | */ | |
43 | StringBuffer headerLines, footerLines; | |
44 | ||
45 | /** | |
46 | * hold sequence ids in order of appearance in file | |
47 | */ | |
48 | Vector seqids; | |
49 | ||
50 | 0 | public SimpleBlastFile() |
51 | { | |
52 | } | |
53 | ||
54 | 0 | public SimpleBlastFile(String inFile, DataSourceType sourceType) |
55 | throws IOException | |
56 | { | |
57 | 0 | super(inFile, sourceType); |
58 | } | |
59 | ||
60 | 0 | public SimpleBlastFile(FileParse source) throws IOException |
61 | { | |
62 | 0 | super(source); |
63 | } | |
64 | ||
65 | 0 | @Override |
66 | public void initData() | |
67 | { | |
68 | 0 | super.initData(); |
69 | 0 | headerLines = new StringBuffer(); |
70 | 0 | footerLines = new StringBuffer(); |
71 | 0 | seqids = new Vector(); |
72 | } | |
73 | ||
74 | 0 | @Override |
75 | public void parse() throws IOException | |
76 | { | |
77 | 0 | String line; |
78 | 0 | char gapc = ' '; // nominal gap character |
79 | 0 | Hashtable seqhash = new Hashtable(); |
80 | 0 | boolean inAlignments = false; |
81 | 0 | int padding = -1, numcol = -1, aligcol = -1, lastcol = -1; |
82 | 0 | long qlen = 0, rstart, rend; // total number of query bases so far |
83 | 0 | boolean padseq = false; |
84 | 0 | while ((line = nextLine()) != null) |
85 | { | |
86 | 0 | if (line.indexOf("ALIGNMENTS") == 0) |
87 | { | |
88 | 0 | inAlignments = true; |
89 | } | |
90 | else | |
91 | { | |
92 | 0 | if (inAlignments) |
93 | { | |
94 | 0 | if (line.trim().length() == 0) |
95 | { | |
96 | 0 | continue; |
97 | } | |
98 | // parse out the sequences | |
99 | // query anchored means that we use the query sequence as the | |
100 | // alignment ruler | |
101 | 0 | if (line.indexOf("Query") == 0) |
102 | { | |
103 | 0 | padding = -1; |
104 | // reset column markers for this block | |
105 | 0 | numcol = -1; |
106 | 0 | aligcol = -1; |
107 | 0 | lastcol = -1; |
108 | // init or reset the column positions | |
109 | 0 | for (int p = 5, mLen = line.length(); p < mLen; p++) |
110 | { | |
111 | 0 | char c = line.charAt(p); |
112 | 0 | if (c >= '0' && c <= '9') |
113 | { | |
114 | 0 | if (numcol == -1) |
115 | { | |
116 | 0 | numcol = p; |
117 | } | |
118 | 0 | else if (aligcol != -1 && lastcol == -1) |
119 | { | |
120 | 0 | lastcol = p; |
121 | } | |
122 | } | |
123 | else | |
124 | { | |
125 | 0 | if (c >= 'A' && c <= 'z') |
126 | { | |
127 | 0 | if (aligcol == -1) |
128 | { | |
129 | 0 | aligcol = p; |
130 | 0 | padding = -1; |
131 | } | |
132 | } | |
133 | else | |
134 | { | |
135 | 0 | if (padding == -1) |
136 | { | |
137 | 0 | padding = p; // beginning of last stretch of whitespace |
138 | } | |
139 | } | |
140 | } | |
141 | } | |
142 | 0 | if (padding == -1) |
143 | { | |
144 | 0 | padding = aligcol; |
145 | } | |
146 | } | |
147 | 0 | if (line.indexOf("Database:") > -1 |
148 | || (aligcol == -1 || numcol == -1 || lastcol == -1) | |
149 | || line.length() < lastcol) | |
150 | { | |
151 | 0 | inAlignments = false; |
152 | } | |
153 | else | |
154 | { | |
155 | // now extract the alignment. | |
156 | 0 | String sqid = line.substring(0, numcol).trim(); |
157 | 0 | String stindx = line.substring(numcol, aligcol).trim(); |
158 | 0 | String aligseg = line.substring(aligcol, padding); |
159 | 0 | String endindx = line.substring(lastcol).trim(); |
160 | // init start/end prior to parsing | |
161 | 0 | rstart = 1; // best guess we have |
162 | 0 | rend = 0; // if zero at end of parsing, then we count non-gaps |
163 | 0 | try |
164 | { | |
165 | 0 | rstart = Long.parseLong(stindx); |
166 | } catch (Exception e) | |
167 | { | |
168 | 0 | jalview.bin.Console.errPrintln( |
169 | "Couldn't parse '" + stindx + "' as start of row"); | |
170 | // inAlignments = false; | |
171 | // warn for this line | |
172 | } | |
173 | 0 | try |
174 | { | |
175 | 0 | rend = Long.parseLong(endindx); |
176 | } catch (Exception e) | |
177 | { | |
178 | 0 | jalview.bin.Console.errPrintln( |
179 | "Couldn't parse '" + endindx + "' as end of row"); | |
180 | // inAlignments = false; | |
181 | ||
182 | // warn for this line | |
183 | } | |
184 | 0 | Vector seqentries = (Vector) seqhash.get(sqid); |
185 | 0 | if (seqentries == null) |
186 | { | |
187 | 0 | seqentries = new Vector(); |
188 | 0 | seqhash.put(sqid, seqentries); |
189 | 0 | seqids.addElement(sqid); |
190 | } | |
191 | ||
192 | 0 | Object[] seqentry = null; |
193 | 0 | Enumeration sqent = seqentries.elements(); |
194 | 0 | while (seqentry == null && sqent.hasMoreElements()) |
195 | { | |
196 | 0 | seqentry = (Object[]) sqent.nextElement(); |
197 | 0 | if (((long[]) seqentry[1])[1] + 1 != rstart) |
198 | { | |
199 | 0 | seqentry = null; |
200 | } | |
201 | } | |
202 | 0 | padseq = false; |
203 | 0 | if (seqentry == null) |
204 | { | |
205 | 0 | padseq = true; // prepend gaps to new sequences in this block |
206 | 0 | seqentry = new Object[] { new StringBuffer(), |
207 | new long[] | |
208 | { rstart, rend } }; | |
209 | 0 | seqentries.addElement(seqentry); |
210 | 0 | seqhash.put(sqid, seqentry); |
211 | ||
212 | } | |
213 | 0 | if (sqid.equals("Query")) |
214 | { | |
215 | // update current block length in case we need to pad | |
216 | 0 | qlen = ((StringBuffer) seqentry[0]).length(); |
217 | } | |
218 | 0 | StringBuffer sqs = ((StringBuffer) seqentry[0]); |
219 | 0 | if (padseq) |
220 | { | |
221 | 0 | for (long c = sqs.length(); c < qlen; c++) |
222 | { | |
223 | 0 | sqs.append(gapc); |
224 | } | |
225 | } | |
226 | 0 | sqs.append(aligseg); |
227 | 0 | if (rend > 0) |
228 | { | |
229 | 0 | ((long[]) seqentry[1])[1] = rend; |
230 | } | |
231 | } | |
232 | // end of parsing out the sequences | |
233 | } | |
234 | // if we haven't parsed the line as an alignment, then | |
235 | // add to the sequence header | |
236 | 0 | if (!inAlignments) |
237 | { | |
238 | 0 | String ln = line.trim(); |
239 | // save any header stuff for the user | |
240 | 0 | if (ln.length() > 0) |
241 | { | |
242 | 0 | StringBuffer addto = (seqhash.size() > 0) ? footerLines |
243 | : headerLines; | |
244 | 0 | addto.append(line); |
245 | 0 | addto.append("\n"); |
246 | } | |
247 | } | |
248 | } | |
249 | } | |
250 | 0 | if (seqhash.size() > 0) |
251 | { | |
252 | // make the sequence vector | |
253 | 0 | Enumeration seqid = seqids.elements(); |
254 | 0 | while (seqid.hasMoreElements()) |
255 | { | |
256 | 0 | String idstring = (String) seqid.nextElement(); |
257 | 0 | Object[] seqentry = (Object[]) seqhash.get(idstring); |
258 | 0 | try |
259 | { | |
260 | 0 | Sequence newseq = new Sequence(idstring, |
261 | ||
262 | ((StringBuffer) seqentry[0]).toString(), | |
263 | (int) ((long[]) seqentry[1])[0], | |
264 | (int) ((long[]) seqentry[1])[1]); | |
265 | 0 | if (newseq.getEnd() == 0) |
266 | { | |
267 | // assume there are no deletions in the sequence. | |
268 | 0 | newseq.setEnd(newseq.findPosition(newseq.getLength())); |
269 | } | |
270 | 0 | seqs.addElement(newseq); |
271 | } catch (Exception e) | |
272 | { | |
273 | 0 | if (warningMessage == null) |
274 | { | |
275 | 0 | warningMessage = ""; |
276 | } | |
277 | 0 | warningMessage += "Couldn't add Sequence - ID is '" + idstring |
278 | + "' : Exception was " + e.toString() + "\n"; | |
279 | } | |
280 | } | |
281 | // add any annotation | |
282 | 0 | if (headerLines.length() > 1) |
283 | { | |
284 | 0 | setAlignmentProperty("HEADER", headerLines.toString()); |
285 | } | |
286 | 0 | if (footerLines.length() > 1) |
287 | { | |
288 | 0 | setAlignmentProperty("FOOTER", footerLines.toString()); |
289 | } | |
290 | } | |
291 | } | |
292 | ||
293 | 0 | @Override |
294 | public String print(SequenceI[] sqs, boolean jvsuffix) | |
295 | { | |
296 | 0 | return new String("Not Implemented."); |
297 | } | |
298 | } |