Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
MSFfile | 42 | 137 | 49 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | package jalview.io; | |
22 | ||
23 | import java.util.Locale; | |
24 | ||
25 | import jalview.datamodel.Sequence; | |
26 | import jalview.datamodel.SequenceI; | |
27 | import jalview.util.Comparison; | |
28 | import jalview.util.Format; | |
29 | ||
30 | import java.io.IOException; | |
31 | import java.util.ArrayList; | |
32 | import java.util.Hashtable; | |
33 | import java.util.List; | |
34 | import java.util.StringTokenizer; | |
35 | ||
36 | /** | |
37 | * Reads and writes sequence alignments in MSF format. | |
38 | * | |
39 | * @author $author$ | |
40 | * @version $Revision$ | |
41 | */ | |
42 | public class MSFfile extends AlignFile | |
43 | { | |
44 | ||
45 | /** | |
46 | * Creates a new MSFfile object. | |
47 | */ | |
48 | 5 | public MSFfile() |
49 | { | |
50 | } | |
51 | ||
/**
 * Constructs an MSFfile by delegating to the matching AlignFile constructor.
 *
 * @param inFile
 *          the input, handed unchanged to the superclass constructor
 * @param type
 *          the kind of data source that inFile denotes, handed unchanged to
 *          the superclass constructor
 *
 * @throws IOException
 *           if the superclass constructor throws it
 */
public MSFfile(String inFile, DataSourceType type) throws IOException
{
  super(inFile, type);
}
67 | ||
/**
 * Constructs an MSFfile over an existing FileParse source by delegating to
 * the matching AlignFile constructor.
 *
 * @param source
 *          the source to read from, handed unchanged to the superclass
 *          constructor
 *
 * @throws IOException
 *           if the superclass constructor throws it
 */
public MSFfile(FileParse source) throws IOException
{
  super(source);
}
72 | ||
73 | /** | |
74 | * Read and parse MSF sequence data | |
75 | */ | |
76 | 2 | @Override |
77 | public void parse() throws IOException | |
78 | { | |
79 | 2 | boolean seqFlag = false; |
80 | 2 | List<String> headers = new ArrayList<String>(); |
81 | 2 | Hashtable<String, StringBuilder> seqhash = new Hashtable<String, StringBuilder>(); |
82 | ||
83 | 2 | try |
84 | { | |
85 | 2 | String line; |
86 | ? | while ((line = nextLine()) != null) |
87 | { | |
88 | 176 | StringTokenizer str = new StringTokenizer(line); |
89 | ||
90 | 176 | String key = null; |
91 | 540 | while (str.hasMoreTokens()) |
92 | { | |
93 | 364 | String inStr = str.nextToken(); |
94 | ||
95 | // If line has header information add to the headers vector | |
96 | 364 | if (inStr.indexOf("Name:") != -1) |
97 | { | |
98 | 30 | key = str.nextToken(); |
99 | 30 | headers.add(key); |
100 | } | |
101 | ||
102 | // if line has // set SeqFlag so we know sequences are coming | |
103 | 364 | if (inStr.indexOf("//") != -1) |
104 | { | |
105 | 2 | seqFlag = true; |
106 | } | |
107 | ||
108 | // Process lines as sequence lines if seqFlag is set | |
109 | 364 | if ((inStr.indexOf("//") == -1) && seqFlag) |
110 | { | |
111 | // sequence id is the first field | |
112 | 120 | key = inStr; |
113 | ||
114 | 120 | StringBuilder tempseq; |
115 | ||
116 | // Get sequence from hash if it exists | |
117 | 120 | if (seqhash.containsKey(key)) |
118 | { | |
119 | 90 | tempseq = seqhash.get(key); |
120 | } | |
121 | else | |
122 | { | |
123 | 30 | tempseq = new StringBuilder(64); |
124 | 30 | seqhash.put(key, tempseq); |
125 | } | |
126 | ||
127 | // loop through the rest of the words | |
128 | 600 | while (str.hasMoreTokens()) |
129 | { | |
130 | // append the word to the sequence | |
131 | 480 | String sequenceBlock = str.nextToken(); |
132 | 480 | tempseq.append(sequenceBlock); |
133 | } | |
134 | } | |
135 | } | |
136 | } | |
137 | } catch (IOException e) | |
138 | { | |
139 | 0 | jalview.bin.Console.errPrintln("Exception parsing MSFFile " + e); |
140 | 0 | e.printStackTrace(); |
141 | } | |
142 | ||
143 | 2 | this.noSeqs = headers.size(); |
144 | ||
145 | // Add sequences to the hash | |
146 | 32 | for (int i = 0; i < headers.size(); i++) |
147 | { | |
148 | 30 | if (seqhash.get(headers.get(i)) != null) |
149 | { | |
150 | 30 | String head = headers.get(i); |
151 | 30 | String seq = seqhash.get(head).toString(); |
152 | ||
153 | 30 | if (maxLength < head.length()) |
154 | { | |
155 | 6 | maxLength = head.length(); |
156 | } | |
157 | ||
158 | /* | |
159 | * replace ~ (leading/trailing positions) with the gap character; | |
160 | * use '.' as this is the internal gap character required by MSF | |
161 | */ | |
162 | 30 | seq = seq.replace('~', '.'); |
163 | ||
164 | 30 | Sequence newSeq = parseId(head); |
165 | ||
166 | 30 | newSeq.setSequence(seq); |
167 | ||
168 | 30 | seqs.addElement(newSeq); |
169 | } | |
170 | else | |
171 | { | |
172 | 0 | jalview.bin.Console |
173 | .errPrintln("MSFFile Parser: Can't find sequence for " | |
174 | + headers.get(i)); | |
175 | } | |
176 | } | |
177 | } | |
178 | ||
179 | /** | |
180 | * DOCUMENT ME! | |
181 | * | |
182 | * @param seq | |
183 | * DOCUMENT ME! | |
184 | * | |
185 | * @return DOCUMENT ME! | |
186 | */ | |
187 | 75 | public int checkSum(String seq) |
188 | { | |
189 | 75 | int check = 0; |
190 | 75 | String sequence = seq.toUpperCase(Locale.ROOT); |
191 | ||
192 | 11850 | for (int i = 0; i < sequence.length(); i++) |
193 | { | |
194 | 11775 | try |
195 | { | |
196 | ||
197 | 11775 | int value = sequence.charAt(i); |
198 | 11775 | if (value != -1) |
199 | { | |
200 | 11775 | check += (i % 57 + 1) * value; |
201 | } | |
202 | } catch (Exception e) | |
203 | { | |
204 | 0 | jalview.bin.Console |
205 | .errPrintln("Exception during MSF Checksum calculation"); | |
206 | 0 | e.printStackTrace(); |
207 | } | |
208 | } | |
209 | ||
210 | 75 | return check % 10000; |
211 | } | |
212 | ||
213 | /** | |
214 | * DOCUMENT ME! | |
215 | * | |
216 | * @param s | |
217 | * DOCUMENT ME! | |
218 | * @param is_NA | |
219 | * DOCUMENT ME! | |
220 | * | |
221 | * @return DOCUMENT ME! | |
222 | */ | |
223 | 3 | @Override |
224 | public String print(SequenceI[] sqs, boolean jvSuffix) | |
225 | { | |
226 | ||
227 | 3 | boolean is_NA = Comparison.isNucleotide(sqs); |
228 | ||
229 | 3 | SequenceI[] s = new SequenceI[sqs.length]; |
230 | ||
231 | 3 | StringBuilder out = new StringBuilder(256); |
232 | 3 | out.append("!!").append(is_NA ? "NA" : "AA") |
233 | .append("_MULTIPLE_ALIGNMENT 1.0"); | |
234 | // TODO: JBPNote : Jalview doesn't remember NA or AA yet. | |
235 | 3 | out.append(newline); |
236 | 3 | out.append(newline); |
237 | 3 | int max = 0; |
238 | 3 | int maxid = 0; |
239 | 3 | int i = 0; |
240 | ||
241 | 48 | while ((i < sqs.length) && (sqs[i] != null)) |
242 | { | |
243 | /* | |
244 | * modify to MSF format: uses '.' for internal gaps, | |
245 | * and '~' for leading or trailing gaps | |
246 | */ | |
247 | 45 | String seqString = sqs[i].getSequenceAsString().replace('-', '.'); |
248 | ||
249 | 45 | StringBuilder sb = new StringBuilder(seqString); |
250 | ||
251 | 576 | for (int ii = 0; ii < sb.length(); ii++) |
252 | { | |
253 | 576 | if (sb.charAt(ii) == '.') |
254 | { | |
255 | 531 | sb.setCharAt(ii, '~'); |
256 | } | |
257 | else | |
258 | { | |
259 | 45 | break; |
260 | } | |
261 | } | |
262 | ||
263 | 195 | for (int ii = sb.length() - 1; ii > 0; ii--) |
264 | { | |
265 | 195 | if (sb.charAt(ii) == '.') |
266 | { | |
267 | 150 | sb.setCharAt(ii, '~'); |
268 | } | |
269 | else | |
270 | { | |
271 | 45 | break; |
272 | } | |
273 | } | |
274 | 45 | s[i] = new Sequence(sqs[i].getName(), sb.toString(), |
275 | sqs[i].getStart(), sqs[i].getEnd()); | |
276 | ||
277 | 45 | if (sb.length() > max) |
278 | { | |
279 | 3 | max = sb.length(); |
280 | } | |
281 | ||
282 | 45 | i++; |
283 | } | |
284 | ||
285 | 3 | Format maxLenpad = new Format( |
286 | "%" + (new String("" + max)).length() + "d"); | |
287 | 3 | Format maxChkpad = new Format( |
288 | "%" + (new String("1" + max)).length() + "d"); | |
289 | 3 | i = 0; |
290 | ||
291 | 3 | int bigChecksum = 0; |
292 | 3 | int[] checksums = new int[s.length]; |
293 | 48 | while (i < s.length) |
294 | { | |
295 | 45 | checksums[i] = checkSum(s[i].getSequenceAsString()); |
296 | 45 | bigChecksum += checksums[i]; |
297 | 45 | i++; |
298 | } | |
299 | ||
300 | 3 | long maxNB = 0; |
301 | 3 | out.append(" MSF: " + s[0].getLength() + " Type: " |
302 | 3 | + (is_NA ? "N" : "P") + " Check: " + (bigChecksum % 10000) |
303 | + " .."); | |
304 | 3 | out.append(newline); |
305 | 3 | out.append(newline); |
306 | 3 | out.append(newline); |
307 | ||
308 | 3 | String[] nameBlock = new String[s.length]; |
309 | 3 | String[] idBlock = new String[s.length]; |
310 | ||
311 | 3 | i = 0; |
312 | 48 | while ((i < s.length) && (s[i] != null)) |
313 | { | |
314 | ||
315 | 45 | nameBlock[i] = new String(" Name: " + printId(s[i], jvSuffix) + " "); |
316 | ||
317 | 45 | idBlock[i] = new String("Len: " + maxLenpad.form(s[i].getLength()) |
318 | + " Check: " + maxChkpad.form(checksums[i]) | |
319 | + " Weight: 1.00" + newline); | |
320 | ||
321 | 45 | if (s[i].getName().length() > maxid) |
322 | { | |
323 | 9 | maxid = s[i].getName().length(); |
324 | } | |
325 | ||
326 | 45 | if (nameBlock[i].length() > maxNB) |
327 | { | |
328 | 9 | maxNB = nameBlock[i].length(); |
329 | } | |
330 | ||
331 | 45 | i++; |
332 | } | |
333 | ||
334 | 3 | if (maxid < 10) |
335 | { | |
336 | 0 | maxid = 10; |
337 | } | |
338 | ||
339 | 3 | if (maxNB < 15) |
340 | { | |
341 | 0 | maxNB = 15; |
342 | } | |
343 | ||
344 | 3 | Format nbFormat = new Format("%-" + maxNB + "s"); |
345 | ||
346 | 48 | for (i = 0; (i < s.length) && (s[i] != null); i++) |
347 | { | |
348 | 45 | out.append(nbFormat.form(nameBlock[i]) + idBlock[i]); |
349 | } | |
350 | ||
351 | 3 | maxid++; |
352 | 3 | out.append(newline); |
353 | 3 | out.append(newline); |
354 | 3 | out.append("//"); |
355 | 3 | out.append(newline); |
356 | 3 | out.append(newline); |
357 | 3 | int len = 50; |
358 | ||
359 | 3 | int nochunks = (max / len) + (max % len > 0 ? 1 : 0); |
360 | ||
361 | 15 | for (i = 0; i < nochunks; i++) |
362 | { | |
363 | 12 | int j = 0; |
364 | ||
365 | 192 | while ((j < s.length) && (s[j] != null)) |
366 | { | |
367 | 180 | String name = printId(s[j], jvSuffix); |
368 | ||
369 | 180 | out.append(new Format("%-" + maxid + "s").form(name + " ")); |
370 | ||
371 | 1080 | for (int k = 0; k < 5; k++) |
372 | { | |
373 | 900 | int start = (i * 50) + (k * 10); |
374 | 900 | int end = start + 10; |
375 | ||
376 | 900 | int length = s[j].getLength(); |
377 | 900 | if ((end < length) && (start < length)) |
378 | { | |
379 | 675 | out.append(s[j].getSequence(start, end)); |
380 | ||
381 | 675 | if (k < 4) |
382 | { | |
383 | 540 | out.append(" "); |
384 | } | |
385 | else | |
386 | { | |
387 | 135 | out.append(newline); |
388 | } | |
389 | } | |
390 | else | |
391 | { | |
392 | 225 | if (start < length) |
393 | { | |
394 | 45 | out.append(s[j].getSequenceAsString().substring(start)); |
395 | 45 | out.append(newline); |
396 | } | |
397 | else | |
398 | { | |
399 | 180 | if (k == 0) |
400 | { | |
401 | 0 | out.append(newline); |
402 | } | |
403 | } | |
404 | } | |
405 | } | |
406 | ||
407 | 180 | j++; |
408 | } | |
409 | ||
410 | 12 | out.append(newline); |
411 | } | |
412 | ||
413 | 3 | return out.toString(); |
414 | } | |
415 | } |