Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
NewickFile | 81 | 269 | 112 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | // NewickFile.java | |
22 | // Tree I/O | |
23 | // http://evolution.genetics.washington.edu/phylip/newick_doc.html | |
24 | // TODO: Implement Basic NHX tag parsing and preservation | |
25 | // TODO: http://evolution.genetics.wustl.edu/eddy/forester/NHX.html | |
26 | // TODO: Extended SequenceNodeI to hold parsed NHX strings | |
27 | package jalview.io; | |
28 | ||
29 | import java.io.BufferedReader; | |
30 | import java.io.File; | |
31 | import java.io.FileReader; | |
32 | import java.io.IOException; | |
33 | import java.util.Locale; | |
34 | import java.util.StringTokenizer; | |
35 | ||
36 | import com.stevesoft.pat.Regex; | |
37 | ||
38 | import jalview.bin.Jalview; | |
39 | import jalview.bin.Jalview.ExitCode; | |
40 | import jalview.datamodel.BinaryNode; | |
41 | import jalview.datamodel.SequenceNode; | |
42 | import jalview.util.MessageManager; | |
43 | ||
44 | /** | |
45 | * Parse a New Hampshire style tree. Caveats: NHX files are NOT supported and the | |
46 | * tree distances and topology are unreliable when they are parsed. TODO: on | |
47 | * this: NHX codes are appended in comments beginning with &&NHX. The codes are | |
48 | * given below (from http://www.phylosoft.org/forester/NHX.html): Element Type | |
49 | * Description Corresponding phyloXML element (parent element in parentheses) no | |
50 | * tag string name of this node/clade (MUST BE FIRST, IF ASSIGNED) | |
51 | * <name>(<clade>) : decimal branch length to parent node (MUST BE SECOND, IF | |
52 | * ASSIGNED) <branch_length>(<clade>) :GN= string gene name <name>(<sequence>) | |
53 | * :AC= string sequence accession <accession>(<sequence>) :ND= string node | |
54 | * identifier - if this is being used, it has to be unique within each phylogeny | |
55 | * <node_id>(<clade>) :B= decimal confidence value for parent branch | |
56 | * <confidence>(<clade>) :D= 'T', 'F', or '?' 'T' if this node represents a | |
57 | * duplication event - 'F' if this node represents a speciation event, '?' if | |
58 | * this node represents an unknown event (D= tag should be replaced by Ev= tag) | |
59 | * n/a :Ev=duplications>speciations>gene losses>event type>duplication type int | |
60 | * int int string string event (replaces the =D tag), number of duplication, | |
61 | * speciation, and gene loss events, type of event (transfer, fusion, root, | |
62 | * unknown, other, speciation_duplication_loss, unassigned) <events>(<clade>) | |
63 | * :E= string EC number at this node <annotation>(<sequence>) :Fu= string | |
64 | * function at this node <annotation>(<sequence>) | |
65 | * :DS=protein-length>from>to>support>name>from>... int int int double string | |
66 | * int ... domain structure at this node <domain_architecture>(<sequence>) :S= | |
67 | * string species name of the species/phylum at this node <taxonomy>(<clade>) | |
68 | * :T= integer taxonomy ID of the species/phylum at this node <id>(<taxonomy>) | |
69 | * :W= integer width of parent branch <width>(<clade>) :C=rrr.ggg.bbb | |
70 | * integer.integer.integer color of parent branch <color>(<clade>) :Co= 'Y' or | |
71 | * 'N' collapse this node when drawing the tree (default is not to collapse) n/a | |
72 | * :XB= string custom data associated with a branch <property>(<clade>) :XN= | |
73 | * string custom data associated with a node <property>(<clade>) :O= integer | |
74 | * orthologous to this external node n/a :SN= integer subtree neighbors n/a :SO= | |
75 | * integer super orthologous (no duplications on paths) to this external node | |
76 | * n/a | |
77 | * | |
78 | * @author Jim Procter | |
79 | * @version $Revision$ | |
80 | */ | |
81 | public class NewickFile extends FileParse | |
82 | { | |
83 | BinaryNode root; | |
84 | ||
85 | private boolean HasBootstrap = false; | |
86 | ||
87 | private boolean HasDistances = false; | |
88 | ||
89 | private boolean RootHasDistance = false; | |
90 | ||
91 | // File IO Flags | |
92 | boolean ReplaceUnderscores = false; | |
93 | ||
94 | boolean printRootInfo = true; | |
95 | ||
96 | private Regex[] NodeSafeName = new Regex[] { | |
97 | new Regex().perlCode("m/[\\[,:'()]/"), // test for | |
98 | // requiring | |
99 | // quotes | |
100 | new Regex().perlCode("s/'/''/"), // escaping quote | |
101 | // characters | |
102 | new Regex().perlCode("s/\\/w/_/") // unqoted whitespace | |
103 | // transformation | |
104 | }; | |
105 | ||
106 | char QuoteChar = '\''; | |
107 | ||
/**
 * Creates a NewickFile that reads its tree from the given text.
 *
 * @param inStr
 *          tree text, passed to the FileParse constructor as a
 *          DataSourceType.PASTE source
 *
 * @throws IOException
 *           propagated from the FileParse constructor
 */
public NewickFile(String inStr) throws IOException
{
  super(inStr, DataSourceType.PASTE);
}
121 | ||
/**
 * Creates a NewickFile that reads its tree from the given data source.
 *
 * @param inFile
 *          source locator or content, interpreted by FileParse according to
 *          protocol
 * @param protocol
 *          the kind of data source that inFile refers to
 *
 * @throws IOException
 *           propagated from the FileParse constructor
 */
public NewickFile(String inFile, DataSourceType protocol)
        throws IOException
{
  super(inFile, protocol);
}
138 | ||
/**
 * Creates a NewickFile on top of an existing FileParse source.
 *
 * @param source
 *          the FileParse object handed to the FileParse constructor
 *
 * @throws IOException
 *           propagated from the FileParse constructor
 */
public NewickFile(FileParse source) throws IOException
{
  super(source);
}
143 | ||
/**
 * Creates a NewickFile for writing an existing tree. The bootstrap and
 * distance flags keep their defaults (false).
 *
 * @param newtree
 *          root node of the tree to be written
 */
public NewickFile(BinaryNode newtree)
{
  this.root = newtree;
}
154 | ||
/**
 * Creates a NewickFile for writing an existing tree, optionally with
 * bootstrap values.
 *
 * @param newtree
 *          root node of the tree to be written
 * @param bootstrap
 *          initial value of the flag that makes print() write bootstraps
 */
public NewickFile(SequenceNode newtree, boolean bootstrap)
{
  this.root = newtree;
  this.HasBootstrap = bootstrap;
}
168 | ||
/**
 * Creates a NewickFile for writing an existing tree, choosing whether
 * bootstraps and distances are written.
 *
 * @param newtree
 *          root node of the tree to be written
 * @param bootstrap
 *          initial value of the flag that makes print() write bootstraps
 * @param distances
 *          initial value of the flag that makes print() write distances
 */
public NewickFile(BinaryNode newtree, boolean bootstrap,
        boolean distances)
{
  this.HasDistances = distances;
  this.HasBootstrap = bootstrap;
  this.root = newtree;
}
186 | ||
/**
 * Creates a NewickFile for writing an existing tree, choosing whether
 * bootstraps, distances and the root distance are written.
 *
 * @param newtree
 *          root node of the tree to be written
 * @param bootstrap
 *          initial value of the flag that makes print() write bootstraps
 * @param distances
 *          initial value of the flag that makes print() write distances
 * @param rootdistance
 *          initial value of the flag that makes printRootField() write the
 *          root node's distance
 */
public NewickFile(BinaryNode newtree, boolean bootstrap,
        boolean distances, boolean rootdistance)
{
  this.RootHasDistance = rootdistance;
  this.HasDistances = distances;
  this.HasBootstrap = bootstrap;
  this.root = newtree;
}
207 | ||
208 | /** | |
209 | * DOCUMENT ME! | |
210 | * | |
211 | * @param Error | |
212 | * DOCUMENT ME! | |
213 | * @param Er | |
214 | * DOCUMENT ME! | |
215 | * @param r | |
216 | * DOCUMENT ME! | |
217 | * @param p | |
218 | * DOCUMENT ME! | |
219 | * @param s | |
220 | * DOCUMENT ME! | |
221 | * | |
222 | * @return DOCUMENT ME! | |
223 | */ | |
224 | 0 | private String ErrorStringrange(String Error, String Er, int r, int p, |
225 | String s) | |
226 | { | |
227 | 0 | return ((Error == null) ? "" : Error) + Er + " at position " + p + " ( " |
228 | 0 | + s.substring(((p - r) < 0) ? 0 : (p - r), |
229 | 0 | ((p + r) > s.length()) ? s.length() : (p + r)) |
230 | + " )\n"; | |
231 | } | |
232 | ||
233 | // @tree annotations | |
234 | // These are set automatically by the reader | |
235 | 37 | public boolean HasBootstrap() |
236 | { | |
237 | 37 | return HasBootstrap; |
238 | } | |
239 | ||
240 | /** | |
241 | * DOCUMENT ME! | |
242 | * | |
243 | * @return DOCUMENT ME! | |
244 | */ | |
245 | 37 | public boolean HasDistances() |
246 | { | |
247 | 37 | return HasDistances; |
248 | } | |
249 | ||
250 | 20 | public boolean HasRootDistance() |
251 | { | |
252 | 20 | return RootHasDistance; |
253 | } | |
254 | ||
255 | /** | |
256 | * parse the filesource as a newick file (new hampshire and/or extended) | |
257 | * | |
258 | * @throws IOException | |
259 | * with a line number and character position for badly formatted NH | |
260 | * strings | |
261 | */ | |
262 | 20 | public void parse() throws IOException |
263 | { | |
264 | 20 | String nf; |
265 | ||
266 | { // fill nf with complete tree file | |
267 | ||
268 | 20 | StringBuffer file = new StringBuffer(); |
269 | ||
270 | ? | while ((nf = nextLine()) != null) |
271 | { | |
272 | 20 | file.append(nf); |
273 | } | |
274 | ||
275 | 20 | nf = file.toString(); |
276 | } | |
277 | ||
278 | 20 | root = new SequenceNode(); |
279 | ||
280 | 20 | BinaryNode realroot = null; |
281 | 20 | BinaryNode c = root; |
282 | ||
283 | 20 | int d = -1; |
284 | 20 | int cp = 0; |
285 | // int flen = nf.length(); | |
286 | ||
287 | 20 | String Error = null; |
288 | 20 | String nodename = null; |
289 | 20 | String commentString2 = null; // comments after simple node props |
290 | ||
291 | 20 | double DefDistance = (float) 0.001; // @param Default distance for a node - |
292 | // very very small | |
293 | 20 | int DefBootstrap = -1; // @param Default bootstrap for a node |
294 | ||
295 | 20 | double distance = DefDistance; |
296 | 20 | int bootstrap = DefBootstrap; |
297 | ||
298 | 20 | boolean ascending = false; // flag indicating that we are leaving the |
299 | // current node | |
300 | ||
301 | 20 | Regex majorsyms = new Regex("[(\\['),;]"); |
302 | ||
303 | 20 | int nextcp = 0; |
304 | 20 | int ncp = cp; |
305 | 20 | boolean parsednodename = false; |
306 | 612 | while (majorsyms.searchFrom(nf, cp) && (Error == null)) |
307 | { | |
308 | 592 | int fcp = majorsyms.matchedFrom(); |
309 | 592 | char schar; |
310 | 592 | switch (schar = nf.charAt(fcp)) |
311 | { | |
312 | 188 | case '(': |
313 | ||
314 | // ascending should not be set | |
315 | // New Internal node | |
316 | 188 | if (ascending) |
317 | { | |
318 | 0 | Error = ErrorStringrange(Error, "Unexpected '('", 7, fcp, nf); |
319 | ||
320 | 0 | continue; |
321 | } | |
322 | 188 | d++; |
323 | ||
324 | 188 | if (c.right() == null) |
325 | { | |
326 | 94 | c.setRight(new SequenceNode(null, c, null, DefDistance, |
327 | DefBootstrap, false)); | |
328 | 94 | c = c.right(); |
329 | } | |
330 | else | |
331 | { | |
332 | 94 | if (c.left() != null) |
333 | { | |
334 | // Dummy node for polytomy - keeps c.left free for new node | |
335 | 0 | BinaryNode tmpn = new SequenceNode(null, c, null, 0, 0, true); |
336 | 0 | tmpn.SetChildren(c.left(), c.right()); |
337 | 0 | c.setRight(tmpn); |
338 | } | |
339 | ||
340 | 94 | c.setLeft(new SequenceNode(null, c, null, DefDistance, |
341 | DefBootstrap, false)); | |
342 | 94 | c = c.left(); |
343 | } | |
344 | ||
345 | 188 | if (realroot == null) |
346 | { | |
347 | 20 | realroot = c; |
348 | } | |
349 | ||
350 | 188 | nodename = null; |
351 | 188 | distance = DefDistance; |
352 | 188 | bootstrap = DefBootstrap; |
353 | 188 | cp = fcp + 1; |
354 | ||
355 | 188 | break; |
356 | ||
357 | // Deal with quoted fields | |
358 | 8 | case '\'': |
359 | ||
360 | 8 | Regex qnodename = new Regex("'([^']|'')+'"); |
361 | ||
362 | 8 | if (qnodename.searchFrom(nf, fcp)) |
363 | { | |
364 | 8 | int nl = qnodename.stringMatched().length(); |
365 | 8 | nodename = new String( |
366 | qnodename.stringMatched().substring(1, nl - 1)); | |
367 | // unpack any escaped colons | |
368 | 8 | Regex xpandquotes = Regex.perlCode("s/''/'/"); |
369 | 8 | String widernodename = xpandquotes.replaceAll(nodename); |
370 | 8 | nodename = widernodename; |
371 | // jump to after end of quoted nodename | |
372 | 8 | nextcp = fcp + nl + 1; |
373 | 8 | parsednodename = true; |
374 | } | |
375 | else | |
376 | { | |
377 | 0 | Error = ErrorStringrange(Error, |
378 | "Unterminated quotes for nodename", 7, fcp, nf); | |
379 | } | |
380 | ||
381 | 8 | break; |
382 | ||
383 | 396 | default: |
384 | 396 | if (schar == ';') |
385 | { | |
386 | 20 | if (d != -1) |
387 | { | |
388 | 0 | Error = ErrorStringrange(Error, |
389 | "Wayward semicolon (depth=" + d + ")", 7, fcp, nf); | |
390 | } | |
391 | // cp advanced at the end of default | |
392 | } | |
393 | 396 | if (schar == '[') |
394 | { | |
395 | // node string contains Comment or structured/extended NH format info | |
396 | /* | |
397 | * if ((fcp-cp>1 && nf.substring(cp,fcp).trim().length()>1)) { // will | |
398 | * process in remains jalview.bin.Console.errPrintln("skipped text: | |
399 | * '"+nf.substring(cp,fcp)+"'"); } | |
400 | */ | |
401 | // verify termination. | |
402 | 0 | Regex comment = new Regex("]"); |
403 | 0 | if (comment.searchFrom(nf, fcp)) |
404 | { | |
405 | // Skip the comment field | |
406 | 0 | nextcp = comment.matchedFrom() + 1; |
407 | 0 | warningMessage = "Tree file contained comments which may confuse input algorithm."; |
408 | 0 | break; |
409 | ||
410 | // cp advanced at the end of default to nextcp, ncp is unchanged so | |
411 | // any node info can be read. | |
412 | } | |
413 | else | |
414 | { | |
415 | 0 | Error = ErrorStringrange(Error, "Unterminated comment", 3, fcp, |
416 | nf); | |
417 | } | |
418 | } | |
419 | // Parse simpler field strings | |
420 | 396 | String fstring = nf.substring(ncp, fcp); |
421 | // remove any comments before we parse the node info | |
422 | // TODO: test newick file with quoted square brackets in node name (is | |
423 | // this allowed?) | |
424 | 396 | while (fstring.indexOf(']') > -1) |
425 | { | |
426 | 0 | int cstart = fstring.indexOf('['); |
427 | 0 | int cend = fstring.indexOf(']'); |
428 | 0 | commentString2 = fstring.substring(cstart + 1, cend); |
429 | 0 | fstring = fstring.substring(0, cstart) |
430 | + fstring.substring(cend + 1); | |
431 | ||
432 | } | |
433 | 396 | Regex uqnodename = new Regex("\\b([^' :;\\](),]+)"); |
434 | 396 | Regex nbootstrap = new Regex("\\s*([0-9+]+)\\s*:"); |
435 | 396 | Regex ndist = new Regex(":([-0-9Ee.+]+)"); |
436 | ||
437 | 396 | if (!parsednodename && uqnodename.search(fstring) |
438 | && ((uqnodename.matchedFrom(1) == 0) || (fstring | |
439 | .charAt(uqnodename.matchedFrom(1) - 1) != ':'))) // JBPNote | |
440 | // HACK! | |
441 | { | |
442 | 200 | if (nodename == null) |
443 | { | |
444 | 200 | if (ReplaceUnderscores) |
445 | { | |
446 | 0 | nodename = uqnodename.stringMatched(1).replace('_', ' '); |
447 | } | |
448 | else | |
449 | { | |
450 | 200 | nodename = uqnodename.stringMatched(1); |
451 | } | |
452 | } | |
453 | else | |
454 | { | |
455 | 0 | Error = ErrorStringrange(Error, |
456 | "File has broken algorithm - overwritten nodename", 10, | |
457 | fcp, nf); | |
458 | } | |
459 | } | |
460 | // get comment bootstraps | |
461 | ||
462 | 396 | if (nbootstrap.search(fstring)) |
463 | { | |
464 | 0 | if (nbootstrap.stringMatched(1) |
465 | .equals(uqnodename.stringMatched(1))) | |
466 | { | |
467 | 0 | nodename = null; // no nodename here. |
468 | } | |
469 | 0 | if (nodename == null || nodename.length() == 0 |
470 | || nbootstrap.matchedFrom(1) > (uqnodename.matchedFrom(1) | |
471 | + uqnodename.stringMatched().length())) | |
472 | { | |
473 | 0 | try |
474 | { | |
475 | 0 | bootstrap = (Integer.valueOf(nbootstrap.stringMatched(1))) |
476 | .intValue(); | |
477 | 0 | HasBootstrap = true; |
478 | } catch (Exception e) | |
479 | { | |
480 | 0 | Error = ErrorStringrange(Error, "Can't parse bootstrap value", |
481 | 4, ncp + nbootstrap.matchedFrom(), nf); | |
482 | } | |
483 | } | |
484 | } | |
485 | ||
486 | 396 | boolean nodehasdistance = false; |
487 | ||
488 | 396 | if (ndist.search(fstring)) |
489 | { | |
490 | 376 | try |
491 | { | |
492 | 376 | distance = (Double.valueOf(ndist.stringMatched(1))) |
493 | .floatValue(); | |
494 | 376 | HasDistances = true; |
495 | 376 | nodehasdistance = true; |
496 | } catch (Exception e) | |
497 | { | |
498 | 0 | Error = ErrorStringrange(Error, |
499 | "Can't parse node distance value", 7, | |
500 | ncp + ndist.matchedFrom(), nf); | |
501 | } | |
502 | } | |
503 | ||
504 | 396 | if (ascending) |
505 | { | |
506 | // Write node info here | |
507 | 188 | c.setName(nodename); |
508 | // Trees without distances still need a render distance | |
509 | 188 | c.dist = (HasDistances) ? distance : DefDistance; |
510 | // be consistent for internal bootstrap defaults too | |
511 | 188 | c.setBootstrap((HasBootstrap) ? bootstrap : DefBootstrap); |
512 | 188 | if (c == realroot) |
513 | { | |
514 | 20 | RootHasDistance = nodehasdistance; // JBPNote This is really |
515 | // UGLY!!! Ensure root node gets | |
516 | // its given distance | |
517 | } | |
518 | 188 | parseNHXNodeProps(c, commentString2); |
519 | 188 | commentString2 = null; |
520 | } | |
521 | else | |
522 | { | |
523 | // Find a place to put the leaf | |
524 | 208 | BinaryNode newnode = new SequenceNode(null, c, nodename, |
525 | 208 | (HasDistances) ? distance : DefDistance, |
526 | 208 | (HasBootstrap) ? bootstrap : DefBootstrap, false); |
527 | 208 | parseNHXNodeProps(c, commentString2); |
528 | 208 | commentString2 = null; |
529 | ||
530 | 208 | if (c.right() == null) |
531 | { | |
532 | 114 | c.setRight(newnode); |
533 | } | |
534 | else | |
535 | { | |
536 | 94 | if (c.left() == null) |
537 | { | |
538 | 94 | c.setLeft(newnode); |
539 | } | |
540 | else | |
541 | { | |
542 | // Insert a dummy node for polytomy | |
543 | // dummy nodes have distances | |
544 | 0 | BinaryNode newdummy = new SequenceNode(null, c, null, |
545 | 0 | (HasDistances ? 0 : DefDistance), 0, true); |
546 | 0 | newdummy.SetChildren(c.left(), newnode); |
547 | 0 | c.setLeft(newdummy); |
548 | } | |
549 | } | |
550 | } | |
551 | ||
552 | 396 | if (ascending) |
553 | { | |
554 | // move back up the tree from preceding closure | |
555 | 188 | c = c.AscendTree(); |
556 | ||
557 | 188 | if ((d > -1) && (c == null)) |
558 | { | |
559 | 0 | Error = ErrorStringrange(Error, |
560 | "File broke algorithm: Lost place in tree (is there an extra ')' ?)", | |
561 | 7, fcp, nf); | |
562 | } | |
563 | } | |
564 | ||
565 | 396 | if (nf.charAt(fcp) == ')') |
566 | { | |
567 | 188 | d--; |
568 | 188 | ascending = true; |
569 | } | |
570 | else | |
571 | { | |
572 | 208 | if (nf.charAt(fcp) == ',') |
573 | { | |
574 | 188 | if (ascending) |
575 | { | |
576 | 74 | ascending = false; |
577 | } | |
578 | else | |
579 | { | |
580 | // Just advance focus, if we need to | |
581 | 114 | if ((c.left() != null) && (!c.left().isLeaf())) |
582 | { | |
583 | 0 | c = c.left(); |
584 | } | |
585 | } | |
586 | } | |
587 | } | |
588 | ||
589 | // Reset new node properties to obvious fakes | |
590 | 396 | nodename = null; |
591 | 396 | distance = DefDistance; |
592 | 396 | bootstrap = DefBootstrap; |
593 | 396 | commentString2 = null; |
594 | 396 | parsednodename = false; |
595 | } | |
596 | 592 | if (nextcp == 0) |
597 | { | |
598 | 584 | ncp = cp = fcp + 1; |
599 | } | |
600 | else | |
601 | { | |
602 | 8 | cp = nextcp; |
603 | 8 | nextcp = 0; |
604 | } | |
605 | } | |
606 | ||
607 | 20 | if (Error != null) |
608 | { | |
609 | 0 | throw (new IOException( |
610 | MessageManager.formatMessage("exception.newfile", new String[] | |
611 | { Error.toString() }))); | |
612 | } | |
613 | 20 | if (root == null) |
614 | { | |
615 | 0 | throw (new IOException( |
616 | MessageManager.formatMessage("exception.newfile", new String[] | |
617 | { MessageManager.getString("label.no_tree_read_in") }))); | |
618 | } | |
619 | // THe next line is failing for topali trees - not sure why yet. if | |
620 | // (root.right()!=null && root.isDummy()) | |
621 | 20 | root = root.right().detach(); // remove the imaginary root. |
622 | ||
623 | 20 | if (!RootHasDistance) |
624 | { | |
625 | 20 | root.dist = (HasDistances) ? 0 : DefDistance; |
626 | } | |
627 | } | |
628 | ||
629 | /** | |
630 | * parse NHX codes in comment strings and update NewickFile state flags for | |
631 | * distances and bootstraps, and add any additional properties onto the node. | |
632 | * | |
633 | * @param c | |
634 | * @param commentString | |
635 | * @param commentString2 | |
636 | */ | |
637 | 396 | private void parseNHXNodeProps(BinaryNode c, String commentString) |
638 | { | |
639 | // TODO: store raw comment on the sequenceNode so it can be recovered when | |
640 | // tree is output | |
641 | 396 | if (commentString != null && commentString.startsWith("&&NHX")) |
642 | { | |
643 | 0 | StringTokenizer st = new StringTokenizer(commentString.substring(5), |
644 | ":"); | |
645 | 0 | while (st.hasMoreTokens()) |
646 | { | |
647 | 0 | String tok = st.nextToken(); |
648 | 0 | int colpos = tok.indexOf("="); |
649 | ||
650 | 0 | if (colpos > -1) |
651 | { | |
652 | 0 | String code = tok.substring(0, colpos); |
653 | 0 | String value = tok.substring(colpos + 1); |
654 | 0 | try |
655 | { | |
656 | // parse out code/value pairs | |
657 | 0 | if (code.toLowerCase(Locale.ROOT).equals("b")) |
658 | { | |
659 | 0 | int v = -1; |
660 | 0 | Float iv = Float.valueOf(value); |
661 | 0 | v = iv.intValue(); // jalview only does integer bootstraps |
662 | // currently | |
663 | 0 | c.setBootstrap(v); |
664 | 0 | HasBootstrap = true; |
665 | } | |
666 | // more codes here. | |
667 | } catch (Exception e) | |
668 | { | |
669 | 0 | jalview.bin.Console.errPrintln( |
670 | "Couldn't parse code '" + code + "' = '" + value + "'"); | |
671 | 0 | e.printStackTrace(System.err); |
672 | } | |
673 | } | |
674 | } | |
675 | } | |
676 | ||
677 | } | |
678 | ||
679 | /** | |
680 | * DOCUMENT ME! | |
681 | * | |
682 | * @return DOCUMENT ME! | |
683 | */ | |
684 | 46 | public BinaryNode getTree() |
685 | { | |
686 | 46 | return root; |
687 | } | |
688 | ||
689 | /** | |
690 | * Generate a newick format tree according to internal flags for bootstraps, | |
691 | * distances and root distances. | |
692 | * | |
693 | * @return new hampshire tree in a single line | |
694 | */ | |
695 | 10 | public String print() |
696 | { | |
697 | 10 | synchronized (this) |
698 | { | |
699 | 10 | StringBuffer tf = new StringBuffer(); |
700 | 10 | print(tf, root); |
701 | ||
702 | 10 | return (tf.append(";").toString()); |
703 | } | |
704 | } | |
705 | ||
706 | /** | |
707 | * | |
708 | * | |
709 | * Generate a newick format tree according to internal flags for distances and | |
710 | * root distances and user specificied writing of bootstraps. | |
711 | * | |
712 | * @param withbootstraps | |
713 | * controls if bootstrap values are explicitly written. | |
714 | * | |
715 | * @return new hampshire tree in a single line | |
716 | */ | |
717 | 7 | public String print(boolean withbootstraps) |
718 | { | |
719 | 7 | synchronized (this) |
720 | { | |
721 | 7 | boolean boots = this.HasBootstrap; |
722 | 7 | this.HasBootstrap = withbootstraps; |
723 | ||
724 | 7 | String rv = print(); |
725 | 7 | this.HasBootstrap = boots; |
726 | ||
727 | 7 | return rv; |
728 | } | |
729 | } | |
730 | ||
731 | /** | |
732 | * | |
733 | * Generate newick format tree according to internal flags for writing root | |
734 | * node distances. | |
735 | * | |
736 | * @param withbootstraps | |
737 | * explicitly write bootstrap values | |
738 | * @param withdists | |
739 | * explicitly write distances | |
740 | * | |
741 | * @return new hampshire tree in a single line | |
742 | */ | |
743 | 7 | public String print(boolean withbootstraps, boolean withdists) |
744 | { | |
745 | 7 | synchronized (this) |
746 | { | |
747 | 7 | boolean dists = this.HasDistances; |
748 | 7 | this.HasDistances = withdists; |
749 | ||
750 | 7 | String rv = print(withbootstraps); |
751 | 7 | this.HasDistances = dists; |
752 | ||
753 | 7 | return rv; |
754 | } | |
755 | } | |
756 | ||
757 | /** | |
758 | * Generate newick format tree according to user specified flags | |
759 | * | |
760 | * @param withbootstraps | |
761 | * explicitly write bootstrap values | |
762 | * @param withdists | |
763 | * explicitly write distances | |
764 | * @param printRootInfo | |
765 | * explicitly write root distance | |
766 | * | |
767 | * @return new hampshire tree in a single line | |
768 | */ | |
769 | 4 | public String print(boolean withbootstraps, boolean withdists, |
770 | boolean printRootInfo) | |
771 | { | |
772 | 4 | synchronized (this) |
773 | { | |
774 | 4 | boolean rootinfo = printRootInfo; |
775 | 4 | this.printRootInfo = printRootInfo; |
776 | ||
777 | 4 | String rv = print(withbootstraps, withdists); |
778 | 4 | this.printRootInfo = rootinfo; |
779 | ||
780 | 4 | return rv; |
781 | } | |
782 | } | |
783 | ||
784 | /** | |
785 | * DOCUMENT ME! | |
786 | * | |
787 | * @return DOCUMENT ME! | |
788 | */ | |
789 | 0 | char getQuoteChar() |
790 | { | |
791 | 0 | return QuoteChar; |
792 | } | |
793 | ||
794 | /** | |
795 | * DOCUMENT ME! | |
796 | * | |
797 | * @param c | |
798 | * DOCUMENT ME! | |
799 | * | |
800 | * @return DOCUMENT ME! | |
801 | */ | |
802 | 0 | char setQuoteChar(char c) |
803 | { | |
804 | 0 | char old = QuoteChar; |
805 | 0 | QuoteChar = c; |
806 | ||
807 | 0 | return old; |
808 | } | |
809 | ||
810 | /** | |
811 | * DOCUMENT ME! | |
812 | * | |
813 | * @param name | |
814 | * DOCUMENT ME! | |
815 | * | |
816 | * @return DOCUMENT ME! | |
817 | */ | |
818 | 144 | private String nodeName(String name) |
819 | { | |
820 | 144 | if (NodeSafeName[0].search(name)) |
821 | { | |
822 | 4 | return QuoteChar + NodeSafeName[1].replaceAll(name) + QuoteChar; |
823 | } | |
824 | else | |
825 | { | |
826 | 140 | return NodeSafeName[2].replaceAll(name); |
827 | } | |
828 | } | |
829 | ||
830 | /** | |
831 | * DOCUMENT ME! | |
832 | * | |
833 | * @param c | |
834 | * DOCUMENT ME! | |
835 | * | |
836 | * @return DOCUMENT ME! | |
837 | */ | |
838 | 268 | private String printNodeField(BinaryNode c) |
839 | { | |
840 | 268 | return ((c.getName() == null) ? "" : nodeName(c.getName())) |
841 | 116 | + ((HasBootstrap) ? ((c.getBootstrap() > -1) |
842 | 116 | ? ((c.getName() != null ? " " : "") + c.getBootstrap()) |
843 | : "") : "") | |
844 | 268 | + ((HasDistances) ? (":" + c.dist) : ""); |
845 | } | |
846 | ||
847 | /** | |
848 | * DOCUMENT ME! | |
849 | * | |
850 | * @param root | |
851 | * DOCUMENT ME! | |
852 | * | |
853 | * @return DOCUMENT ME! | |
854 | */ | |
855 | 10 | private String printRootField(BinaryNode root) |
856 | { | |
857 | 10 | return (printRootInfo) |
858 | 6 | ? (((root.getName() == null) ? "" : nodeName(root.getName())) |
859 | 6 | + ((HasBootstrap) |
860 | 1 | ? ((root.getBootstrap() > -1) |
861 | 1 | ? ((root.getName() != null ? " " : "") |
862 | + +root.getBootstrap()) | |
863 | : "") | |
864 | : "") | |
865 | 6 | + ((RootHasDistance) ? (":" + root.dist) : "")) |
866 | : ""; | |
867 | } | |
868 | ||
869 | // Non recursive call deals with root node properties | |
870 | 10 | public void print(StringBuffer tf, BinaryNode root) |
871 | { | |
872 | 10 | if (root != null) |
873 | { | |
874 | 10 | if (root.isLeaf() && printRootInfo) |
875 | { | |
876 | 0 | tf.append(printRootField(root)); |
877 | } | |
878 | else | |
879 | { | |
880 | 10 | if (root.isDummy()) |
881 | { | |
882 | 0 | _print(tf, root.right()); |
883 | 0 | _print(tf, root.left()); |
884 | } | |
885 | else | |
886 | { | |
887 | 10 | tf.append("("); |
888 | 10 | _print(tf, root.right()); |
889 | ||
890 | 10 | if (root.left() != null) |
891 | { | |
892 | 10 | tf.append(","); |
893 | } | |
894 | ||
895 | 10 | _print(tf, root.left()); |
896 | 10 | tf.append(")" + printRootField(root)); |
897 | } | |
898 | } | |
899 | } | |
900 | } | |
901 | ||
902 | // Recursive call for non-root nodes | |
903 | 268 | public void _print(StringBuffer tf, BinaryNode c) |
904 | { | |
905 | 268 | if (c != null) |
906 | { | |
907 | 268 | if (c.isLeaf()) |
908 | { | |
909 | 144 | tf.append(printNodeField(c)); |
910 | } | |
911 | else | |
912 | { | |
913 | 124 | if (c.isDummy()) |
914 | { | |
915 | 0 | _print(tf, c.left()); |
916 | 0 | if (c.left() != null) |
917 | { | |
918 | 0 | tf.append(","); |
919 | } | |
920 | 0 | _print(tf, c.right()); |
921 | } | |
922 | else | |
923 | { | |
924 | 124 | tf.append("("); |
925 | 124 | _print(tf, c.right()); |
926 | ||
927 | 124 | if (c.left() != null) |
928 | { | |
929 | 124 | tf.append(","); |
930 | } | |
931 | ||
932 | 124 | _print(tf, c.left()); |
933 | 124 | tf.append(")" + printNodeField(c)); |
934 | } | |
935 | } | |
936 | } | |
937 | } | |
938 | ||
939 | /** | |
940 | * | |
941 | * @param args | |
942 | * @j2sIgnore | |
943 | */ | |
944 | 0 | public static void main(String[] args) |
945 | { | |
946 | 0 | try |
947 | { | |
948 | 0 | if (args == null || args.length != 1) |
949 | { | |
950 | 0 | Jalview.exit( |
951 | "Takes one argument - file name of a newick tree file.", | |
952 | ExitCode.INVALID_ARGUMENT); | |
953 | } | |
954 | ||
955 | 0 | File fn = new File(args[0]); |
956 | ||
957 | 0 | StringBuffer newickfile = new StringBuffer(); |
958 | 0 | BufferedReader treefile = new BufferedReader(new FileReader(fn)); |
959 | 0 | String l; |
960 | ||
961 | 0 | while ((l = treefile.readLine()) != null) |
962 | { | |
963 | 0 | newickfile.append(l); |
964 | } | |
965 | ||
966 | 0 | treefile.close(); |
967 | 0 | jalview.bin.Console.outPrintln("Read file :\n"); |
968 | ||
969 | 0 | NewickFile trf = new NewickFile(args[0], DataSourceType.FILE); |
970 | 0 | trf.parse(); |
971 | 0 | jalview.bin.Console.outPrintln("Original file :\n"); |
972 | ||
973 | 0 | Regex nonl = new Regex("\n+", ""); |
974 | 0 | jalview.bin.Console |
975 | .outPrintln(nonl.replaceAll(newickfile.toString()) + "\n"); | |
976 | ||
977 | 0 | jalview.bin.Console.outPrintln("Parsed file.\n"); |
978 | 0 | jalview.bin.Console |
979 | .outPrintln("Default output type for original input.\n"); | |
980 | 0 | jalview.bin.Console.outPrintln(trf.print()); |
981 | 0 | jalview.bin.Console.outPrintln("Without bootstraps.\n"); |
982 | 0 | jalview.bin.Console.outPrintln(trf.print(false)); |
983 | 0 | jalview.bin.Console.outPrintln("Without distances.\n"); |
984 | 0 | jalview.bin.Console.outPrintln(trf.print(true, false)); |
985 | 0 | jalview.bin.Console |
986 | .outPrintln("Without bootstraps but with distanecs.\n"); | |
987 | 0 | jalview.bin.Console.outPrintln(trf.print(false, true)); |
988 | 0 | jalview.bin.Console.outPrintln("Without bootstraps or distanecs.\n"); |
989 | 0 | jalview.bin.Console.outPrintln(trf.print(false, false)); |
990 | 0 | jalview.bin.Console |
991 | .outPrintln("With bootstraps and with distances.\n"); | |
992 | 0 | jalview.bin.Console.outPrintln(trf.print(true, true)); |
993 | } catch (java.io.IOException e) | |
994 | { | |
995 | 0 | jalview.bin.Console.errPrintln("Exception\n" + e); |
996 | 0 | e.printStackTrace(); |
997 | } | |
998 | } | |
999 | } |