Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
GenBankFile | 39 | 43 | 22 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | package jalview.io; | |
22 | ||
23 | import java.io.IOException; | |
24 | ||
25 | /** | |
26 | * A class that provides selective parsing of the GenBank flatfile format. | |
27 | * <p> | |
28 | * The initial implementation is limited to extracting fields used by Jalview | |
29 | * after fetching an EMBL or EMBLCDS entry: | |
30 | * | |
31 | * <pre> | |
32 | * accession, version, sequence, xref | |
33 | * and (for CDS feature) location, protein_id, product, codon_start, translation | |
34 | * </pre> | |
35 | * | |
36 | * @author gmcarstairs | |
37 | * @see https://www.ncbi.nlm.nih.gov/Sitemap/samplerecord.html | |
38 | */ | |
39 | public class GenBankFile extends EMBLLikeFlatFile | |
40 | { | |
41 | private static final String DEFINITION = "DEFINITION"; | |
42 | ||
43 | /** | |
44 | * Constructor given a data source and the id of the source database | |
45 | * | |
46 | * @param fp | |
47 | * @param sourceId | |
48 | * @throws IOException | |
49 | */ | |
50 | 1 | public GenBankFile(FileParse fp, String sourceId) throws IOException |
51 | { | |
52 | 1 | super(fp, sourceId); |
53 | } | |
54 | ||
55 | /** | |
56 | * Parses the flatfile, and if successful, saves as an annotated sequence | |
57 | * which may be retrieved by calling {@code getSequence()} | |
58 | * | |
59 | * @throws IOException | |
60 | * @see https://www.ncbi.nlm.nih.gov/Sitemap/samplerecord.html | |
61 | */ | |
62 | 1 | @Override |
63 | public void parse() throws IOException | |
64 | { | |
65 | 1 | String line = nextLine(); |
66 | 28 | while (line != null) |
67 | { | |
68 | 27 | if (line.startsWith("LOCUS")) |
69 | { | |
70 | 1 | line = parseLocus(line); |
71 | } | |
72 | 26 | else if (line.startsWith(DEFINITION)) |
73 | { | |
74 | 1 | line = parseDefinition(line); |
75 | } | |
76 | 25 | else if (line.startsWith("ACCESSION")) |
77 | { | |
78 | 1 | this.accession = line.split(WHITESPACE)[1]; |
79 | 1 | line = nextLine(); |
80 | } | |
81 | 24 | else if (line.startsWith("VERSION")) |
82 | { | |
83 | 1 | line = parseVersion(line); |
84 | } | |
85 | 23 | else if (line.startsWith("ORIGIN")) |
86 | { | |
87 | 1 | line = parseSequence(); |
88 | } | |
89 | 22 | else if (line.startsWith("FEATURES")) |
90 | { | |
91 | 1 | line = nextLine(); |
92 | 19 | while (line.startsWith(" ")) |
93 | { | |
94 | 18 | line = parseFeature(line); |
95 | } | |
96 | } | |
97 | else | |
98 | { | |
99 | 21 | line = nextLine(); |
100 | } | |
101 | } | |
102 | 1 | buildSequence(); |
103 | } | |
104 | ||
105 | /** | |
106 | * Extracts and saves the primary accession and version (SV value) from an ID | |
107 | * line, or null if not found. Returns the next line after the one processed. | |
108 | * | |
109 | * @param line | |
110 | * @throws IOException | |
111 | */ | |
112 | 1 | String parseLocus(String line) throws IOException |
113 | { | |
114 | 1 | String[] tokens = line.split(WHITESPACE); |
115 | ||
116 | /* | |
117 | * first should be "LOCUS" | |
118 | */ | |
119 | 1 | if (tokens.length < 2 || !"LOCUS".equals(tokens[0])) |
120 | { | |
121 | 0 | return nextLine(); |
122 | } | |
123 | /* | |
124 | * second is primary accession | |
125 | */ | |
126 | 1 | String token = tokens[1].trim(); |
127 | 1 | if (!token.isEmpty()) |
128 | { | |
129 | 1 | this.accession = token; |
130 | } | |
131 | ||
132 | // not going to guess the rest just yet, but third is length with unit (bp) | |
133 | ||
134 | 1 | return nextLine(); |
135 | } | |
136 | ||
137 | /** | |
138 | * Reads sequence description from DEFINITION lines. Any trailing period is | |
139 | * discarded. Returns the next line after the definition line(s). | |
140 | * | |
141 | * @param line | |
142 | * @return | |
143 | * @throws IOException | |
144 | */ | |
145 | 1 | String parseDefinition(String line) throws IOException |
146 | { | |
147 | 1 | String desc = line.substring(DEFINITION.length()).trim(); |
148 | 1 | if (desc.endsWith(".")) |
149 | { | |
150 | 1 | desc = desc.substring(0, desc.length() - 1); |
151 | } | |
152 | ||
153 | /* | |
154 | * pass over any additional DE lines | |
155 | */ | |
156 | ? | while ((line = nextLine()) != null) |
157 | { | |
158 | 1 | if (line.startsWith(" ")) |
159 | { | |
160 | // definition continuation line | |
161 | 0 | desc += line.trim(); |
162 | } | |
163 | else | |
164 | { | |
165 | 1 | break; |
166 | } | |
167 | } | |
168 | 1 | this.description = desc; |
169 | ||
170 | 1 | return line; |
171 | } | |
172 | ||
173 | /** | |
174 | * Parses the VERSION line e.g. | |
175 | * | |
176 | * <pre> | |
177 | * VERSION X81322.1 | |
178 | * </pre> | |
179 | * | |
180 | * and returns the next line | |
181 | * | |
182 | * @param line | |
183 | * @throws IOException | |
184 | */ | |
185 | 1 | String parseVersion(String line) throws IOException |
186 | { | |
187 | /* | |
188 | * extract version part of <accession>.<version> | |
189 | * https://www.ncbi.nlm.nih.gov/Sitemap/samplerecord.html#VersionB | |
190 | */ | |
191 | 1 | String[] tokens = line.split(WHITESPACE); |
192 | 1 | if (tokens.length > 1) |
193 | { | |
194 | 1 | tokens = tokens[1].split("\\."); |
195 | 1 | if (tokens.length > 1) |
196 | { | |
197 | 1 | this.version = tokens[1]; |
198 | } | |
199 | } | |
200 | ||
201 | 1 | return nextLine(); |
202 | } | |
203 | ||
204 | 152 | @Override |
205 | protected boolean isFeatureContinuationLine(String line) | |
206 | { | |
207 | 152 | return line.startsWith(" "); // 6 spaces |
208 | } | |
209 | } |