Clover icon

Coverage Report

  1. Project Clover database Thu Nov 7 2024 10:11:34 GMT
  2. Package jalview.io

File GenBankFile.java

 

Coverage histogram

../../img/srcFileCovDistChart9.png
12% of files have more coverage

Code metrics

30
43
6
1
209
105
22
0.51
7.17
6
3.67

Classes

Class Line # Actions
GenBankFile 39 43 22
0.87341774 (87.3%)
 

Contributing tests

This file is covered by 1 test.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.io;
22   
23    import java.io.IOException;
24   
25    /**
26    * A class that provides selective parsing of the GenBank flatfile format.
27    * <p>
28    * The initial implementation is limited to extracting fields used by Jalview
29    * after fetching an EMBL or EMBLCDS entry:
30    *
31    * <pre>
32    * accession, version, sequence, xref
33    * and (for CDS feature) location, protein_id, product, codon_start, translation
34    * </pre>
35    *
36    * @author gmcarstairs
37    * @see https://www.ncbi.nlm.nih.gov/Sitemap/samplerecord.html
38    */
 
39    public class GenBankFile extends EMBLLikeFlatFile
40    {
41    private static final String DEFINITION = "DEFINITION";
42   
43    /**
44    * Constructor given a data source and the id of the source database
45    *
46    * @param fp
47    * @param sourceId
48    * @throws IOException
49    */
 
50  1 toggle public GenBankFile(FileParse fp, String sourceId) throws IOException
51    {
52  1 super(fp, sourceId);
53    }
54   
55    /**
56    * Parses the flatfile, and if successful, saves as an annotated sequence
57    * which may be retrieved by calling {@code getSequence()}
58    *
59    * @throws IOException
60    * @see https://www.ncbi.nlm.nih.gov/Sitemap/samplerecord.html
61    */
 
62  1 toggle @Override
63    public void parse() throws IOException
64    {
65  1 String line = nextLine();
66  28 while (line != null)
67    {
68  27 if (line.startsWith("LOCUS"))
69    {
70  1 line = parseLocus(line);
71    }
72  26 else if (line.startsWith(DEFINITION))
73    {
74  1 line = parseDefinition(line);
75    }
76  25 else if (line.startsWith("ACCESSION"))
77    {
78  1 this.accession = line.split(WHITESPACE)[1];
79  1 line = nextLine();
80    }
81  24 else if (line.startsWith("VERSION"))
82    {
83  1 line = parseVersion(line);
84    }
85  23 else if (line.startsWith("ORIGIN"))
86    {
87  1 line = parseSequence();
88    }
89  22 else if (line.startsWith("FEATURES"))
90    {
91  1 line = nextLine();
92  19 while (line.startsWith(" "))
93    {
94  18 line = parseFeature(line);
95    }
96    }
97    else
98    {
99  21 line = nextLine();
100    }
101    }
102  1 buildSequence();
103    }
104   
105    /**
106    * Extracts and saves the primary accession and version (SV value) from an ID
107    * line, or null if not found. Returns the next line after the one processed.
108    *
109    * @param line
110    * @throws IOException
111    */
 
112  1 toggle String parseLocus(String line) throws IOException
113    {
114  1 String[] tokens = line.split(WHITESPACE);
115   
116    /*
117    * first should be "LOCUS"
118    */
119  1 if (tokens.length < 2 || !"LOCUS".equals(tokens[0]))
120    {
121  0 return nextLine();
122    }
123    /*
124    * second is primary accession
125    */
126  1 String token = tokens[1].trim();
127  1 if (!token.isEmpty())
128    {
129  1 this.accession = token;
130    }
131   
132    // not going to guess the rest just yet, but third is length with unit (bp)
133   
134  1 return nextLine();
135    }
136   
137    /**
138    * Reads sequence description from DEFINITION lines. Any trailing period is
139    * discarded. Returns the next line after the definition line(s).
140    *
141    * @param line
142    * @return
143    * @throws IOException
144    */
 
145  1 toggle String parseDefinition(String line) throws IOException
146    {
147  1 String desc = line.substring(DEFINITION.length()).trim();
148  1 if (desc.endsWith("."))
149    {
150  1 desc = desc.substring(0, desc.length() - 1);
151    }
152   
153    /*
154    * pass over any additional DE lines
155    */
156  ? while ((line = nextLine()) != null)
157    {
158  1 if (line.startsWith(" "))
159    {
160    // definition continuation line
161  0 desc += line.trim();
162    }
163    else
164    {
165  1 break;
166    }
167    }
168  1 this.description = desc;
169   
170  1 return line;
171    }
172   
173    /**
174    * Parses the VERSION line e.g.
175    *
176    * <pre>
177    * VERSION X81322.1
178    * </pre>
179    *
180    * and returns the next line
181    *
182    * @param line
183    * @throws IOException
184    */
 
185  1 toggle String parseVersion(String line) throws IOException
186    {
187    /*
188    * extract version part of <accession>.<version>
189    * https://www.ncbi.nlm.nih.gov/Sitemap/samplerecord.html#VersionB
190    */
191  1 String[] tokens = line.split(WHITESPACE);
192  1 if (tokens.length > 1)
193    {
194  1 tokens = tokens[1].split("\\.");
195  1 if (tokens.length > 1)
196    {
197  1 this.version = tokens[1];
198    }
199    }
200   
201  1 return nextLine();
202    }
203   
 
204  152 toggle @Override
205    protected boolean isFeatureContinuationLine(String line)
206    {
207  152 return line.startsWith(" "); // 6 spaces
208    }
209    }