1. Project Clover database Fri Dec 6 2024 13:47:14 GMT
  2. Package jalview.io

File EmblFlatFile.java

 

Coverage histogram

../../img/srcFileCovDistChart9.png
12% of files have more coverage

Code metrics

34
53
6
1
229
122
24
0.45
8.83
6
4

Classes

Class
Line #
Actions
EmblFlatFile 48 53 24
0.8924731 (89.2%)
 

Contributing tests

This file is covered by 3 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.io;
22   
23    import java.io.IOException;
24   
25    import jalview.bin.Console;
26    import jalview.datamodel.DBRefEntry;
27    import jalview.util.DBRefUtils;
28   
29    /**
30    * A class that provides selective parsing of the EMBL flatfile format.
31    * <p>
32    * The initial implementation is limited to extracting fields used by Jalview
33    * after fetching an EMBL or EMBLCDS entry:
34    *
35    * <pre>
36    * accession, version, sequence, xref
37    * and (for CDS feature) location, protein_id, product, codon_start, translation
38    * </pre>
39    *
40    * For a complete parser, it may be best to adopt that provided in
41    * https://github.com/enasequence/sequencetools/tree/master/src/main/java/uk/ac/ebi/embl/flatfile
42    * (but note this has a dependency on the Apache Commons library)
43    *
44    * @author gmcarstairs
45    * @see ftp://ftp.ebi.ac.uk/pub/databases/ena/sequence/release/doc/usrman.txt
46    * @see ftp://ftp.ebi.ac.uk/pub/databases/embl/doc/FT_current.html
47    */
 
48    public class EmblFlatFile extends EMBLLikeFlatFile
49    {
50    /**
51    * Constructor given a data source and the id of the source database
52    *
53    * @param fp
54    * @param sourceId
55    * @throws IOException
56    */
 
57  3 toggle public EmblFlatFile(FileParse fp, String sourceId) throws IOException
58    {
59  3 super(fp, sourceId);
60    }
61   
62    /**
63    * Parses the flatfile, and if successful, saves as an annotated sequence
64    * which may be retrieved by calling {@code getSequence()}
65    *
66    * @throws IOException
67    */
 
68  3 toggle @Override
69    public void parse() throws IOException
70    {
71  3 String line = nextLine();
72  121 while (line != null)
73    {
74  118 if (line.startsWith("ID"))
75    {
76  3 line = parseID(line);
77    }
78  115 else if (line.startsWith("DE"))
79    {
80  3 line = parseDE(line);
81    }
82  112 else if (line.startsWith("DR"))
83    {
84  8 line = parseDR(line);
85    }
86  104 else if (line.startsWith("SQ"))
87    {
88  3 line = parseSequence();
89    }
90  101 else if (line.startsWith("FT"))
91    {
92  23 line = parseFeature(line.substring(2));
93    }
94    else
95    {
96  78 line = nextLine();
97    }
98    }
99  3 buildSequence();
100    }
101   
102    /**
103    * Extracts and saves the primary accession and version (SV value) from an ID
104    * line, or null if not found. Returns the next line after the one processed.
105    *
106    * @param line
107    * @throws IOException
108    */
 
109  3 toggle String parseID(String line) throws IOException
110    {
111  3 String[] tokens = line.substring(2).split(";");
112   
113    /*
114    * first is primary accession
115    */
116  3 String token = tokens[0].trim();
117  3 if (!token.isEmpty())
118    {
119  3 this.accession = token;
120    }
121   
122    /*
123    * second token is 'SV versionNo'
124    */
125  3 if (tokens.length > 1)
126    {
127  3 token = tokens[1].trim();
128  3 if (token.startsWith("SV"))
129    {
130  3 String[] bits = token.trim().split(WHITESPACE);
131  3 this.version = bits[bits.length - 1];
132    }
133    }
134   
135    /*
136    * seventh token is 'length BP'
137    */
138  3 if (tokens.length > 6)
139    {
140  3 token = tokens[6].trim();
141  3 String[] bits = token.trim().split(WHITESPACE);
142  3 try
143    {
144  3 this.length = Integer.valueOf(bits[0]);
145    } catch (NumberFormatException e)
146    {
147  0 Console.error("bad length read in flatfile, line: " + line);
148    }
149    }
150   
151  3 return nextLine();
152    }
153   
154    /**
155    * Reads sequence description from the first DE line found. Any trailing
156    * period is discarded. If there are multiple DE lines, only the first (short
157    * description) is read, the rest are ignored.
158    *
159    * @param line
160    * @return
161    * @throws IOException
162    */
 
163  3 toggle String parseDE(String line) throws IOException
164    {
165  3 String desc = line.substring(2).trim();
166  3 if (desc.endsWith("."))
167    {
168  2 desc = desc.substring(0, desc.length() - 1);
169    }
170  3 this.description = desc;
171   
172    /*
173    * pass over any additional DE lines
174    */
175  ? while ((line = nextLine()) != null)
176    {
177  3 if (!line.startsWith("DE"))
178    {
179  3 break;
180    }
181    }
182   
183  3 return line;
184    }
185   
186    /**
187    * Processes one DR line and saves as a DBRefEntry cross-reference. Returns
188    * the line following the line processed.
189    *
190    * @param line
191    * @throws IOException
192    */
 
193  8 toggle String parseDR(String line) throws IOException
194    {
195  8 String[] tokens = line.substring(2).split(";");
196  8 if (tokens.length > 1)
197    {
198    /*
199    * ensure UniProtKB/Swiss-Prot converted to UNIPROT
200    */
201  8 String db = tokens[0].trim();
202  8 db = DBRefUtils.getCanonicalName(db);
203  8 String acc = tokens[1].trim();
204  8 if (acc.endsWith("."))
205    {
206  4 acc = acc.substring(0, acc.length() - 1);
207    }
208  8 String version = "0";
209  8 if (tokens.length > 2)
210    {
211  4 String secondaryId = tokens[2].trim();
212  4 if (!secondaryId.isEmpty())
213    {
214    // todo: is this right? secondary id is not a version number
215    // version = secondaryId;
216    }
217    }
218  8 this.dbrefs.add(new DBRefEntry(db, version, acc));
219    }
220   
221  8 return nextLine();
222    }
223   
 
224  447 toggle @Override
225    protected boolean isFeatureContinuationLine(String line)
226    {
227  447 return line.startsWith("FT "); // 4 spaces
228    }
229    }