Clover icon

Coverage Report

  1. Project Clover database Thu Aug 13 2020 12:04:21 BST
  2. Package jalview.analysis

File GeneticCodes.java

 

Coverage histogram

../../img/srcFileCovDistChart9.png
12% of files have more coverage

Code metrics

46
100
15
1
446
256
47
0.47
6.67
15
3.13

Classes

Class Line # Actions
GeneticCodes 40 100 47
0.89440995 89.4%
 

Contributing tests

This file is covered by 118 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.analysis;
22   
23    import jalview.bin.Cache;
24   
25    import java.io.BufferedReader;
26    import java.io.IOException;
27    import java.io.InputStream;
28    import java.io.InputStreamReader;
29    import java.util.HashMap;
30    import java.util.LinkedHashMap;
31    import java.util.Map;
32    import java.util.StringTokenizer;
33   
34    /**
35    * A singleton that provides instances of genetic code translation tables
36    *
37    * @author gmcarstairs
38    * @see <a href="https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi">NCBI genetic codes</a>
39    */
 
40    public final class GeneticCodes
41    {
42    private static final int CODON_LENGTH = 3;
43   
44    private static final String QUOTE = "\"";
45   
46    /*
47    * nucleotides as ordered in data file
48    */
49    private static final String NUCS = "TCAG";
50   
51    private static final int NUCS_COUNT = NUCS.length();
52   
53    private static final int NUCS_COUNT_SQUARED = NUCS_COUNT * NUCS_COUNT;
54   
55    private static final int NUCS_COUNT_CUBED = NUCS_COUNT * NUCS_COUNT
56    * NUCS_COUNT;
57   
58    private static final String AMBIGUITY_CODES_FILE = "/AmbiguityCodes.dat";
59   
60    private static final String RESOURCE_FILE = "/GeneticCodes.dat";
61   
62    private static GeneticCodes instance = new GeneticCodes();
63   
64    private Map<String, String> ambiguityCodes;
65   
66    /*
67    * loaded code tables, with keys in order of loading
68    */
69    private Map<String, GeneticCodeI> codeTables;
70   
/**
 * Private constructor, so that instances can only be created from within
 * this class. Creates the lookup maps and fills them from the ambiguity
 * codes and genetic codes resource files.
 */
private GeneticCodes()
{
  if (instance != null)
  {
    // the singleton already exists, so nothing is loaded
    return;
  }

  ambiguityCodes = new HashMap<>();

  /*
   * a LinkedHashMap iterates in order of insertion, so the first table
   * loaded (expected to be the Standard Code Table) is also the first
   * one returned
   */
  codeTables = new LinkedHashMap<>();

  loadAmbiguityCodes(AMBIGUITY_CODES_FILE);
  loadCodes(RESOURCE_FILE);
}
89   
90    /**
91    * Returns the singleton instance of this class
92    *
93    * @return
94    */
 
95  535 toggle public static GeneticCodes getInstance()
96    {
97  535 return instance;
98    }
99   
100    /**
101    * Returns the known code tables, in order of loading.
102    *
103    * @return
104    */
 
105  242 toggle public Iterable<GeneticCodeI> getCodeTables()
106    {
107  242 return codeTables.values();
108    }
109   
110    /**
111    * Answers the code table with the given id
112    *
113    * @param id
114    * @return
115    */
 
116  14 toggle public GeneticCodeI getCodeTable(String id)
117    {
118  14 return codeTables.get(id);
119    }
120   
121    /**
122    * A convenience method that returns the standard code table (table 1). As
123    * implemented, this has to be the first table defined in the data file.
124    *
125    * @return
126    */
 
127  289 toggle public GeneticCodeI getStandardCodeTable()
128    {
129  289 return codeTables.values().iterator().next();
130    }
131   
132    /**
133    * Loads the code tables from a data file
134    */
 
135  18 toggle protected void loadCodes(String fileName)
136    {
137  18 try
138    {
139  18 InputStream is = getClass().getResourceAsStream(fileName);
140  18 if (is == null)
141    {
142  0 System.err.println("Resource file not found: " + fileName);
143  0 return;
144    }
145  18 BufferedReader dataIn = new BufferedReader(new InputStreamReader(is));
146   
147    /*
148    * skip comments and start of table
149    */
150  18 String line = "";
151  1692 while (line != null && !line.startsWith("Genetic-code-table"))
152    {
153  1674 line = readLine(dataIn);
154    }
155  18 line = readLine(dataIn);
156   
157  468 while (line.startsWith("{"))
158    {
159  450 line = loadOneTable(dataIn);
160    }
161    } catch (IOException | NullPointerException e)
162    {
163  0 Cache.log.error(
164    "Error reading genetic codes data file " + fileName + ": "
165    + e.getMessage());
166    }
167  18 if (codeTables.isEmpty())
168    {
169  0 System.err.println(
170    "No genetic code tables loaded, check format of file "
171    + fileName);
172    }
173    }
174   
175    /**
176    * Reads and saves Nucleotide ambiguity codes from a data file. The file may
177    * include comment lines (starting with #), a header 'DNA', and one line per
178    * ambiguity code, for example:
179    * <p>
180    * R&lt;tab&gt;AG
181    * <p>
182    * means that R is an ambiguity code meaning "A or G"
183    *
184    * @param fileName
185    */
 
186  18 toggle protected void loadAmbiguityCodes(String fileName)
187    {
188  18 try
189    {
190  18 InputStream is = getClass().getResourceAsStream(fileName);
191  18 if (is == null)
192    {
193  0 System.err.println("Resource file not found: " + fileName);
194  0 return;
195    }
196  18 BufferedReader dataIn = new BufferedReader(new InputStreamReader(is));
197  18 String line = "";
198  252 while (line != null)
199    {
200  234 line = readLine(dataIn);
201  234 if (line != null && !"DNA".equals(line.toUpperCase()))
202    {
203  198 String[] tokens = line.split("\\t");
204  198 if (tokens.length == 2)
205    {
206  198 ambiguityCodes.put(tokens[0].toUpperCase(),
207    tokens[1].toUpperCase());
208    }
209    else
210    {
211  0 System.err.println(
212    "Unexpected data in " + fileName + ": " + line);
213    }
214    }
215    }
216    } catch (IOException e)
217    {
218  0 Cache.log.error(
219    "Error reading nucleotide ambiguity codes data file: "
220    + e.getMessage());
221    }
222    }
223   
224    /**
225    * Reads up to and returns the next non-comment line, trimmed. Comment lines
226    * start with a #. Returns null at end of file.
227    *
228    * @param dataIn
229    * @return
230    * @throws IOException
231    */
 
232  6138 toggle protected String readLine(BufferedReader dataIn) throws IOException
233    {
234  6138 String line = dataIn.readLine();
235  6156 while (line != null && line.startsWith("#"))
236    {
237  18 line = readLine(dataIn);
238    }
239  6138 return line == null ? null : line.trim();
240    }
241   
242    /**
243    * Reads the lines of the data file describing one translation table, and
244    * creates and stores an instance of GeneticCodeI. Returns the '{' line
245    * starting the next table, or the '}' line at end of all tables. Data format
246    * is
247    *
248    * <pre>
249    * {
250    * name "Vertebrate Mitochondrial" ,
251    * name "SGC1" ,
252    * id 2 ,
253    * ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG",
254    * sncbieaa "----------**--------------------MMMM----------**---M------------"
255    * -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
256    * -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
257    * -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
258    * },
259    * </pre>
260    *
261    * of which we parse the first name, the id, and the ncbieaa translations for
262    * codons as ordered by the Base1/2/3 lines. Note Base1/2/3 are included for
263    * readability and are in a fixed order, these are not parsed. The sncbieaa
264    * line marks alternative start codons, these are not parsed.
265    *
266    * @param dataIn
267    * @return
268    * @throws IOException
269    */
 
270  450 toggle protected String loadOneTable(BufferedReader dataIn) throws IOException
271    {
272  450 String name = null;
273  450 String id = null;
274  450 Map<String, String> codons = new HashMap<>();
275   
276  450 String line = readLine(dataIn);
277   
278  3744 while (line != null && !line.startsWith("}"))
279    {
280  3294 if (line.startsWith("name") && name == null)
281    {
282  450 name = line.substring(line.indexOf(QUOTE) + 1,
283    line.lastIndexOf(QUOTE));
284    }
285  2844 else if (line.startsWith("id"))
286    {
287  450 id = new StringTokenizer(line.substring(2)).nextToken();
288    }
289  2394 else if (line.startsWith("ncbieaa"))
290    {
291  450 String aminos = line.substring(line.indexOf(QUOTE) + 1,
292    line.lastIndexOf(QUOTE));
293  450 if (aminos.length() != NUCS_COUNT_CUBED) // 4 * 4 * 4 combinations
294    {
295  0 Cache.log.error("wrong data length in code table: " + line);
296    }
297    else
298    {
299  29250 for (int i = 0; i < aminos.length(); i++)
300    {
301  28800 String peptide = String.valueOf(aminos.charAt(i));
302  28800 char codon1 = NUCS.charAt(i / NUCS_COUNT_SQUARED);
303  28800 char codon2 = NUCS
304    .charAt((i % NUCS_COUNT_SQUARED) / NUCS_COUNT);
305  28800 char codon3 = NUCS.charAt(i % NUCS_COUNT);
306  28800 String codon = new String(
307    new char[]
308    { codon1, codon2, codon3 });
309  28800 codons.put(codon, peptide);
310    }
311    }
312    }
313  3294 line = readLine(dataIn);
314    }
315   
316  450 registerCodeTable(id, name, codons);
317  450 return readLine(dataIn);
318    }
319   
320    /**
321    * Constructs and registers a GeneticCodeI instance with the codon
322    * translations as defined in the data file. For all instances except the
323    * first, any undeclared translations default to those in the standard code
324    * table.
325    *
326    * @param id
327    * @param name
328    * @param codons
329    */
 
330  450 toggle protected void registerCodeTable(final String id, final String name,
331    final Map<String, String> codons)
332    {
333  450 codeTables.put(id, new GeneticCodeI()
334    {
335    /*
336    * map of ambiguous codons to their 'product'
337    * (null if not all possible translations match)
338    */
339    Map<String, String> ambiguous = new HashMap<>();
340   
 
341  2616 toggle @Override
342    public String translateCanonical(String codon)
343    {
344  2616 return codons.get(codon.toUpperCase());
345    }
346   
 
347  2610 toggle @Override
348    public String translate(String codon)
349    {
350  2610 String upper = codon.toUpperCase();
351  2610 String peptide = translateCanonical(upper);
352   
353    /*
354    * if still not translated, check for ambiguity codes
355    */
356  2610 if (peptide == null)
357    {
358  128 peptide = getAmbiguousTranslation(upper, ambiguous, this);
359    }
360  2610 return peptide;
361    }
362   
 
363  6052 toggle @Override
364    public String getId()
365    {
366  6052 return id;
367    }
368   
 
369  6029 toggle @Override
370    public String getName()
371    {
372  6029 return name;
373    }
374    });
375    }
376   
377    /**
378    * Computes all possible translations of a codon including one or more
379    * ambiguity codes, and stores and returns the result (null if not all
380    * translations match). If the codon includes no ambiguity codes, simply
381    * returns null.
382    *
383    * @param codon
384    * @param ambiguous
385    * @param codeTable
386    * @return
387    */
 
388  128 toggle protected String getAmbiguousTranslation(String codon,
389    Map<String, String> ambiguous, GeneticCodeI codeTable)
390    {
391  128 if (codon.length() != CODON_LENGTH)
392    {
393  0 return null;
394    }
395   
396  128 boolean isAmbiguous = false;
397   
398  128 char[][] expanded = new char[CODON_LENGTH][];
399  512 for (int i = 0; i < CODON_LENGTH; i++)
400    {
401  384 String base = String.valueOf(codon.charAt(i));
402  384 if (ambiguityCodes.containsKey(base))
403    {
404  149 isAmbiguous = true;
405  149 base = ambiguityCodes.get(base);
406    }
407  384 expanded[i] = base.toCharArray();
408    }
409   
410  128 if (!isAmbiguous)
411    {
412    // no ambiguity code involved here
413  1 return null;
414    }
415   
416    /*
417    * generate and translate all permutations of the ambiguous codon
418    * only return the translation if they all agree, else null
419    */
420  127 String peptide = null;
421  127 for (char c1 : expanded[0])
422    {
423  138 for (char c2 : expanded[1])
424    {
425  146 for (char c3 : expanded[2])
426    {
427  321 char[] cdn = new char[] { c1, c2, c3 };
428  321 String possibleCodon = String.valueOf(cdn);
429  321 String pep = codeTable.translate(possibleCodon);
430  321 if (pep == null || (peptide != null && !pep.equals(peptide)))
431    {
432  41 ambiguous.put(codon, null);
433  41 return null;
434    }
435  280 peptide = pep;
436    }
437    }
438    }
439   
440    /*
441    * all translations of ambiguous codons matched!
442    */
443  86 ambiguous.put(codon, peptide);
444  86 return peptide;
445    }
446    }