Clover icon

Coverage Report

  1. Project Clover database Mon Nov 11 2024 20:42:03 GMT
  2. Package jalview.analysis

File GeneticCodes.java

 

Coverage histogram

../../img/srcFileCovDistChart9.png
12% of files have more coverage

Code metrics

46
100
15
1
447
257
47
0.47
6.67
15
3.13

Classes

Class Line # Actions
GeneticCodes 41 100 47
0.89440995 89.4%
 

Contributing tests

This file is covered by 204 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.analysis;
22   
23    import java.util.Locale;
24    import java.io.BufferedReader;
25    import java.io.IOException;
26    import java.io.InputStream;
27    import java.io.InputStreamReader;
28    import java.util.HashMap;
29    import java.util.LinkedHashMap;
30    import java.util.Map;
31    import java.util.StringTokenizer;
32   
33    import jalview.bin.Console;
34   
35    /**
36    * A singleton that provides instances of genetic code translation tables
37    *
38    * @author gmcarstairs
39    * @see <a href="https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi">NCBI genetic codes</a>
40    */
 
41    public final class GeneticCodes
42    {
43    private static final int CODON_LENGTH = 3;
44   
45    private static final String QUOTE = "\"";
46   
47    /*
48    * nucleotides as ordered in data file
49    */
50    private static final String NUCS = "TCAG";
51   
52    private static final int NUCS_COUNT = NUCS.length();
53   
54    private static final int NUCS_COUNT_SQUARED = NUCS_COUNT * NUCS_COUNT;
55   
56    private static final int NUCS_COUNT_CUBED = NUCS_COUNT * NUCS_COUNT
57    * NUCS_COUNT;
58   
59    private static final String AMBIGUITY_CODES_FILE = "/AmbiguityCodes.dat";
60   
61    private static final String RESOURCE_FILE = "/GeneticCodes.dat";
62   
63    private static GeneticCodes instance = new GeneticCodes();
64   
65    private Map<String, String> ambiguityCodes;
66   
67    /*
68    * loaded code tables, with keys in order of loading
69    */
70    private Map<String, GeneticCodeI> codeTables;
71   
/**
 * Private constructor, so the only instance is the one held in the static
 * field. When no instance exists yet, creates the lookup maps and loads the
 * ambiguity codes followed by the genetic code tables.
 */
private GeneticCodes()
{
  if (instance != null)
  {
    // a singleton already exists: leave this object unpopulated
    return;
  }

  ambiguityCodes = new HashMap<>();

  /*
   * an insertion-ordered map, so that the first table loaded
   * (assumed to be the Standard Code Table) remains the first entry
   */
  codeTables = new LinkedHashMap<>();

  loadAmbiguityCodes(AMBIGUITY_CODES_FILE);
  loadCodes(RESOURCE_FILE);
}
90   
91    /**
92    * Returns the singleton instance of this class
93    *
94    * @return
95    */
 
96  756 toggle public static GeneticCodes getInstance()
97    {
98  756 return instance;
99    }
100   
101    /**
102    * Returns the known code tables, in order of loading.
103    *
104    * @return
105    */
 
106  453 toggle public Iterable<GeneticCodeI> getCodeTables()
107    {
108  453 return codeTables.values();
109    }
110   
111    /**
112    * Answers the code table with the given id
113    *
114    * @param id
115    * @return
116    */
 
117  14 toggle public GeneticCodeI getCodeTable(String id)
118    {
119  14 return codeTables.get(id);
120    }
121   
122    /**
123    * A convenience method that returns the standard code table (table 1). As
124    * implemented, this has to be the first table defined in the data file.
125    *
126    * @return
127    */
 
128  299 toggle public GeneticCodeI getStandardCodeTable()
129    {
130  299 return codeTables.values().iterator().next();
131    }
132   
133    /**
134    * Loads the code tables from a data file
135    */
 
136  50 toggle protected void loadCodes(String fileName)
137    {
138  50 try
139    {
140  50 InputStream is = getClass().getResourceAsStream(fileName);
141  50 if (is == null)
142    {
143  0 jalview.bin.Console
144    .errPrintln("Resource file not found: " + fileName);
145  0 return;
146    }
147  50 BufferedReader dataIn = new BufferedReader(new InputStreamReader(is));
148   
149    /*
150    * skip comments and start of table
151    */
152  50 String line = "";
153  4700 while (line != null && !line.startsWith("Genetic-code-table"))
154    {
155  4650 line = readLine(dataIn);
156    }
157  50 line = readLine(dataIn);
158   
159  1300 while (line.startsWith("{"))
160    {
161  1250 line = loadOneTable(dataIn);
162    }
163    } catch (IOException | NullPointerException e)
164    {
165  0 Console.error("Error reading genetic codes data file " + fileName
166    + ": " + e.getMessage());
167    }
168  50 if (codeTables.isEmpty())
169    {
170  0 jalview.bin.Console.errPrintln(
171    "No genetic code tables loaded, check format of file "
172    + fileName);
173    }
174    }
175   
176    /**
177    * Reads and saves Nucleotide ambiguity codes from a data file. The file may
178    * include comment lines (starting with #), a header 'DNA', and one line per
179    * ambiguity code, for example:
180    * <p>
181    * R&lt;tab&gt;AG
182    * <p>
183    * means that R is an ambiguity code meaning "A or G"
184    *
185    * @param fileName
186    */
 
187  50 toggle protected void loadAmbiguityCodes(String fileName)
188    {
189  50 try
190    {
191  50 InputStream is = getClass().getResourceAsStream(fileName);
192  50 if (is == null)
193    {
194  0 jalview.bin.Console
195    .errPrintln("Resource file not found: " + fileName);
196  0 return;
197    }
198  50 BufferedReader dataIn = new BufferedReader(new InputStreamReader(is));
199  50 String line = "";
200  700 while (line != null)
201    {
202  650 line = readLine(dataIn);
203  650 if (line != null && !"DNA".equals(line.toUpperCase(Locale.ROOT)))
204    {
205  550 String[] tokens = line.split("\\t");
206  550 if (tokens.length == 2)
207    {
208  550 ambiguityCodes.put(tokens[0].toUpperCase(Locale.ROOT),
209    tokens[1].toUpperCase(Locale.ROOT));
210    }
211    else
212    {
213  0 jalview.bin.Console.errPrintln(
214    "Unexpected data in " + fileName + ": " + line);
215    }
216    }
217    }
218    } catch (IOException e)
219    {
220  0 Console.error("Error reading nucleotide ambiguity codes data file: "
221    + e.getMessage());
222    }
223    }
224   
225    /**
226    * Reads up to and returns the next non-comment line, trimmed. Comment lines
227    * start with a #. Returns null at end of file.
228    *
229    * @param dataIn
230    * @return
231    * @throws IOException
232    */
 
233  17050 toggle protected String readLine(BufferedReader dataIn) throws IOException
234    {
235  17050 String line = dataIn.readLine();
236  17100 while (line != null && line.startsWith("#"))
237    {
238  50 line = readLine(dataIn);
239    }
240  17050 return line == null ? null : line.trim();
241    }
242   
243    /**
244    * Reads the lines of the data file describing one translation table, and
245    * creates and stores an instance of GeneticCodeI. Returns the '{' line
246    * starting the next table, or the '}' line at end of all tables. Data format
247    * is
248    *
249    * <pre>
250    * {
251    * name "Vertebrate Mitochondrial" ,
252    * name "SGC1" ,
253    * id 2 ,
254    * ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG",
255    * sncbieaa "----------**--------------------MMMM----------**---M------------"
256    * -- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
257    * -- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
258    * -- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
259    * },
260    * </pre>
261    *
262    * of which we parse the first name, the id, and the ncbieaa translations for
263    * codons as ordered by the Base1/2/3 lines. Note Base1/2/3 are included for
264    * readability and are in a fixed order, these are not parsed. The sncbieaa
265    * line marks alternative start codons, these are not parsed.
266    *
267    * @param dataIn
268    * @return
269    * @throws IOException
270    */
 
271  1250 toggle protected String loadOneTable(BufferedReader dataIn) throws IOException
272    {
273  1250 String name = null;
274  1250 String id = null;
275  1250 Map<String, String> codons = new HashMap<>();
276   
277  1250 String line = readLine(dataIn);
278   
279  10400 while (line != null && !line.startsWith("}"))
280    {
281  9150 if (line.startsWith("name") && name == null)
282    {
283  1250 name = line.substring(line.indexOf(QUOTE) + 1,
284    line.lastIndexOf(QUOTE));
285    }
286  7900 else if (line.startsWith("id"))
287    {
288  1250 id = new StringTokenizer(line.substring(2)).nextToken();
289    }
290  6650 else if (line.startsWith("ncbieaa"))
291    {
292  1250 String aminos = line.substring(line.indexOf(QUOTE) + 1,
293    line.lastIndexOf(QUOTE));
294  1250 if (aminos.length() != NUCS_COUNT_CUBED) // 4 * 4 * 4 combinations
295    {
296  0 Console.error("wrong data length in code table: " + line);
297    }
298    else
299    {
300  81250 for (int i = 0; i < aminos.length(); i++)
301    {
302  80000 String peptide = String.valueOf(aminos.charAt(i));
303  80000 char codon1 = NUCS.charAt(i / NUCS_COUNT_SQUARED);
304  80000 char codon2 = NUCS
305    .charAt((i % NUCS_COUNT_SQUARED) / NUCS_COUNT);
306  80000 char codon3 = NUCS.charAt(i % NUCS_COUNT);
307  80000 String codon = new String(
308    new char[]
309    { codon1, codon2, codon3 });
310  80000 codons.put(codon, peptide);
311    }
312    }
313    }
314  9150 line = readLine(dataIn);
315    }
316   
317  1250 registerCodeTable(id, name, codons);
318  1250 return readLine(dataIn);
319    }
320   
321    /**
322    * Constructs and registers a GeneticCodeI instance with the codon
323    * translations as defined in the data file. For all instances except the
324    * first, any undeclared translations default to those in the standard code
325    * table.
326    *
327    * @param id
328    * @param name
329    * @param codons
330    */
 
331  1250 toggle protected void registerCodeTable(final String id, final String name,
332    final Map<String, String> codons)
333    {
334  1250 codeTables.put(id, new GeneticCodeI()
335    {
336    /*
337    * map of ambiguous codons to their 'product'
338    * (null if not all possible translations match)
339    */
340    Map<String, String> ambiguous = new HashMap<>();
341   
 
342  2626 toggle @Override
343    public String translateCanonical(String codon)
344    {
345  2626 return codons.get(codon.toUpperCase(Locale.ROOT));
346    }
347   
 
348  2620 toggle @Override
349    public String translate(String codon)
350    {
351  2620 String upper = codon.toUpperCase(Locale.ROOT);
352  2620 String peptide = translateCanonical(upper);
353   
354    /*
355    * if still not translated, check for ambiguity codes
356    */
357  2620 if (peptide == null)
358    {
359  128 peptide = getAmbiguousTranslation(upper, ambiguous, this);
360    }
361  2620 return peptide;
362    }
363   
 
364  11327 toggle @Override
365    public String getId()
366    {
367  11327 return id;
368    }
369   
 
370  11304 toggle @Override
371    public String getName()
372    {
373  11304 return name;
374    }
375    });
376    }
377   
378    /**
379    * Computes all possible translations of a codon including one or more
380    * ambiguity codes, and stores and returns the result (null if not all
381    * translations match). If the codon includes no ambiguity codes, simply
382    * returns null.
383    *
384    * @param codon
385    * @param ambiguous
386    * @param codeTable
387    * @return
388    */
 
389  128 toggle protected String getAmbiguousTranslation(String codon,
390    Map<String, String> ambiguous, GeneticCodeI codeTable)
391    {
392  128 if (codon.length() != CODON_LENGTH)
393    {
394  0 return null;
395    }
396   
397  128 boolean isAmbiguous = false;
398   
399  128 char[][] expanded = new char[CODON_LENGTH][];
400  512 for (int i = 0; i < CODON_LENGTH; i++)
401    {
402  384 String base = String.valueOf(codon.charAt(i));
403  384 if (ambiguityCodes.containsKey(base))
404    {
405  149 isAmbiguous = true;
406  149 base = ambiguityCodes.get(base);
407    }
408  384 expanded[i] = base.toCharArray();
409    }
410   
411  128 if (!isAmbiguous)
412    {
413    // no ambiguity code involved here
414  1 return null;
415    }
416   
417    /*
418    * generate and translate all permutations of the ambiguous codon
419    * only return the translation if they all agree, else null
420    */
421  127 String peptide = null;
422  127 for (char c1 : expanded[0])
423    {
424  138 for (char c2 : expanded[1])
425    {
426  146 for (char c3 : expanded[2])
427    {
428  321 char[] cdn = new char[] { c1, c2, c3 };
429  321 String possibleCodon = String.valueOf(cdn);
430  321 String pep = codeTable.translate(possibleCodon);
431  321 if (pep == null || (peptide != null && !pep.equals(peptide)))
432    {
433  41 ambiguous.put(codon, null);
434  41 return null;
435    }
436  280 peptide = pep;
437    }
438    }
439    }
440   
441    /*
442    * all translations of ambiguous codons matched!
443    */
444  86 ambiguous.put(codon, peptide);
445  86 return peptide;
446    }
447    }