1 |
|
|
2 |
|
|
3 |
|
|
4 |
|
|
5 |
|
|
6 |
|
|
7 |
|
|
8 |
|
|
9 |
|
|
10 |
|
|
11 |
|
|
12 |
|
|
13 |
|
|
14 |
|
|
15 |
|
|
16 |
|
|
17 |
|
|
18 |
|
|
19 |
|
|
20 |
|
|
21 |
|
package jalview.analysis; |
22 |
|
|
23 |
|
import java.util.Locale; |
24 |
|
import java.io.BufferedReader; |
25 |
|
import java.io.IOException; |
26 |
|
import java.io.InputStream; |
27 |
|
import java.io.InputStreamReader; |
28 |
|
import java.util.HashMap; |
29 |
|
import java.util.LinkedHashMap; |
30 |
|
import java.util.Map; |
31 |
|
import java.util.StringTokenizer; |
32 |
|
|
33 |
|
import jalview.bin.Console; |
34 |
|
|
35 |
|
|
36 |
|
|
37 |
|
|
38 |
|
@author |
39 |
|
@see |
40 |
|
|
|
|
| 89.4% |
Uncovered Elements: 17 (161) |
Complexity: 47 |
Complexity Density: 0.47 |
|
41 |
|
public final class GeneticCodes |
42 |
|
{ |
43 |
|
private static final int CODON_LENGTH = 3; |
44 |
|
|
45 |
|
private static final String QUOTE = "\""; |
46 |
|
|
47 |
|
|
48 |
|
|
49 |
|
|
50 |
|
private static final String NUCS = "TCAG"; |
51 |
|
|
52 |
|
private static final int NUCS_COUNT = NUCS.length(); |
53 |
|
|
54 |
|
private static final int NUCS_COUNT_SQUARED = NUCS_COUNT * NUCS_COUNT; |
55 |
|
|
56 |
|
private static final int NUCS_COUNT_CUBED = NUCS_COUNT * NUCS_COUNT |
57 |
|
* NUCS_COUNT; |
58 |
|
|
59 |
|
private static final String AMBIGUITY_CODES_FILE = "/AmbiguityCodes.dat"; |
60 |
|
|
61 |
|
private static final String RESOURCE_FILE = "/GeneticCodes.dat"; |
62 |
|
|
63 |
|
private static GeneticCodes instance = new GeneticCodes(); |
64 |
|
|
65 |
|
private Map<String, String> ambiguityCodes; |
66 |
|
|
67 |
|
|
68 |
|
|
69 |
|
|
70 |
|
private Map<String, GeneticCodeI> codeTables; |
71 |
|
|
72 |
|
|
73 |
|
|
74 |
|
|
|
|
| 85.7% |
Uncovered Elements: 1 (7) |
Complexity: 2 |
Complexity Density: 0.4 |
|
75 |
1 |
/**
 * Private constructor enforcing the singleton pattern. Data is loaded only
 * if no instance has been assigned yet; otherwise the new object is left
 * with its maps unset.
 */
private GeneticCodes()
{
  if (instance != null)
  {
    // a singleton already exists - do not load the data a second time
    return;
  }

  ambiguityCodes = new HashMap<>();

  // LinkedHashMap so that tables are kept in the order they are registered
  codeTables = new LinkedHashMap<>();

  loadAmbiguityCodes(AMBIGUITY_CODES_FILE);
  loadCodes(RESOURCE_FILE);
}
90 |
|
|
91 |
|
|
92 |
|
|
93 |
|
|
94 |
|
@return |
95 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
96 |
394 |
public static GeneticCodes getInstance()... |
97 |
|
{ |
98 |
394 |
return instance; |
99 |
|
} |
100 |
|
|
101 |
|
|
102 |
|
|
103 |
|
|
104 |
|
@return |
105 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
106 |
212 |
public Iterable<GeneticCodeI> getCodeTables()... |
107 |
|
{ |
108 |
212 |
return codeTables.values(); |
109 |
|
} |
110 |
|
|
111 |
|
|
112 |
|
|
113 |
|
|
114 |
|
@param |
115 |
|
@return |
116 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
117 |
14 |
public GeneticCodeI getCodeTable(String id)... |
118 |
|
{ |
119 |
14 |
return codeTables.get(id); |
120 |
|
} |
121 |
|
|
122 |
|
|
123 |
|
|
124 |
|
|
125 |
|
|
126 |
|
@return |
127 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
128 |
178 |
public GeneticCodeI getStandardCodeTable()... |
129 |
|
{ |
130 |
178 |
return codeTables.values().iterator().next(); |
131 |
|
} |
132 |
|
|
133 |
|
|
134 |
|
|
135 |
|
|
|
|
| 73.9% |
Uncovered Elements: 6 (23) |
Complexity: 7 |
Complexity Density: 0.47 |
|
136 |
1 |
protected void loadCodes(String fileName)... |
137 |
|
{ |
138 |
1 |
try |
139 |
|
{ |
140 |
1 |
InputStream is = getClass().getResourceAsStream(fileName); |
141 |
1 |
if (is == null) |
142 |
|
{ |
143 |
0 |
jalview.bin.Console |
144 |
|
.errPrintln("Resource file not found: " + fileName); |
145 |
0 |
return; |
146 |
|
} |
147 |
1 |
BufferedReader dataIn = new BufferedReader(new InputStreamReader(is)); |
148 |
|
|
149 |
|
|
150 |
|
|
151 |
|
|
152 |
1 |
String line = ""; |
153 |
94 |
while (line != null && !line.startsWith("Genetic-code-table")) |
154 |
|
{ |
155 |
93 |
line = readLine(dataIn); |
156 |
|
} |
157 |
1 |
line = readLine(dataIn); |
158 |
|
|
159 |
26 |
while (line.startsWith("{")) |
160 |
|
{ |
161 |
25 |
line = loadOneTable(dataIn); |
162 |
|
} |
163 |
|
} catch (IOException | NullPointerException e) |
164 |
|
{ |
165 |
0 |
Console.error("Error reading genetic codes data file " + fileName |
166 |
|
+ ": " + e.getMessage()); |
167 |
|
} |
168 |
1 |
if (codeTables.isEmpty()) |
169 |
|
{ |
170 |
0 |
jalview.bin.Console.errPrintln( |
171 |
|
"No genetic code tables loaded, check format of file " |
172 |
|
+ fileName); |
173 |
|
} |
174 |
|
} |
175 |
|
|
176 |
|
|
177 |
|
|
178 |
|
|
179 |
|
|
180 |
|
|
181 |
|
|
182 |
|
|
183 |
|
|
184 |
|
|
185 |
|
@param |
186 |
|
|
|
|
| 73.9% |
Uncovered Elements: 6 (23) |
Complexity: 7 |
Complexity Density: 0.47 |
|
187 |
1 |
protected void loadAmbiguityCodes(String fileName)... |
188 |
|
{ |
189 |
1 |
try |
190 |
|
{ |
191 |
1 |
InputStream is = getClass().getResourceAsStream(fileName); |
192 |
1 |
if (is == null) |
193 |
|
{ |
194 |
0 |
jalview.bin.Console |
195 |
|
.errPrintln("Resource file not found: " + fileName); |
196 |
0 |
return; |
197 |
|
} |
198 |
1 |
BufferedReader dataIn = new BufferedReader(new InputStreamReader(is)); |
199 |
1 |
String line = ""; |
200 |
14 |
while (line != null) |
201 |
|
{ |
202 |
13 |
line = readLine(dataIn); |
203 |
13 |
if (line != null && !"DNA".equals(line.toUpperCase(Locale.ROOT))) |
204 |
|
{ |
205 |
11 |
String[] tokens = line.split("\\t"); |
206 |
11 |
if (tokens.length == 2) |
207 |
|
{ |
208 |
11 |
ambiguityCodes.put(tokens[0].toUpperCase(Locale.ROOT), |
209 |
|
tokens[1].toUpperCase(Locale.ROOT)); |
210 |
|
} |
211 |
|
else |
212 |
|
{ |
213 |
0 |
jalview.bin.Console.errPrintln( |
214 |
|
"Unexpected data in " + fileName + ": " + line); |
215 |
|
} |
216 |
|
} |
217 |
|
} |
218 |
|
} catch (IOException e) |
219 |
|
{ |
220 |
0 |
Console.error("Error reading nucleotide ambiguity codes data file: " |
221 |
|
+ e.getMessage()); |
222 |
|
} |
223 |
|
} |
224 |
|
|
225 |
|
|
226 |
|
|
227 |
|
|
228 |
|
|
229 |
|
@param |
230 |
|
@return |
231 |
|
@throws |
232 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (8) |
Complexity: 4 |
Complexity Density: 1 |
|
233 |
341 |
protected String readLine(BufferedReader dataIn) throws IOException... |
234 |
|
{ |
235 |
341 |
String line = dataIn.readLine(); |
236 |
342 |
while (line != null && line.startsWith("#")) |
237 |
|
{ |
238 |
1 |
line = readLine(dataIn); |
239 |
|
} |
240 |
341 |
return line == null ? null : line.trim(); |
241 |
|
} |
242 |
|
|
243 |
|
|
244 |
|
|
245 |
|
|
246 |
|
|
247 |
|
|
248 |
|
|
249 |
|
|
250 |
|
|
251 |
|
|
252 |
|
|
253 |
|
|
254 |
|
|
255 |
|
|
256 |
|
|
257 |
|
|
258 |
|
|
259 |
|
|
260 |
|
|
261 |
|
|
262 |
|
|
263 |
|
|
264 |
|
|
265 |
|
|
266 |
|
|
267 |
|
@param |
268 |
|
@return |
269 |
|
@throws |
270 |
|
|
|
|
| 94.3% |
Uncovered Elements: 2 (35) |
Complexity: 9 |
Complexity Density: 0.39 |
|
271 |
25 |
protected String loadOneTable(BufferedReader dataIn) throws IOException... |
272 |
|
{ |
273 |
25 |
String name = null; |
274 |
25 |
String id = null; |
275 |
25 |
Map<String, String> codons = new HashMap<>(); |
276 |
|
|
277 |
25 |
String line = readLine(dataIn); |
278 |
|
|
279 |
208 |
while (line != null && !line.startsWith("}")) |
280 |
|
{ |
281 |
183 |
if (line.startsWith("name") && name == null) |
282 |
|
{ |
283 |
25 |
name = line.substring(line.indexOf(QUOTE) + 1, |
284 |
|
line.lastIndexOf(QUOTE)); |
285 |
|
} |
286 |
158 |
else if (line.startsWith("id")) |
287 |
|
{ |
288 |
25 |
id = new StringTokenizer(line.substring(2)).nextToken(); |
289 |
|
} |
290 |
133 |
else if (line.startsWith("ncbieaa")) |
291 |
|
{ |
292 |
25 |
String aminos = line.substring(line.indexOf(QUOTE) + 1, |
293 |
|
line.lastIndexOf(QUOTE)); |
294 |
25 |
if (aminos.length() != NUCS_COUNT_CUBED) |
295 |
|
{ |
296 |
0 |
Console.error("wrong data length in code table: " + line); |
297 |
|
} |
298 |
|
else |
299 |
|
{ |
300 |
1625 |
for (int i = 0; i < aminos.length(); i++) |
301 |
|
{ |
302 |
1600 |
String peptide = String.valueOf(aminos.charAt(i)); |
303 |
1600 |
char codon1 = NUCS.charAt(i / NUCS_COUNT_SQUARED); |
304 |
1600 |
char codon2 = NUCS |
305 |
|
.charAt((i % NUCS_COUNT_SQUARED) / NUCS_COUNT); |
306 |
1600 |
char codon3 = NUCS.charAt(i % NUCS_COUNT); |
307 |
1600 |
String codon = new String( |
308 |
|
new char[] |
309 |
|
{ codon1, codon2, codon3 }); |
310 |
1600 |
codons.put(codon, peptide); |
311 |
|
} |
312 |
|
} |
313 |
|
} |
314 |
183 |
line = readLine(dataIn); |
315 |
|
} |
316 |
|
|
317 |
25 |
registerCodeTable(id, name, codons); |
318 |
25 |
return readLine(dataIn); |
319 |
|
} |
320 |
|
|
321 |
|
|
322 |
|
|
323 |
|
|
324 |
|
|
325 |
|
|
326 |
|
|
327 |
|
@param |
328 |
|
@param |
329 |
|
@param |
330 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
331 |
25 |
protected void registerCodeTable(final String id, final String name,... |
332 |
|
final Map<String, String> codons) |
333 |
|
{ |
334 |
25 |
codeTables.put(id, new GeneticCodeI() |
335 |
|
{ |
336 |
|
|
337 |
|
|
338 |
|
|
339 |
|
|
340 |
|
Map<String, String> ambiguous = new HashMap<>(); |
341 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
342 |
2365 |
@Override... |
343 |
|
public String translateCanonical(String codon) |
344 |
|
{ |
345 |
2365 |
return codons.get(codon.toUpperCase(Locale.ROOT)); |
346 |
|
} |
347 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (7) |
Complexity: 2 |
Complexity Density: 0.4 |
|
348 |
2359 |
@Override... |
349 |
|
public String translate(String codon) |
350 |
|
{ |
351 |
2359 |
String upper = codon.toUpperCase(Locale.ROOT); |
352 |
2359 |
String peptide = translateCanonical(upper); |
353 |
|
|
354 |
|
|
355 |
|
|
356 |
|
|
357 |
2359 |
if (peptide == null) |
358 |
|
{ |
359 |
73 |
peptide = getAmbiguousTranslation(upper, ambiguous, this); |
360 |
|
} |
361 |
2359 |
return peptide; |
362 |
|
} |
363 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
364 |
5302 |
@Override... |
365 |
|
public String getId() |
366 |
|
{ |
367 |
5302 |
return id; |
368 |
|
} |
369 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
370 |
5279 |
@Override... |
371 |
|
public String getName() |
372 |
|
{ |
373 |
5279 |
return name; |
374 |
|
} |
375 |
|
}); |
376 |
|
} |
377 |
|
|
378 |
|
|
379 |
|
|
380 |
|
|
381 |
|
|
382 |
|
|
383 |
|
|
384 |
|
@param |
385 |
|
@param |
386 |
|
@param |
387 |
|
@return |
388 |
|
|
|
|
| 94.3% |
Uncovered Elements: 2 (35) |
Complexity: 8 |
Complexity Density: 0.32 |
|
389 |
73 |
protected String getAmbiguousTranslation(String codon,... |
390 |
|
Map<String, String> ambiguous, GeneticCodeI codeTable) |
391 |
|
{ |
392 |
73 |
if (codon.length() != CODON_LENGTH) |
393 |
|
{ |
394 |
0 |
return null; |
395 |
|
} |
396 |
|
|
397 |
73 |
boolean isAmbiguous = false; |
398 |
|
|
399 |
73 |
char[][] expanded = new char[CODON_LENGTH][]; |
400 |
292 |
for (int i = 0; i < CODON_LENGTH; i++) |
401 |
|
{ |
402 |
219 |
String base = String.valueOf(codon.charAt(i)); |
403 |
219 |
if (ambiguityCodes.containsKey(base)) |
404 |
|
{ |
405 |
83 |
isAmbiguous = true; |
406 |
83 |
base = ambiguityCodes.get(base); |
407 |
|
} |
408 |
219 |
expanded[i] = base.toCharArray(); |
409 |
|
} |
410 |
|
|
411 |
73 |
if (!isAmbiguous) |
412 |
|
{ |
413 |
|
|
414 |
1 |
return null; |
415 |
|
} |
416 |
|
|
417 |
|
|
418 |
|
|
419 |
|
|
420 |
|
|
421 |
72 |
String peptide = null; |
422 |
72 |
for (char c1 : expanded[0]) |
423 |
|
{ |
424 |
78 |
for (char c2 : expanded[1]) |
425 |
|
{ |
426 |
82 |
for (char c3 : expanded[2]) |
427 |
|
{ |
428 |
181 |
char[] cdn = new char[] { c1, c2, c3 }; |
429 |
181 |
String possibleCodon = String.valueOf(cdn); |
430 |
181 |
String pep = codeTable.translate(possibleCodon); |
431 |
181 |
if (pep == null || (peptide != null && !pep.equals(peptide))) |
432 |
|
{ |
433 |
23 |
ambiguous.put(codon, null); |
434 |
23 |
return null; |
435 |
|
} |
436 |
158 |
peptide = pep; |
437 |
|
} |
438 |
|
} |
439 |
|
} |
440 |
|
|
441 |
|
|
442 |
|
|
443 |
|
|
444 |
49 |
ambiguous.put(codon, peptide); |
445 |
49 |
return peptide; |
446 |
|
} |
447 |
|
} |