1 |
|
|
2 |
|
|
3 |
|
|
4 |
|
|
5 |
|
|
6 |
|
|
7 |
|
|
8 |
|
|
9 |
|
|
10 |
|
|
11 |
|
|
12 |
|
|
13 |
|
|
14 |
|
|
15 |
|
|
16 |
|
|
17 |
|
|
18 |
|
|
19 |
|
|
20 |
|
|
21 |
|
package jalview.analysis; |
22 |
|
|
23 |
|
import jalview.bin.Cache; |
24 |
|
|
25 |
|
import java.io.BufferedReader; |
26 |
|
import java.io.IOException; |
27 |
|
import java.io.InputStream; |
28 |
|
import java.io.InputStreamReader; |
29 |
|
import java.util.HashMap; |
30 |
|
import java.util.LinkedHashMap; |
31 |
|
import java.util.Map; |
32 |
|
import java.util.StringTokenizer; |
33 |
|
|
34 |
|
|
35 |
|
|
36 |
|
|
37 |
|
@author |
38 |
|
@see |
39 |
|
|
|
|
| 89.4% |
Uncovered Elements: 17 (161) |
Complexity: 47 |
Complexity Density: 0.47 |
|
40 |
|
public final class GeneticCodes |
41 |
|
{ |
42 |
|
private static final int CODON_LENGTH = 3; |
43 |
|
|
44 |
|
private static final String QUOTE = "\""; |
45 |
|
|
46 |
|
|
47 |
|
|
48 |
|
|
49 |
|
private static final String NUCS = "TCAG"; |
50 |
|
|
51 |
|
private static final int NUCS_COUNT = NUCS.length(); |
52 |
|
|
53 |
|
private static final int NUCS_COUNT_SQUARED = NUCS_COUNT * NUCS_COUNT; |
54 |
|
|
55 |
|
private static final int NUCS_COUNT_CUBED = NUCS_COUNT * NUCS_COUNT |
56 |
|
* NUCS_COUNT; |
57 |
|
|
58 |
|
private static final String AMBIGUITY_CODES_FILE = "/AmbiguityCodes.dat"; |
59 |
|
|
60 |
|
private static final String RESOURCE_FILE = "/GeneticCodes.dat"; |
61 |
|
|
62 |
|
private static GeneticCodes instance = new GeneticCodes(); |
63 |
|
|
64 |
|
private Map<String, String> ambiguityCodes; |
65 |
|
|
66 |
|
|
67 |
|
|
68 |
|
|
69 |
|
private Map<String, GeneticCodeI> codeTables; |
70 |
|
|
71 |
|
|
72 |
|
|
73 |
|
|
|
|
| 85.7% |
Uncovered Elements: 1 (7) |
Complexity: 2 |
Complexity Density: 0.4 |
|
74 |
18 |
/**
 * Private constructor (singleton). Creates the lookup maps and loads the
 * ambiguity codes and then the genetic code tables from their resource
 * files. Nothing is done if the static instance has already been set.
 */
private GeneticCodes()
{
  if (instance != null)
  {
    // singleton already built - leave this object uninitialised
    return;
  }

  ambiguityCodes = new HashMap<>();

  // LinkedHashMap so that tables are kept in the order they were loaded
  codeTables = new LinkedHashMap<>();

  loadAmbiguityCodes(AMBIGUITY_CODES_FILE);
  loadCodes(RESOURCE_FILE);
}
89 |
|
|
90 |
|
|
91 |
|
|
92 |
|
|
93 |
|
@return |
94 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
95 |
535 |
public static GeneticCodes getInstance()... |
96 |
|
{ |
97 |
535 |
return instance; |
98 |
|
} |
99 |
|
|
100 |
|
|
101 |
|
|
102 |
|
|
103 |
|
@return |
104 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
105 |
242 |
public Iterable<GeneticCodeI> getCodeTables()... |
106 |
|
{ |
107 |
242 |
return codeTables.values(); |
108 |
|
} |
109 |
|
|
110 |
|
|
111 |
|
|
112 |
|
|
113 |
|
@param |
114 |
|
@return |
115 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
116 |
14 |
public GeneticCodeI getCodeTable(String id)... |
117 |
|
{ |
118 |
14 |
return codeTables.get(id); |
119 |
|
} |
120 |
|
|
121 |
|
|
122 |
|
|
123 |
|
|
124 |
|
|
125 |
|
@return |
126 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
127 |
289 |
public GeneticCodeI getStandardCodeTable()... |
128 |
|
{ |
129 |
289 |
return codeTables.values().iterator().next(); |
130 |
|
} |
131 |
|
|
132 |
|
|
133 |
|
|
134 |
|
|
|
|
| 73.9% |
Uncovered Elements: 6 (23) |
Complexity: 7 |
Complexity Density: 0.47 |
|
135 |
18 |
protected void loadCodes(String fileName)... |
136 |
|
{ |
137 |
18 |
try |
138 |
|
{ |
139 |
18 |
InputStream is = getClass().getResourceAsStream(fileName); |
140 |
18 |
if (is == null) |
141 |
|
{ |
142 |
0 |
System.err.println("Resource file not found: " + fileName); |
143 |
0 |
return; |
144 |
|
} |
145 |
18 |
BufferedReader dataIn = new BufferedReader(new InputStreamReader(is)); |
146 |
|
|
147 |
|
|
148 |
|
|
149 |
|
|
150 |
18 |
String line = ""; |
151 |
1692 |
while (line != null && !line.startsWith("Genetic-code-table")) |
152 |
|
{ |
153 |
1674 |
line = readLine(dataIn); |
154 |
|
} |
155 |
18 |
line = readLine(dataIn); |
156 |
|
|
157 |
468 |
while (line.startsWith("{")) |
158 |
|
{ |
159 |
450 |
line = loadOneTable(dataIn); |
160 |
|
} |
161 |
|
} catch (IOException | NullPointerException e) |
162 |
|
{ |
163 |
0 |
Cache.log.error( |
164 |
|
"Error reading genetic codes data file " + fileName + ": " |
165 |
|
+ e.getMessage()); |
166 |
|
} |
167 |
18 |
if (codeTables.isEmpty()) |
168 |
|
{ |
169 |
0 |
System.err.println( |
170 |
|
"No genetic code tables loaded, check format of file " |
171 |
|
+ fileName); |
172 |
|
} |
173 |
|
} |
174 |
|
|
175 |
|
|
176 |
|
|
177 |
|
|
178 |
|
|
179 |
|
|
180 |
|
|
181 |
|
|
182 |
|
|
183 |
|
|
184 |
|
@param |
185 |
|
|
|
|
| 73.9% |
Uncovered Elements: 6 (23) |
Complexity: 7 |
Complexity Density: 0.47 |
|
186 |
18 |
protected void loadAmbiguityCodes(String fileName)... |
187 |
|
{ |
188 |
18 |
try |
189 |
|
{ |
190 |
18 |
InputStream is = getClass().getResourceAsStream(fileName); |
191 |
18 |
if (is == null) |
192 |
|
{ |
193 |
0 |
System.err.println("Resource file not found: " + fileName); |
194 |
0 |
return; |
195 |
|
} |
196 |
18 |
BufferedReader dataIn = new BufferedReader(new InputStreamReader(is)); |
197 |
18 |
String line = ""; |
198 |
252 |
while (line != null) |
199 |
|
{ |
200 |
234 |
line = readLine(dataIn); |
201 |
234 |
if (line != null && !"DNA".equals(line.toUpperCase())) |
202 |
|
{ |
203 |
198 |
String[] tokens = line.split("\\t"); |
204 |
198 |
if (tokens.length == 2) |
205 |
|
{ |
206 |
198 |
ambiguityCodes.put(tokens[0].toUpperCase(), |
207 |
|
tokens[1].toUpperCase()); |
208 |
|
} |
209 |
|
else |
210 |
|
{ |
211 |
0 |
System.err.println( |
212 |
|
"Unexpected data in " + fileName + ": " + line); |
213 |
|
} |
214 |
|
} |
215 |
|
} |
216 |
|
} catch (IOException e) |
217 |
|
{ |
218 |
0 |
Cache.log.error( |
219 |
|
"Error reading nucleotide ambiguity codes data file: " |
220 |
|
+ e.getMessage()); |
221 |
|
} |
222 |
|
} |
223 |
|
|
224 |
|
|
225 |
|
|
226 |
|
|
227 |
|
|
228 |
|
@param |
229 |
|
@return |
230 |
|
@throws |
231 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (8) |
Complexity: 4 |
Complexity Density: 1 |
|
232 |
6138 |
protected String readLine(BufferedReader dataIn) throws IOException... |
233 |
|
{ |
234 |
6138 |
String line = dataIn.readLine(); |
235 |
6156 |
while (line != null && line.startsWith("#")) |
236 |
|
{ |
237 |
18 |
line = readLine(dataIn); |
238 |
|
} |
239 |
6138 |
return line == null ? null : line.trim(); |
240 |
|
} |
241 |
|
|
242 |
|
|
243 |
|
|
244 |
|
|
245 |
|
|
246 |
|
|
247 |
|
|
248 |
|
|
249 |
|
|
250 |
|
|
251 |
|
|
252 |
|
|
253 |
|
|
254 |
|
|
255 |
|
|
256 |
|
|
257 |
|
|
258 |
|
|
259 |
|
|
260 |
|
|
261 |
|
|
262 |
|
|
263 |
|
|
264 |
|
|
265 |
|
|
266 |
|
@param |
267 |
|
@return |
268 |
|
@throws |
269 |
|
|
|
|
| 94.3% |
Uncovered Elements: 2 (35) |
Complexity: 9 |
Complexity Density: 0.39 |
|
270 |
450 |
protected String loadOneTable(BufferedReader dataIn) throws IOException... |
271 |
|
{ |
272 |
450 |
String name = null; |
273 |
450 |
String id = null; |
274 |
450 |
Map<String, String> codons = new HashMap<>(); |
275 |
|
|
276 |
450 |
String line = readLine(dataIn); |
277 |
|
|
278 |
3744 |
while (line != null && !line.startsWith("}")) |
279 |
|
{ |
280 |
3294 |
if (line.startsWith("name") && name == null) |
281 |
|
{ |
282 |
450 |
name = line.substring(line.indexOf(QUOTE) + 1, |
283 |
|
line.lastIndexOf(QUOTE)); |
284 |
|
} |
285 |
2844 |
else if (line.startsWith("id")) |
286 |
|
{ |
287 |
450 |
id = new StringTokenizer(line.substring(2)).nextToken(); |
288 |
|
} |
289 |
2394 |
else if (line.startsWith("ncbieaa")) |
290 |
|
{ |
291 |
450 |
String aminos = line.substring(line.indexOf(QUOTE) + 1, |
292 |
|
line.lastIndexOf(QUOTE)); |
293 |
450 |
if (aminos.length() != NUCS_COUNT_CUBED) |
294 |
|
{ |
295 |
0 |
Cache.log.error("wrong data length in code table: " + line); |
296 |
|
} |
297 |
|
else |
298 |
|
{ |
299 |
29250 |
for (int i = 0; i < aminos.length(); i++) |
300 |
|
{ |
301 |
28800 |
String peptide = String.valueOf(aminos.charAt(i)); |
302 |
28800 |
char codon1 = NUCS.charAt(i / NUCS_COUNT_SQUARED); |
303 |
28800 |
char codon2 = NUCS |
304 |
|
.charAt((i % NUCS_COUNT_SQUARED) / NUCS_COUNT); |
305 |
28800 |
char codon3 = NUCS.charAt(i % NUCS_COUNT); |
306 |
28800 |
String codon = new String( |
307 |
|
new char[] |
308 |
|
{ codon1, codon2, codon3 }); |
309 |
28800 |
codons.put(codon, peptide); |
310 |
|
} |
311 |
|
} |
312 |
|
} |
313 |
3294 |
line = readLine(dataIn); |
314 |
|
} |
315 |
|
|
316 |
450 |
registerCodeTable(id, name, codons); |
317 |
450 |
return readLine(dataIn); |
318 |
|
} |
319 |
|
|
320 |
|
|
321 |
|
|
322 |
|
|
323 |
|
|
324 |
|
|
325 |
|
|
326 |
|
@param |
327 |
|
@param |
328 |
|
@param |
329 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
330 |
450 |
protected void registerCodeTable(final String id, final String name,... |
331 |
|
final Map<String, String> codons) |
332 |
|
{ |
333 |
450 |
codeTables.put(id, new GeneticCodeI() |
334 |
|
{ |
335 |
|
|
336 |
|
|
337 |
|
|
338 |
|
|
339 |
|
Map<String, String> ambiguous = new HashMap<>(); |
340 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
341 |
2616 |
@Override... |
342 |
|
public String translateCanonical(String codon) |
343 |
|
{ |
344 |
2616 |
return codons.get(codon.toUpperCase()); |
345 |
|
} |
346 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (7) |
Complexity: 2 |
Complexity Density: 0.4 |
|
347 |
2610 |
@Override... |
348 |
|
public String translate(String codon) |
349 |
|
{ |
350 |
2610 |
String upper = codon.toUpperCase(); |
351 |
2610 |
String peptide = translateCanonical(upper); |
352 |
|
|
353 |
|
|
354 |
|
|
355 |
|
|
356 |
2610 |
if (peptide == null) |
357 |
|
{ |
358 |
128 |
peptide = getAmbiguousTranslation(upper, ambiguous, this); |
359 |
|
} |
360 |
2610 |
return peptide; |
361 |
|
} |
362 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
363 |
6052 |
@Override... |
364 |
|
public String getId() |
365 |
|
{ |
366 |
6052 |
return id; |
367 |
|
} |
368 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
369 |
6029 |
@Override... |
370 |
|
public String getName() |
371 |
|
{ |
372 |
6029 |
return name; |
373 |
|
} |
374 |
|
}); |
375 |
|
} |
376 |
|
|
377 |
|
|
378 |
|
|
379 |
|
|
380 |
|
|
381 |
|
|
382 |
|
|
383 |
|
@param |
384 |
|
@param |
385 |
|
@param |
386 |
|
@return |
387 |
|
|
|
|
| 94.3% |
Uncovered Elements: 2 (35) |
Complexity: 8 |
Complexity Density: 0.32 |
|
388 |
128 |
protected String getAmbiguousTranslation(String codon,... |
389 |
|
Map<String, String> ambiguous, GeneticCodeI codeTable) |
390 |
|
{ |
391 |
128 |
if (codon.length() != CODON_LENGTH) |
392 |
|
{ |
393 |
0 |
return null; |
394 |
|
} |
395 |
|
|
396 |
128 |
boolean isAmbiguous = false; |
397 |
|
|
398 |
128 |
char[][] expanded = new char[CODON_LENGTH][]; |
399 |
512 |
for (int i = 0; i < CODON_LENGTH; i++) |
400 |
|
{ |
401 |
384 |
String base = String.valueOf(codon.charAt(i)); |
402 |
384 |
if (ambiguityCodes.containsKey(base)) |
403 |
|
{ |
404 |
149 |
isAmbiguous = true; |
405 |
149 |
base = ambiguityCodes.get(base); |
406 |
|
} |
407 |
384 |
expanded[i] = base.toCharArray(); |
408 |
|
} |
409 |
|
|
410 |
128 |
if (!isAmbiguous) |
411 |
|
{ |
412 |
|
|
413 |
1 |
return null; |
414 |
|
} |
415 |
|
|
416 |
|
|
417 |
|
|
418 |
|
|
419 |
|
|
420 |
127 |
String peptide = null; |
421 |
127 |
for (char c1 : expanded[0]) |
422 |
|
{ |
423 |
138 |
for (char c2 : expanded[1]) |
424 |
|
{ |
425 |
146 |
for (char c3 : expanded[2]) |
426 |
|
{ |
427 |
321 |
char[] cdn = new char[] { c1, c2, c3 }; |
428 |
321 |
String possibleCodon = String.valueOf(cdn); |
429 |
321 |
String pep = codeTable.translate(possibleCodon); |
430 |
321 |
if (pep == null || (peptide != null && !pep.equals(peptide))) |
431 |
|
{ |
432 |
41 |
ambiguous.put(codon, null); |
433 |
41 |
return null; |
434 |
|
} |
435 |
280 |
peptide = pep; |
436 |
|
} |
437 |
|
} |
438 |
|
} |
439 |
|
|
440 |
|
|
441 |
|
|
442 |
|
|
443 |
86 |
ambiguous.put(codon, peptide); |
444 |
86 |
return peptide; |
445 |
|
} |
446 |
|
} |