Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
CodingUtils | 29 | 51 | 20 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | package jalview.analysis; | |
22 | ||
/**
 * A utility class to provide encoding/decoding schemes for data.
 * 
 * @author gmcarstairs
 * 
 */
public class CodingUtils
{

  /*
   * Number of bits used when encoding codon characters. 2 is enough for ACGT.
   * To accommodate more (e.g. ambiguity codes), simply increase this number
   * (and adjust unit tests to match).
   */
  private static final int CODON_ENCODING_BITSHIFT = 2;

  /*
   * Mask selecting the low-order bits that hold a single encoded nucleotide.
   * Derived from the bit shift so that the two cannot drift apart.
   */
  private static final int NUCLEOTIDE_MASK = (1 << CODON_ENCODING_BITSHIFT) - 1;

  /*
   * Number of nucleotides in a codon.
   */
  private static final int CODON_LENGTH = 3;

  /**
   * Encode a codon from e.g. ['A', 'G', 'C'] to a number in the range 0 - 63.
   * Converts lower to upper case, U to T, then assembles a binary value by
   * encoding A/C/G/T as 00/01/10/11 respectively and shifting. Only the first
   * three characters of the array are used.
   * 
   * @param codon
   *          the codon characters; may be null
   * @return the encoded codon, or a negative number if the array is null, has
   *         fewer than three characters, or any of its first three characters
   *         is not one of A/C/G/T/U (in either case)
   */
  public static int encodeCodon(char[] codon)
  {
    if (codon == null || codon.length < CODON_LENGTH)
    {
      return -1;
    }
    int encoded = 0;
    for (int i = 0; i < CODON_LENGTH; i++)
    {
      int base = encodeCodon(codon[i]);
      if (base < 0)
      {
        /*
         * Return at once: the invalid marker is Integer.MIN_VALUE, which
         * would overflow to zero if shifted left and so be mistaken for 'A'.
         */
        return base;
      }
      encoded = (encoded << CODON_ENCODING_BITSHIFT) | base;
    }
    return encoded;
  }

  /**
   * Encodes aA/cC/gG/tTuU as 0/1/2/3 respectively. Returns Integer.MIN_VALUE (a
   * large negative value) for any other character.
   * 
   * @param c
   *          the nucleotide character to encode
   * @return 0, 1, 2 or 3 for a recognised character, else Integer.MIN_VALUE
   */
  public static int encodeCodon(char c)
  {
    switch (c)
    {
    case 'A':
    case 'a':
      return 0;
    case 'C':
    case 'c':
      return 1;
    case 'G':
    case 'g':
      return 2;
    case 'T':
    case 't':
    case 'U':
    case 'u':
      return 3;
    default:
      return Integer.MIN_VALUE;
    }
  }

  /**
   * Converts a binary encoded codon into an ['A', 'C', 'G'] (or 'T') triplet.
   * 
   * The two low-order bits encode for A/C/G/T as 0/1/2/3, etc. Bits above those
   * needed for three nucleotides are ignored.
   * 
   * @param encoded
   *          the encoded codon
   * @return a new three character array holding the decoded codon
   */
  public static char[] decodeCodon(int encoded)
  {
    char[] result = new char[CODON_LENGTH];
    decodeCodon2(encoded, result);
    return result;
  }

  /**
   * Decodes a binary encoded codon into the first three positions of the
   * supplied array, so that a caller may reuse one array across many calls.
   * Decoding is the same as for {@link #decodeCodon(int)}.
   * 
   * @param encoded
   *          the encoded codon
   * @param result
   *          array to receive the decoded characters at indices 0, 1 and 2
   * @throws NullPointerException
   *           if result is null
   * @throws ArrayIndexOutOfBoundsException
   *           if result has fewer than three elements
   */
  public static void decodeCodon2(int encoded, char[] result)
  {
    /*
     * Fill from the last position backwards, as the last nucleotide occupies
     * the lowest-order bits.
     */
    int remaining = encoded;
    for (int i = CODON_LENGTH - 1; i >= 0; i--)
    {
      result[i] = decodeNucleotide(remaining & NUCLEOTIDE_MASK);
      remaining = remaining >>> CODON_ENCODING_BITSHIFT;
    }
  }

  /**
   * Convert value 0/1/2/3 to 'A'/'C'/'G'/'T'
   * 
   * @param i
   *          the encoded nucleotide value
   * @return the decoded character, or '0' if the value is not in the range 0-3
   */
  public static char decodeNucleotide(int i)
  {
    switch (i)
    {
    case 0:
      return 'A';
    case 1:
      return 'C';
    case 2:
      return 'G';
    case 3:
      return 'T';
    default:
      return '0';
    }
  }

}