Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
AAFrequencyTest | 41 | 192 | 11 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | package jalview.analysis; | |
22 | ||
23 | import static org.testng.AssertJUnit.assertEquals; | |
24 | import static org.testng.AssertJUnit.assertNull; | |
25 | ||
26 | import jalview.datamodel.AlignmentAnnotation; | |
27 | import jalview.datamodel.Annotation; | |
28 | import jalview.datamodel.Profile; | |
29 | import jalview.datamodel.ProfileI; | |
30 | import jalview.datamodel.ProfilesI; | |
31 | import jalview.datamodel.ResidueCount; | |
32 | import jalview.datamodel.Sequence; | |
33 | import jalview.datamodel.SequenceI; | |
34 | import jalview.gui.JvOptionPane; | |
35 | ||
36 | import java.util.Hashtable; | |
37 | ||
38 | import org.testng.annotations.BeforeClass; | |
39 | import org.testng.annotations.Test; | |
40 | ||
41 | public class AAFrequencyTest | |
42 | { | |
43 | ||
44 | 1 | @BeforeClass(alwaysRun = true) |
45 | public void setUpJvOptionPane() | |
46 | { | |
47 | 1 | JvOptionPane.setInteractiveMode(false); |
48 | 1 | JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION); |
49 | } | |
50 | ||
51 | 1 | @Test(groups = { "Functional" }) |
52 | public void testCalculate_noProfile() | |
53 | { | |
54 | 1 | SequenceI seq1 = new Sequence("Seq1", "CAG-T"); |
55 | 1 | SequenceI seq2 = new Sequence("Seq2", "CAC-T"); |
56 | 1 | SequenceI seq3 = new Sequence("Seq3", "C---G"); |
57 | 1 | SequenceI seq4 = new Sequence("Seq4", "CA--t"); |
58 | 1 | SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 }; |
59 | 1 | int width = seq1.getLength(); |
60 | 1 | ProfilesI result = AAFrequency.calculate(seqs, width, 0, width, false); |
61 | ||
62 | // col 0 is 100% C | |
63 | 1 | ProfileI col = result.get(0); |
64 | 1 | assertEquals(100f, col.getPercentageIdentity(false)); |
65 | 1 | assertEquals(100f, col.getPercentageIdentity(true)); |
66 | 1 | assertEquals(4, col.getMaxCount()); |
67 | 1 | assertEquals("C", col.getModalResidue()); |
68 | 1 | assertNull(col.getCounts()); |
69 | ||
70 | // col 1 is 75% A | |
71 | 1 | col = result.get(1); |
72 | 1 | assertEquals(75f, col.getPercentageIdentity(false)); |
73 | 1 | assertEquals(100f, col.getPercentageIdentity(true)); |
74 | 1 | assertEquals(3, col.getMaxCount()); |
75 | 1 | assertEquals("A", col.getModalResidue()); |
76 | ||
77 | // col 2 is 50% G 50% C or 25/25 counting gaps | |
78 | 1 | col = result.get(2); |
79 | 1 | assertEquals(25f, col.getPercentageIdentity(false)); |
80 | 1 | assertEquals(50f, col.getPercentageIdentity(true)); |
81 | 1 | assertEquals(1, col.getMaxCount()); |
82 | 1 | assertEquals("CG", col.getModalResidue()); |
83 | ||
84 | // col 3 is all gaps | |
85 | 1 | col = result.get(3); |
86 | 1 | assertEquals(0f, col.getPercentageIdentity(false)); |
87 | 1 | assertEquals(0f, col.getPercentageIdentity(true)); |
88 | 1 | assertEquals(0, col.getMaxCount()); |
89 | 1 | assertEquals("", col.getModalResidue()); |
90 | ||
91 | // col 4 is 75% T 25% G | |
92 | 1 | col = result.get(4); |
93 | 1 | assertEquals(75f, col.getPercentageIdentity(false)); |
94 | 1 | assertEquals(75f, col.getPercentageIdentity(true)); |
95 | 1 | assertEquals(3, col.getMaxCount()); |
96 | 1 | assertEquals("T", col.getModalResidue()); |
97 | } | |
98 | ||
99 | 1 | @Test(groups = { "Functional" }) |
100 | public void testCalculate_withProfile() | |
101 | { | |
102 | 1 | SequenceI seq1 = new Sequence("Seq1", "CAGT"); |
103 | 1 | SequenceI seq2 = new Sequence("Seq2", "CACT"); |
104 | 1 | SequenceI seq3 = new Sequence("Seq3", "C--G"); |
105 | 1 | SequenceI seq4 = new Sequence("Seq4", "CA-t"); |
106 | 1 | SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 }; |
107 | 1 | int width = seq1.getLength(); |
108 | 1 | ProfilesI result = AAFrequency.calculate(seqs, width, 0, width, true); |
109 | ||
110 | 1 | ProfileI profile = result.get(0); |
111 | 1 | assertEquals(4, profile.getCounts().getCount('C')); |
112 | 1 | assertEquals(4, profile.getHeight()); |
113 | 1 | assertEquals(4, profile.getNonGapped()); |
114 | ||
115 | 1 | profile = result.get(1); |
116 | 1 | assertEquals(3, profile.getCounts().getCount('A')); |
117 | 1 | assertEquals(4, profile.getHeight()); |
118 | 1 | assertEquals(3, profile.getNonGapped()); |
119 | ||
120 | 1 | profile = result.get(2); |
121 | 1 | assertEquals(1, profile.getCounts().getCount('C')); |
122 | 1 | assertEquals(1, profile.getCounts().getCount('G')); |
123 | 1 | assertEquals(4, profile.getHeight()); |
124 | 1 | assertEquals(2, profile.getNonGapped()); |
125 | ||
126 | 1 | profile = result.get(3); |
127 | 1 | assertEquals(3, profile.getCounts().getCount('T')); |
128 | 1 | assertEquals(1, profile.getCounts().getCount('G')); |
129 | 1 | assertEquals(4, profile.getHeight()); |
130 | 1 | assertEquals(4, profile.getNonGapped()); |
131 | } | |
132 | ||
133 | 0 | @Test(groups = { "Functional" }, enabled = false) |
134 | public void testCalculate_withProfileTiming() | |
135 | { | |
136 | 0 | SequenceI seq1 = new Sequence("Seq1", "CAGT"); |
137 | 0 | SequenceI seq2 = new Sequence("Seq2", "CACT"); |
138 | 0 | SequenceI seq3 = new Sequence("Seq3", "C--G"); |
139 | 0 | SequenceI seq4 = new Sequence("Seq4", "CA-t"); |
140 | 0 | SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 }; |
141 | ||
142 | // ensure class loaded and initialised | |
143 | 0 | int width = seq1.getLength(); |
144 | 0 | AAFrequency.calculate(seqs, width, 0, width, true); |
145 | ||
146 | 0 | int reps = 100000; |
147 | 0 | long start = System.currentTimeMillis(); |
148 | 0 | for (int i = 0; i < reps; i++) |
149 | { | |
150 | 0 | AAFrequency.calculate(seqs, width, 0, width, true); |
151 | } | |
152 | 0 | System.out.println(System.currentTimeMillis() - start); |
153 | } | |
154 | ||
155 | /** | |
156 | * Test generation of consensus annotation with options 'include gaps' | |
157 | * (profile percentages are of all sequences, whether gapped or not), and | |
158 | * 'show logo' (the full profile with all residue percentages is reported in | |
159 | * the description for the tooltip) | |
160 | */ | |
161 | 1 | @Test(groups = { "Functional" }) |
162 | public void testCompleteConsensus_includeGaps_showLogo() | |
163 | { | |
164 | /* | |
165 | * first compute the profiles | |
166 | */ | |
167 | 1 | SequenceI seq1 = new Sequence("Seq1", "CAG-T"); |
168 | 1 | SequenceI seq2 = new Sequence("Seq2", "CAC-T"); |
169 | 1 | SequenceI seq3 = new Sequence("Seq3", "C---G"); |
170 | 1 | SequenceI seq4 = new Sequence("Seq4", "CA--t"); |
171 | 1 | SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 }; |
172 | 1 | int width = seq1.getLength(); |
173 | 1 | ProfilesI profiles = AAFrequency.calculate(seqs, width, 0, width, true); |
174 | ||
175 | 1 | AlignmentAnnotation consensus = new AlignmentAnnotation("Consensus", |
176 | "PID", new Annotation[width]); | |
177 | 1 | AAFrequency.completeConsensus(consensus, profiles, 0, 5, false, true, |
178 | 4); | |
179 | ||
180 | 1 | Annotation ann = consensus.annotations[0]; |
181 | 1 | assertEquals("C 100%", ann.description); |
182 | 1 | assertEquals("C", ann.displayCharacter); |
183 | 1 | ann = consensus.annotations[1]; |
184 | 1 | assertEquals("A 75%", ann.description); |
185 | 1 | assertEquals("A", ann.displayCharacter); |
186 | 1 | ann = consensus.annotations[2]; |
187 | 1 | assertEquals("C 25%; G 25%", ann.description); |
188 | 1 | assertEquals("+", ann.displayCharacter); |
189 | 1 | ann = consensus.annotations[3]; |
190 | 1 | assertEquals("", ann.description); |
191 | 1 | assertEquals("-", ann.displayCharacter); |
192 | 1 | ann = consensus.annotations[4]; |
193 | 1 | assertEquals("T 75%; G 25%", ann.description); |
194 | 1 | assertEquals("T", ann.displayCharacter); |
195 | } | |
196 | ||
197 | /** | |
198 | * Test generation of consensus annotation with options 'ignore gaps' (profile | |
199 | * percentages are of the non-gapped sequences) and 'no logo' (only the modal | |
200 | * residue[s] percentage is reported in the description for the tooltip) | |
201 | */ | |
202 | 1 | @Test(groups = { "Functional" }) |
203 | public void testCompleteConsensus_ignoreGaps_noLogo() | |
204 | { | |
205 | /* | |
206 | * first compute the profiles | |
207 | */ | |
208 | 1 | SequenceI seq1 = new Sequence("Seq1", "CAG-T"); |
209 | 1 | SequenceI seq2 = new Sequence("Seq2", "CAC-T"); |
210 | 1 | SequenceI seq3 = new Sequence("Seq3", "C---G"); |
211 | 1 | SequenceI seq4 = new Sequence("Seq4", "CA--t"); |
212 | 1 | SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 }; |
213 | 1 | int width = seq1.getLength(); |
214 | 1 | ProfilesI profiles = AAFrequency.calculate(seqs, width, 0, width, true); |
215 | ||
216 | 1 | AlignmentAnnotation consensus = new AlignmentAnnotation("Consensus", |
217 | "PID", new Annotation[width]); | |
218 | 1 | AAFrequency.completeConsensus(consensus, profiles, 0, 5, true, false, |
219 | 4); | |
220 | ||
221 | 1 | Annotation ann = consensus.annotations[0]; |
222 | 1 | assertEquals("C 100%", ann.description); |
223 | 1 | assertEquals("C", ann.displayCharacter); |
224 | 1 | ann = consensus.annotations[1]; |
225 | 1 | assertEquals("A 100%", ann.description); |
226 | 1 | assertEquals("A", ann.displayCharacter); |
227 | 1 | ann = consensus.annotations[2]; |
228 | 1 | assertEquals("[CG] 50%", ann.description); |
229 | 1 | assertEquals("+", ann.displayCharacter); |
230 | 1 | ann = consensus.annotations[3]; |
231 | 1 | assertEquals("", ann.description); |
232 | 1 | assertEquals("-", ann.displayCharacter); |
233 | 1 | ann = consensus.annotations[4]; |
234 | 1 | assertEquals("T 75%", ann.description); |
235 | 1 | assertEquals("T", ann.displayCharacter); |
236 | } | |
237 | ||
238 | /** | |
239 | * Test to include rounding down of a non-zero count to 0% (JAL-3202) | |
240 | */ | |
241 | 1 | @Test(groups = { "Functional" }) |
242 | public void testExtractProfile() | |
243 | { | |
244 | /* | |
245 | * 200 sequences of which 30 gapped (170 ungapped) | |
246 | * max count 70 for modal residue 'G' | |
247 | */ | |
248 | 1 | ProfileI profile = new Profile(200, 30, 70, "G"); |
249 | 1 | ResidueCount counts = new ResidueCount(); |
250 | 1 | counts.put('G', 70); |
251 | 1 | counts.put('R', 60); |
252 | 1 | counts.put('L', 38); |
253 | 1 | counts.put('H', 2); |
254 | 1 | profile.setCounts(counts); |
255 | ||
256 | /* | |
257 | * [0, noOfValues, totalPercent, char1, percent1, ...] | |
258 | * G: 70/170 = 41.2 = 41 | |
259 | * R: 60/170 = 35.3 = 35 | |
260 | * L: 38/170 = 22.3 = 22 | |
261 | * H: 2/170 = 1 | |
262 | * total (rounded) percentages = 99 | |
263 | */ | |
264 | 1 | int[] extracted = AAFrequency.extractProfile(profile, true); |
265 | 1 | int[] expected = new int[] { 0, 4, 99, 'G', 41, 'R', 35, 'L', 22, 'H', |
266 | 1 }; | |
267 | 1 | org.testng.Assert.assertEquals(extracted, expected); |
268 | ||
269 | /* | |
270 | * add some counts of 1; these round down to 0% and should be discarded | |
271 | */ | |
272 | 1 | counts.put('G', 68); // 68/170 = 40% exactly (percentages now total 98) |
273 | 1 | counts.put('Q', 1); |
274 | 1 | counts.put('K', 1); |
275 | 1 | extracted = AAFrequency.extractProfile(profile, true); |
276 | 1 | expected = new int[] { 0, 4, 98, 'G', 40, 'R', 35, 'L', 22, 'H', 1 }; |
277 | 1 | org.testng.Assert.assertEquals(extracted, expected); |
278 | ||
279 | } | |
280 | ||
281 | /** | |
282 | * Tests for the profile calculation where gaps are included i.e. the | |
283 | * denominator is the total number of sequences in the column | |
284 | */ | |
285 | 1 | @Test(groups = { "Functional" }) |
286 | public void testExtractProfile_countGaps() | |
287 | { | |
288 | /* | |
289 | * 200 sequences of which 30 gapped (170 ungapped) | |
290 | * max count 70 for modal residue 'G' | |
291 | */ | |
292 | 1 | ProfileI profile = new Profile(200, 30, 70, "G"); |
293 | 1 | ResidueCount counts = new ResidueCount(); |
294 | 1 | counts.put('G', 70); |
295 | 1 | counts.put('R', 60); |
296 | 1 | counts.put('L', 38); |
297 | 1 | counts.put('H', 2); |
298 | 1 | profile.setCounts(counts); |
299 | ||
300 | /* | |
301 | * [SEQUENCE_PROFILE, noOfValues, totalPercent, char1, percent1, ...] | |
302 | * G: 70/200 = 35% | |
303 | * R: 60/200 = 30% | |
304 | * L: 38/200 = 19% | |
305 | * H: 2/200 = 1% | |
306 | * total (rounded) percentages = 85 | |
307 | */ | |
308 | 1 | int[] extracted = AAFrequency.extractProfile(profile, false); |
309 | 1 | int[] expected = new int[] { AlignmentAnnotation.SEQUENCE_PROFILE, 4, |
310 | 85, 'G', 35, 'R', 30, 'L', 19, 'H', 1 }; | |
311 | 1 | org.testng.Assert.assertEquals(extracted, expected); |
312 | ||
313 | /* | |
314 | * add some counts of 1; these round down to 0% and should be discarded | |
315 | */ | |
316 | 1 | counts.put('G', 68); // 68/200 = 34% |
317 | 1 | counts.put('Q', 1); |
318 | 1 | counts.put('K', 1); |
319 | 1 | extracted = AAFrequency.extractProfile(profile, false); |
320 | 1 | expected = new int[] { AlignmentAnnotation.SEQUENCE_PROFILE, 4, 84, 'G', |
321 | 34, 'R', 30, 'L', 19, 'H', 1 }; | |
322 | 1 | org.testng.Assert.assertEquals(extracted, expected); |
323 | ||
324 | } | |
325 | ||
326 | 1 | @Test(groups = { "Functional" }) |
327 | public void testExtractCdnaProfile() | |
328 | { | |
329 | /* | |
330 | * 120 sequences of which 10 gapped (110 ungapped) | |
331 | * max count 70 for modal codon 'cCA' | |
332 | */ | |
333 | 1 | Hashtable profile = new Hashtable(); |
334 | ||
335 | /* | |
336 | * cdna profile is {seqCount, ungappedCount, codonCount1, ...codonCount64} | |
337 | * where 1..64 positions correspond to encoded codons | |
338 | * see CodingUtils.encodeCodon() | |
339 | */ | |
340 | 1 | int[] codonCounts = new int[66]; |
341 | 1 | char[] codon1 = new char[] { 'G', 'C', 'A' }; |
342 | 1 | char[] codon2 = new char[] { 'c', 'C', 'A' }; |
343 | 1 | char[] codon3 = new char[] { 't', 'g', 'A' }; |
344 | 1 | char[] codon4 = new char[] { 'G', 'C', 't' }; |
345 | 1 | int encoded1 = CodingUtils.encodeCodon(codon1); |
346 | 1 | int encoded2 = CodingUtils.encodeCodon(codon2); |
347 | 1 | int encoded3 = CodingUtils.encodeCodon(codon3); |
348 | 1 | int encoded4 = CodingUtils.encodeCodon(codon4); |
349 | 1 | codonCounts[2 + encoded1] = 30; |
350 | 1 | codonCounts[2 + encoded2] = 70; |
351 | 1 | codonCounts[2 + encoded3] = 9; |
352 | 1 | codonCounts[2 + encoded4] = 1; |
353 | 1 | codonCounts[0] = 120; |
354 | 1 | codonCounts[1] = 110; |
355 | 1 | profile.put(AAFrequency.PROFILE, codonCounts); |
356 | ||
357 | /* | |
358 | * [CDNA_PROFILE, noOfValues, totalPercent, encodedCodon1, percent1, ...] | |
359 | * codon1: 30/110 = 27.2% = 27% | |
360 | * codon2: 70/110 = 63.6% = 63% | |
361 | * codon3: 9/110 = 8.1% = 8% | |
362 | * codon4: 1/110 = 0.9% = 0% should be discarded | |
363 | * total (rounded down) percentages = 98 | |
364 | */ | |
365 | 1 | int[] extracted = AAFrequency.extractCdnaProfile(profile, true); |
366 | 1 | int[] expected = new int[] { AlignmentAnnotation.CDNA_PROFILE, 3, 98, |
367 | encoded2, 63, encoded1, 27, encoded3, 8 }; | |
368 | 1 | org.testng.Assert.assertEquals(extracted, expected); |
369 | } | |
370 | ||
371 | 1 | @Test(groups = { "Functional" }) |
372 | public void testExtractCdnaProfile_countGaps() | |
373 | { | |
374 | /* | |
375 | * 120 sequences of which 10 gapped (110 ungapped) | |
376 | * max count 70 for modal codon 'cCA' | |
377 | */ | |
378 | 1 | Hashtable profile = new Hashtable(); |
379 | ||
380 | /* | |
381 | * cdna profile is {seqCount, ungappedCount, codonCount1, ...codonCount64} | |
382 | * where 1..64 positions correspond to encoded codons | |
383 | * see CodingUtils.encodeCodon() | |
384 | */ | |
385 | 1 | int[] codonCounts = new int[66]; |
386 | 1 | char[] codon1 = new char[] { 'G', 'C', 'A' }; |
387 | 1 | char[] codon2 = new char[] { 'c', 'C', 'A' }; |
388 | 1 | char[] codon3 = new char[] { 't', 'g', 'A' }; |
389 | 1 | char[] codon4 = new char[] { 'G', 'C', 't' }; |
390 | 1 | int encoded1 = CodingUtils.encodeCodon(codon1); |
391 | 1 | int encoded2 = CodingUtils.encodeCodon(codon2); |
392 | 1 | int encoded3 = CodingUtils.encodeCodon(codon3); |
393 | 1 | int encoded4 = CodingUtils.encodeCodon(codon4); |
394 | 1 | codonCounts[2 + encoded1] = 30; |
395 | 1 | codonCounts[2 + encoded2] = 70; |
396 | 1 | codonCounts[2 + encoded3] = 9; |
397 | 1 | codonCounts[2 + encoded4] = 1; |
398 | 1 | codonCounts[0] = 120; |
399 | 1 | codonCounts[1] = 110; |
400 | 1 | profile.put(AAFrequency.PROFILE, codonCounts); |
401 | ||
402 | /* | |
403 | * [CDNA_PROFILE, noOfValues, totalPercent, encodedCodon1, percent1, ...] | |
404 | * codon1: 30/120 = 25% | |
405 | * codon2: 70/120 = 58.3% = 58% | |
406 | * codon3: 9/120 = 7.5% = 7% | |
407 | * codon4: 1/120 = 0.8% = 0% should be discarded | |
408 | * total (rounded down) percentages = 90 | |
409 | */ | |
410 | 1 | int[] extracted = AAFrequency.extractCdnaProfile(profile, false); |
411 | 1 | int[] expected = new int[] { AlignmentAnnotation.CDNA_PROFILE, 3, 90, |
412 | encoded2, 58, encoded1, 25, encoded3, 7 }; | |
413 | 1 | org.testng.Assert.assertEquals(extracted, expected); |
414 | } | |
415 | } |