Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
ComparisonTest | 36 | 123 | 11 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | package jalview.util; | |
22 | ||
23 | import static org.testng.AssertJUnit.assertEquals; | |
24 | import static org.testng.AssertJUnit.assertFalse; | |
25 | import static org.testng.AssertJUnit.assertTrue; | |
26 | ||
27 | import org.testng.annotations.BeforeClass; | |
28 | import org.testng.annotations.BeforeMethod; | |
29 | import org.testng.annotations.Test; | |
30 | ||
31 | import jalview.bin.Cache; | |
32 | import jalview.datamodel.Sequence; | |
33 | import jalview.datamodel.SequenceI; | |
34 | import jalview.gui.JvOptionPane; | |
35 | ||
36 | public class ComparisonTest | |
37 | { | |
38 | ||
39 | 1 | @BeforeClass(alwaysRun = true) |
40 | public void setUpJvOptionPane() | |
41 | { | |
42 | 1 | JvOptionPane.setInteractiveMode(false); |
43 | 1 | JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION); |
44 | } | |
45 | ||
46 | 9 | @BeforeMethod(alwaysRun = true) |
47 | public void loadProperties() | |
48 | { | |
49 | 9 | Cache.loadProperties("test/jalview/util/comparisonTestProps.jvprops"); |
50 | } | |
51 | ||
52 | 1 | @Test(groups = { "Functional" }) |
53 | public void testIsGap() | |
54 | { | |
55 | 1 | assertTrue(Comparison.isGap('-')); |
56 | 1 | assertTrue(Comparison.isGap('.')); |
57 | 1 | assertTrue(Comparison.isGap(' ')); |
58 | 1 | assertFalse(Comparison.isGap('X')); |
59 | 1 | assertFalse(Comparison.isGap('x')); |
60 | 1 | assertFalse(Comparison.isGap('*')); |
61 | 1 | assertFalse(Comparison.isGap('G')); |
62 | } | |
63 | ||
64 | /** | |
65 | * Test for isNucleotide is that sequences in a dataset are more than 85% | |
66 | * AGCTU. Test is not case-sensitive and ignores gaps. | |
67 | */ | |
68 | 1 | @Test(groups = { "Functional" }) |
69 | public void testIsNucleotide_sequences() | |
70 | { | |
71 | 1 | SequenceI seq = new Sequence("eightypercent+fivepercent", "agctuagcPV"); |
72 | 1 | assertFalse(Comparison.isNucleotide(new SequenceI[] { seq })); |
73 | 1 | assertFalse( |
74 | Comparison.isNucleotide(new SequenceI[][] | |
75 | { new SequenceI[] { seq } })); | |
76 | ||
77 | 1 | seq = new Sequence("eightyfivepercent+tenpercent", |
78 | "agctuagcgVagctuagcuVE"); | |
79 | 1 | assertFalse(Comparison.isNucleotide(new SequenceI[] { seq })); |
80 | ||
81 | 1 | seq = new Sequence(">nineyfivepercent+0percent", |
82 | "aagctuagcgEagctuagcua"); | |
83 | 1 | assertFalse(Comparison.isNucleotide(new SequenceI[] { seq })); |
84 | ||
85 | 1 | seq = new Sequence("nineyfivepercent+0percent", "agctuagcgEagctuagcua"); |
86 | 1 | assertFalse(Comparison.isNucleotide(new SequenceI[] { seq })); |
87 | ||
88 | 1 | seq = new Sequence("nineyfivepercent+fivepercent", |
89 | "agctuagcgWagctuagcua"); | |
90 | 1 | assertTrue(Comparison.isNucleotide(new SequenceI[] { seq })); |
91 | ||
92 | 1 | seq = new Sequence("nineyfivepercent+tenpercent", |
93 | "agctuagcgEWWctuagcua"); | |
94 | 1 | assertFalse(Comparison.isNucleotide(new SequenceI[] { seq })); |
95 | ||
96 | 1 | seq = new Sequence("eightyfivepercent+fifteenpercent", |
97 | "agctuagcgWWWctuagcua"); | |
98 | 1 | assertFalse(Comparison.isNucleotide(new SequenceI[] { seq })); |
99 | ||
100 | 1 | seq = new Sequence("eightyfivepercentgapped", |
101 | "--agc--tuA--GCPV-a---gct-uA-GC---UV"); | |
102 | 1 | assertFalse(Comparison.isNucleotide(new SequenceI[] { seq })); |
103 | ||
104 | 1 | seq = new Sequence("ninetyfivepercentgapped", |
105 | "ag--ct-u-a---gc---g----aag--c---tuagcuV"); | |
106 | 1 | assertTrue(Comparison.isNucleotide(new SequenceI[] { seq })); |
107 | ||
108 | 1 | seq = new Sequence("allgap", "---------"); |
109 | 1 | assertFalse(Comparison.isNucleotide(new SequenceI[] { seq })); |
110 | ||
111 | 1 | seq = new Sequence("DNA", "ACTugGCCAG"); |
112 | 1 | SequenceI seq2 = new Sequence("Protein", "FLIMVSPTYW"); |
113 | /* | |
114 | * 90% DNA but one protein sequence - expect false | |
115 | */ | |
116 | 1 | assertFalse( |
117 | Comparison.isNucleotide(new SequenceI[] | |
118 | { seq, seq, seq, seq, seq, seq, seq, seq, seq, seq2 })); | |
119 | 1 | assertFalse( |
120 | Comparison.isNucleotide(new SequenceI[][] | |
121 | { new SequenceI[] { seq }, new SequenceI[] { seq, seq, seq }, | |
122 | new SequenceI[] | |
123 | { seq, seq, seq, seq, seq, seq2 } })); | |
124 | /* | |
125 | * 80% DNA but one protein sequence - Expect false | |
126 | */ | |
127 | 1 | assertFalse( |
128 | Comparison.isNucleotide(new SequenceI[] | |
129 | { seq, seq, seq, seq, seq, seq, seq, seq, seq2, seq2 })); | |
130 | 1 | assertFalse( |
131 | Comparison.isNucleotide(new SequenceI[][] | |
132 | { new SequenceI[] { seq }, new SequenceI[] { seq, seq, seq }, | |
133 | new SequenceI[] | |
134 | { seq, seq, seq, seq, seq2, seq2, null } })); | |
135 | ||
136 | 1 | String seqString = "aaatatatatgEcctgagtcgt"; |
137 | 1 | seq = new Sequence("ShortProteinThatLooksLikeDNA", seqString); |
138 | 1 | assertFalse(Comparison.isNucleotide(new SequenceI[] { seq })); |
139 | 1 | seq = new Sequence("LongProteinThatLooksLikeDNA", seqString.repeat(10)); |
140 | 1 | assertTrue(Comparison.isNucleotide(new SequenceI[] { seq })); |
141 | ||
142 | 1 | assertFalse(Comparison.isNucleotide((SequenceI[]) null)); |
143 | 1 | assertFalse(Comparison.isNucleotide((SequenceI[][]) null)); |
144 | } | |
145 | ||
146 | /** | |
147 | * Test the percentage identity calculation for two sequences | |
148 | */ | |
149 | 1 | @Test(groups = { "Functional" }) |
150 | public void testPID_includingGaps() | |
151 | { | |
152 | 1 | String seq1 = "ABCDEFG"; // extra length here is ignored |
153 | 1 | String seq2 = "abcdef"; |
154 | 1 | assertEquals("identical", 100f, Comparison.PID(seq1, seq2), 0.001f); |
155 | ||
156 | // comparison range defaults to length of first sequence | |
157 | 1 | seq2 = "abcdefghijklmnopqrstuvwxyz"; |
158 | 1 | assertEquals("identical", 100f, Comparison.PID(seq1, seq2), 0.001f); |
159 | ||
160 | // 5 identical, 2 gap-gap, 2 gap-residue, 1 mismatch | |
161 | 1 | seq1 = "a--b-cdefh"; |
162 | 1 | seq2 = "a---bcdefg"; |
163 | 1 | int length = seq1.length(); |
164 | ||
165 | // match gap-residue, match gap-gap: 9/10 identical | |
166 | // TODO should gap-gap be included in a PID score? JAL-791 | |
167 | 1 | assertEquals(90f, Comparison.PID(seq1, seq2, 0, length, true, false), |
168 | 0.001f); | |
169 | // overloaded version of the method signature above: | |
170 | 1 | assertEquals(90f, Comparison.PID(seq1, seq2), 0.001f); |
171 | ||
172 | // don't match gap-residue, match gap-gap: 7/10 identical | |
173 | // TODO should gap-gap be included in a PID score? | |
174 | 1 | assertEquals(70f, Comparison.PID(seq1, seq2, 0, length, false, false), |
175 | 0.001f); | |
176 | } | |
177 | ||
178 | 1 | @Test(groups = { "Functional" }) |
179 | public void testIsNucleotide() | |
180 | { | |
181 | 1 | assertTrue(Comparison.isNucleotide('a')); |
182 | 1 | assertTrue(Comparison.isNucleotide('A')); |
183 | 1 | assertTrue(Comparison.isNucleotide('c')); |
184 | 1 | assertTrue(Comparison.isNucleotide('C')); |
185 | 1 | assertTrue(Comparison.isNucleotide('g')); |
186 | 1 | assertTrue(Comparison.isNucleotide('G')); |
187 | 1 | assertTrue(Comparison.isNucleotide('t')); |
188 | 1 | assertTrue(Comparison.isNucleotide('T')); |
189 | 1 | assertTrue(Comparison.isNucleotide('u')); |
190 | 1 | assertTrue(Comparison.isNucleotide('U')); |
191 | 1 | assertFalse(Comparison.isNucleotide('-')); |
192 | 1 | assertFalse(Comparison.isNucleotide('P')); |
193 | } | |
194 | ||
195 | 1 | @Test(groups = { "Functional" }) |
196 | public void testIsNucleotideAmbiguity() | |
197 | { | |
198 | 1 | assertTrue(Comparison.isNucleotide('b', true)); |
199 | 1 | assertTrue(Comparison.isNucleotide('B', true)); |
200 | 1 | assertTrue(Comparison.isNucleotide('d', true)); |
201 | 1 | assertTrue(Comparison.isNucleotide('V', true)); |
202 | 1 | assertTrue(Comparison.isNucleotide('M', true)); |
203 | 1 | assertTrue(Comparison.isNucleotide('s', true)); |
204 | 1 | assertTrue(Comparison.isNucleotide('W', true)); |
205 | 1 | assertTrue(Comparison.isNucleotide('x', true)); |
206 | 1 | assertTrue(Comparison.isNucleotide('Y', true)); |
207 | 1 | assertTrue(Comparison.isNucleotide('r', true)); |
208 | 1 | assertTrue(Comparison.isNucleotide('i', true)); |
209 | 1 | assertFalse(Comparison.isNucleotide('-', true)); |
210 | 1 | assertFalse(Comparison.isNucleotide('n', true)); |
211 | 1 | assertFalse(Comparison.isNucleotide('P', true)); |
212 | } | |
213 | ||
214 | /** | |
215 | * Test the percentage identity calculation for two sequences | |
216 | */ | |
217 | 1 | @Test(groups = { "Functional" }) |
218 | public void testPID_ungappedOnly() | |
219 | { | |
220 | // 5 identical, 2 gap-gap, 2 gap-residue, 1 mismatch | |
221 | // the extra length of seq1 is ignored | |
222 | 1 | String seq1 = "a--b-cdefhr"; |
223 | 1 | String seq2 = "a---bcdefg"; |
224 | 1 | int length = seq1.length(); |
225 | ||
226 | /* | |
227 | * As currently coded, 'ungappedOnly' ignores gap-residue but counts | |
228 | * gap-gap. Is this a bug - should gap-gap also be ignored, giving a PID of | |
229 | * 5/6? | |
230 | * | |
231 | * Note also there is no variant of the calculation that penalises | |
232 | * gap-residue i.e. counts it as a mismatch. This would give a score of 5/8 | |
233 | * (if we ignore gap-gap) or 5/10 (if we count gap-gap as a match). | |
234 | */ | |
235 | // match gap-residue, match gap-gap: 7/8 identical | |
236 | 1 | assertEquals(87.5f, Comparison.PID(seq1, seq2, 0, length, true, true), |
237 | 0.001f); | |
238 | ||
239 | // don't match gap-residue with 'ungapped only' - same as above | |
240 | 1 | assertEquals(87.5f, Comparison.PID(seq1, seq2, 0, length, false, true), |
241 | 0.001f); | |
242 | } | |
243 | ||
244 | 1 | @Test(groups = { "Functional" }) |
245 | public void testIsNucleotideSequence() | |
246 | { | |
247 | 1 | assertFalse(Comparison.isNucleotideSequence(null, true)); |
248 | 1 | assertTrue(Comparison.isNucleotideSequence("", true)); |
249 | 1 | assertTrue(Comparison.isNucleotideSequence("aAgGcCtTuU", true)); |
250 | 1 | assertTrue(Comparison.isNucleotideSequence("aAgGcCtTuU", false)); |
251 | 1 | assertFalse(Comparison.isNucleotideSequence("xAgGcCtTuU", false)); |
252 | 1 | assertFalse(Comparison.isNucleotideSequence("aAgGcCtTuUx", false)); |
253 | 1 | assertTrue(Comparison.isNucleotideSequence("a A-g.GcCtTuU", true)); |
254 | 1 | assertFalse(Comparison.isNucleotideSequence("a A-g.GcCtTuU", false)); |
255 | 1 | assertFalse(Comparison.isNucleotideSequence("gatactawgataca", false)); |
256 | // including nucleotide ambiguity | |
257 | 1 | assertTrue( |
258 | Comparison.isNucleotideSequence("gatacaWgataca", true, true)); | |
259 | 1 | assertFalse( |
260 | Comparison.isNucleotideSequence("gatacaEgataca", true, true)); | |
261 | ||
262 | // not quite all nucleotides and ambiguity codes | |
263 | 1 | Sequence seq = new Sequence("Ambiguity DNA codes", "gatacagatacabve"); |
264 | 1 | assertFalse(Comparison.isNucleotide(seq)); |
265 | // all nucleotide and nucleotide ambiguity codes | |
266 | 1 | seq = new Sequence("Ambiguity DNA codes", "gatacagatacabvt"); |
267 | 1 | assertFalse(Comparison.isNucleotide(seq)); |
268 | 1 | seq = new Sequence("Ambiguity DNA codes", "agatacabb"); |
269 | 1 | assertFalse(Comparison.isNucleotide(seq)); |
270 | // 55% nucleotide with only Xs or Ns | |
271 | 1 | assertTrue(Comparison |
272 | .isNucleotide(new Sequence("dnaWithXs", "gatacaXXXX"))); | |
273 | 1 | assertTrue(Comparison |
274 | .isNucleotide(new Sequence("dnaWithXs", "gatacaNNNN"))); | |
275 | 1 | assertFalse(Comparison |
276 | .isNucleotide(new Sequence("dnaWithXs", "gatacXXXXX"))); | |
277 | 1 | assertFalse(Comparison |
278 | .isNucleotide(new Sequence("dnaWithXs", "gatacNNNNN"))); | |
279 | } | |
280 | ||
281 | 1 | @Test(groups = { "Functional" }) |
282 | public void testIsSameResidue() | |
283 | { | |
284 | 1 | assertTrue(Comparison.isSameResidue('a', 'a', false)); |
285 | 1 | assertTrue(Comparison.isSameResidue('a', 'a', true)); |
286 | 1 | assertTrue(Comparison.isSameResidue('A', 'a', false)); |
287 | 1 | assertTrue(Comparison.isSameResidue('a', 'A', false)); |
288 | ||
289 | 1 | assertFalse(Comparison.isSameResidue('a', 'A', true)); |
290 | 1 | assertFalse(Comparison.isSameResidue('A', 'a', true)); |
291 | } | |
292 | ||
293 | 1 | @Test(groups = { "Functional" }) |
294 | public void testNucleotideProportion() | |
295 | { | |
296 | 1 | assertFalse(Comparison.myShortSequenceNucleotideProportionCount(2, 3)); |
297 | 1 | assertTrue(Comparison.myShortSequenceNucleotideProportionCount(3, 3)); |
298 | 1 | assertFalse(Comparison.myShortSequenceNucleotideProportionCount(2, 4)); |
299 | 1 | assertTrue(Comparison.myShortSequenceNucleotideProportionCount(3, 4)); |
300 | 1 | assertFalse( |
301 | Comparison.myShortSequenceNucleotideProportionCount(17, 20)); | |
302 | 1 | assertTrue(Comparison.myShortSequenceNucleotideProportionCount(18, 20)); |
303 | 1 | assertFalse( |
304 | Comparison.myShortSequenceNucleotideProportionCount(38, 50)); | |
305 | 1 | assertTrue(Comparison.myShortSequenceNucleotideProportionCount(39, 50)); |
306 | 1 | assertFalse( |
307 | Comparison.myShortSequenceNucleotideProportionCount(54, 100)); | |
308 | 1 | assertTrue( |
309 | Comparison.myShortSequenceNucleotideProportionCount(55, 100)); | |
310 | } | |
311 | } |