Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
AlignmentUtilsTests | 76 | 1,281 | 61 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | package jalview.analysis; | |
22 | ||
23 | import static org.testng.Assert.assertNotEquals; | |
24 | import static org.testng.AssertJUnit.assertEquals; | |
25 | import static org.testng.AssertJUnit.assertFalse; | |
26 | import static org.testng.AssertJUnit.assertNotNull; | |
27 | import static org.testng.AssertJUnit.assertNull; | |
28 | import static org.testng.AssertJUnit.assertSame; | |
29 | import static org.testng.AssertJUnit.assertTrue; | |
30 | ||
31 | import java.awt.Color; | |
32 | import java.io.IOException; | |
33 | import java.util.ArrayList; | |
34 | import java.util.Arrays; | |
35 | import java.util.HashMap; | |
36 | import java.util.LinkedHashMap; | |
37 | import java.util.List; | |
38 | import java.util.Map; | |
39 | import java.util.Set; | |
40 | import java.util.SortedMap; | |
41 | import java.util.TreeMap; | |
42 | ||
43 | import org.testng.Assert; | |
44 | import org.testng.annotations.BeforeClass; | |
45 | import org.testng.annotations.DataProvider; | |
46 | import org.testng.annotations.Test; | |
47 | ||
48 | import jalview.datamodel.AlignedCodonFrame; | |
49 | import jalview.datamodel.Alignment; | |
50 | import jalview.datamodel.AlignmentAnnotation; | |
51 | import jalview.datamodel.AlignmentI; | |
52 | import jalview.datamodel.Annotation; | |
53 | import jalview.datamodel.ContactListI; | |
54 | import jalview.datamodel.ContactMatrixI; | |
55 | import jalview.datamodel.DBRefEntry; | |
56 | import jalview.datamodel.GeneLociI; | |
57 | import jalview.datamodel.Mapping; | |
58 | import jalview.datamodel.SearchResultMatchI; | |
59 | import jalview.datamodel.SearchResultsI; | |
60 | import jalview.datamodel.SeqDistanceContactMatrix; | |
61 | import jalview.datamodel.Sequence; | |
62 | import jalview.datamodel.SequenceFeature; | |
63 | import jalview.datamodel.SequenceGroup; | |
64 | import jalview.datamodel.SequenceI; | |
65 | import jalview.gui.JvOptionPane; | |
66 | import jalview.io.AppletFormatAdapter; | |
67 | import jalview.io.DataSourceType; | |
68 | import jalview.io.FileFormat; | |
69 | import jalview.io.FileFormatI; | |
70 | import jalview.io.FormatAdapter; | |
71 | import jalview.io.gff.SequenceOntologyI; | |
72 | import jalview.util.Comparison; | |
73 | import jalview.util.MapList; | |
74 | import jalview.util.MappingUtils; | |
75 | ||
76 | public class AlignmentUtilsTests | |
77 | { | |
/* Shared 39-residue fixture (A-Z followed by a-m); testExpandContext derives its subsequences from it. */
78 | private static Sequence ts = new Sequence("short", | |
79 | "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm"); | |
80 | ||
81 | 1 | @BeforeClass(alwaysRun = true) |
82 | public void setUpJvOptionPane() | |
83 | { | |
84 | 1 | JvOptionPane.setInteractiveMode(false); |
85 | 1 | JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION); |
86 | ||
87 | 1 | AlignmentAnnotation ann1 = new AlignmentAnnotation( |
88 | "Secondary Structure", "Secondary Structure", | |
89 | new Annotation[] {}); | |
90 | 1 | AlignmentAnnotation ann2 = new AlignmentAnnotation("jnetpred", |
91 | "jnetpred", new Annotation[] {}); | |
92 | 1 | AlignmentAnnotation ann3 = new AlignmentAnnotation("Temp", "Temp", |
93 | new Annotation[] {}); | |
94 | 1 | AlignmentAnnotation ann4 = new AlignmentAnnotation("Temp", "Temp", |
95 | new Annotation[] {}); | |
96 | ||
97 | 1 | AlignmentAnnotation[] anns1 = new AlignmentAnnotation[] { ann1, ann3, |
98 | ann4 }; | |
99 | ||
100 | 1 | AlignmentAnnotation[] anns2 = new AlignmentAnnotation[] { ann2, ann3, |
101 | ann4 }; | |
102 | ||
103 | 1 | AlignmentAnnotation[] anns3 = new AlignmentAnnotation[] { ann3, ann4 }; |
104 | ||
105 | 1 | AlignmentAnnotation[] anns4 = new AlignmentAnnotation[0]; |
106 | ||
107 | 1 | AlignmentAnnotation[] anns5 = new AlignmentAnnotation[] { ann1, ann2, |
108 | ann3, ann4 }; | |
109 | } | |
110 | ||
111 | 1 | @Test(groups = { "Functional" }) |
112 | public void testExpandContext() | |
113 | { | |
114 | 1 | AlignmentI al = new Alignment(new Sequence[] {}); |
115 | 6 | for (int i = 4; i < 14; i += 2) |
116 | { | |
117 | 5 | SequenceI s1 = ts.deriveSequence().getSubSequence(i, i + 7); |
118 | 5 | al.addSequence(s1); |
119 | } | |
120 | 1 | System.out.println(new AppletFormatAdapter() |
121 | .formatSequences(FileFormat.Clustal, al, true)); | |
122 | 27 | for (int flnk = -1; flnk < 25; flnk++) |
123 | { | |
124 | 26 | AlignmentI exp = AlignmentUtils.expandContext(al, flnk); |
125 | 26 | System.out.println("\nFlank size: " + flnk); |
126 | 26 | System.out.println(new AppletFormatAdapter() |
127 | .formatSequences(FileFormat.Clustal, exp, true)); | |
128 | 26 | if (flnk == -1) |
129 | { | |
130 | /* | |
131 | * Full expansion to complete sequences | |
132 | */ | |
133 | 1 | for (SequenceI sq : exp.getSequences()) |
134 | { | |
135 | 5 | String ung = sq.getSequenceAsString().replaceAll("-+", ""); |
136 | 5 | final String errorMsg = "Flanking sequence not the same as original dataset sequence.\n" |
137 | + ung + "\n" | |
138 | + sq.getDatasetSequence().getSequenceAsString(); | |
139 | 5 | assertTrue(errorMsg, ung.equalsIgnoreCase( |
140 | sq.getDatasetSequence().getSequenceAsString())); | |
141 | } | |
142 | } | |
143 | 25 | else if (flnk == 24) |
144 | { | |
145 | /* | |
146 | * Last sequence is fully expanded, others have leading gaps to match | |
147 | */ | |
148 | 1 | assertTrue(exp.getSequenceAt(4).getSequenceAsString() |
149 | .startsWith("abc")); | |
150 | 1 | assertTrue(exp.getSequenceAt(3).getSequenceAsString() |
151 | .startsWith("--abc")); | |
152 | 1 | assertTrue(exp.getSequenceAt(2).getSequenceAsString() |
153 | .startsWith("----abc")); | |
154 | 1 | assertTrue(exp.getSequenceAt(1).getSequenceAsString() |
155 | .startsWith("------abc")); | |
156 | 1 | assertTrue(exp.getSequenceAt(0).getSequenceAsString() |
157 | .startsWith("--------abc")); | |
158 | } | |
159 | } | |
160 | } | |
161 | ||
162 | /** | |
163 | * Test that annotations are correctly adjusted by expandContext | |
164 | */ | |
165 | 1 | @Test(groups = { "Functional" }) |
166 | public void testExpandContext_annotation() | |
167 | { | |
168 | 1 | AlignmentI al = new Alignment(new Sequence[] {}); |
169 | 1 | SequenceI ds = new Sequence("Seq1", "ABCDEFGHI"); |
170 | // subsequence DEF: | |
171 | 1 | SequenceI seq1 = ds.deriveSequence().getSubSequence(3, 6); |
172 | 1 | al.addSequence(seq1); |
173 | ||
174 | /* | |
175 | * Annotate DEF with 4/5/6 respectively | |
176 | */ | |
177 | 1 | Annotation[] anns = new Annotation[] { new Annotation(4), |
178 | new Annotation(5), new Annotation(6) }; | |
179 | 1 | AlignmentAnnotation ann = new AlignmentAnnotation("SS", |
180 | "secondary structure", anns); | |
181 | 1 | seq1.addAlignmentAnnotation(ann); |
182 | ||
183 | /* | |
184 | * The annotations array should match aligned positions | |
185 | */ | |
186 | 1 | assertEquals(3, ann.annotations.length); |
187 | 1 | assertEquals(4, ann.annotations[0].value, 0.001); |
188 | 1 | assertEquals(5, ann.annotations[1].value, 0.001); |
189 | 1 | assertEquals(6, ann.annotations[2].value, 0.001); |
190 | ||
191 | /* | |
192 | * Check annotation to sequence position mappings before expanding the | |
193 | * sequence; these are set up in Sequence.addAlignmentAnnotation -> | |
194 | * Annotation.setSequenceRef -> createSequenceMappings | |
195 | */ | |
196 | 1 | assertNull(ann.getAnnotationForPosition(1)); |
197 | 1 | assertNull(ann.getAnnotationForPosition(2)); |
198 | 1 | assertNull(ann.getAnnotationForPosition(3)); |
199 | 1 | assertEquals(4, ann.getAnnotationForPosition(4).value, 0.001); |
200 | 1 | assertEquals(5, ann.getAnnotationForPosition(5).value, 0.001); |
201 | 1 | assertEquals(6, ann.getAnnotationForPosition(6).value, 0.001); |
202 | 1 | assertNull(ann.getAnnotationForPosition(7)); |
203 | 1 | assertNull(ann.getAnnotationForPosition(8)); |
204 | 1 | assertNull(ann.getAnnotationForPosition(9)); |
205 | ||
206 | /* | |
207 | * Expand the subsequence to the full sequence abcDEFghi | |
208 | */ | |
209 | 1 | AlignmentI expanded = AlignmentUtils.expandContext(al, -1); |
210 | 1 | assertEquals("abcDEFghi", |
211 | expanded.getSequenceAt(0).getSequenceAsString()); | |
212 | ||
213 | /* | |
214 | * Confirm the alignment and sequence have the same SS annotation, | |
215 | * referencing the expanded sequence | |
216 | */ | |
217 | 1 | ann = expanded.getSequenceAt(0).getAnnotation()[0]; |
218 | 1 | assertSame(ann, expanded.getAlignmentAnnotation()[0]); |
219 | 1 | assertSame(expanded.getSequenceAt(0), ann.sequenceRef); |
220 | ||
221 | /* | |
222 | * The annotations array should have null values except for annotated | |
223 | * positions | |
224 | */ | |
225 | 1 | assertNull(ann.annotations[0]); |
226 | 1 | assertNull(ann.annotations[1]); |
227 | 1 | assertNull(ann.annotations[2]); |
228 | 1 | assertEquals(4, ann.annotations[3].value, 0.001); |
229 | 1 | assertEquals(5, ann.annotations[4].value, 0.001); |
230 | 1 | assertEquals(6, ann.annotations[5].value, 0.001); |
231 | 1 | assertNull(ann.annotations[6]); |
232 | 1 | assertNull(ann.annotations[7]); |
233 | 1 | assertNull(ann.annotations[8]); |
234 | ||
235 | /* | |
236 | * sequence position mappings should be unchanged | |
237 | */ | |
238 | 1 | assertNull(ann.getAnnotationForPosition(1)); |
239 | 1 | assertNull(ann.getAnnotationForPosition(2)); |
240 | 1 | assertNull(ann.getAnnotationForPosition(3)); |
241 | 1 | assertEquals(4, ann.getAnnotationForPosition(4).value, 0.001); |
242 | 1 | assertEquals(5, ann.getAnnotationForPosition(5).value, 0.001); |
243 | 1 | assertEquals(6, ann.getAnnotationForPosition(6).value, 0.001); |
244 | 1 | assertNull(ann.getAnnotationForPosition(7)); |
245 | 1 | assertNull(ann.getAnnotationForPosition(8)); |
246 | 1 | assertNull(ann.getAnnotationForPosition(9)); |
247 | } | |
248 | ||
249 | /** | |
250 | * Test method that returns a map of lists of sequences by sequence name. | |
251 | * | |
252 | * @throws IOException | |
253 | */ | |
254 | 1 | @Test(groups = { "Functional" }) |
255 | public void testGetSequencesByName() throws IOException | |
256 | { | |
257 | 1 | final String data = ">Seq1Name\nKQYL\n" + ">Seq2Name\nRFPW\n" |
258 | + ">Seq1Name\nABCD\n"; | |
259 | 1 | AlignmentI al = loadAlignment(data, FileFormat.Fasta); |
260 | 1 | Map<String, List<SequenceI>> map = AlignmentUtils |
261 | .getSequencesByName(al); | |
262 | 1 | assertEquals(2, map.keySet().size()); |
263 | 1 | assertEquals(2, map.get("Seq1Name").size()); |
264 | 1 | assertEquals("KQYL", map.get("Seq1Name").get(0).getSequenceAsString()); |
265 | 1 | assertEquals("ABCD", map.get("Seq1Name").get(1).getSequenceAsString()); |
266 | 1 | assertEquals(1, map.get("Seq2Name").size()); |
267 | 1 | assertEquals("RFPW", map.get("Seq2Name").get(0).getSequenceAsString()); |
268 | } | |
269 | ||
270 | /** | |
271 | * Helper method to load an alignment and ensure dataset sequences are set up. | |
272 | * | |
273 | * @param data | |
274 | * @param format | |
275 | * TODO | |
276 | * @return | |
277 | * @throws IOException | |
278 | */ | |
279 | 1 | protected AlignmentI loadAlignment(final String data, FileFormatI format) |
280 | throws IOException | |
281 | { | |
282 | 1 | AlignmentI a = new FormatAdapter().readFile(data, DataSourceType.PASTE, |
283 | format); | |
284 | 1 | a.setDataset(null); |
285 | 1 | return a; |
286 | } | |
287 | ||
288 | /** | |
289 | * Test mapping of protein to cDNA, for the case where we have no sequence | |
290 | * cross-references, so mappings are made first-served 1-1 where sequences | |
291 | * translate. | |
292 | * | |
293 | * @throws IOException | |
294 | */ | |
295 | 1 | @Test(groups = { "Functional" }) |
296 | public void testMapProteinAlignmentToCdna_noXrefs() throws IOException | |
297 | { | |
298 | 1 | List<SequenceI> protseqs = new ArrayList<>(); |
299 | 1 | protseqs.add(new Sequence("UNIPROT|V12345", "EIQ")); |
300 | 1 | protseqs.add(new Sequence("UNIPROT|V12346", "EIQ")); |
301 | 1 | protseqs.add(new Sequence("UNIPROT|V12347", "SAR")); |
302 | 1 | AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3])); |
303 | 1 | protein.setDataset(null); |
304 | ||
305 | 1 | List<SequenceI> dnaseqs = new ArrayList<>(); |
306 | 1 | dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR |
307 | 1 | dnaseqs.add(new Sequence("EMBL|A22222", "GAGATACAA")); // = EIQ |
308 | 1 | dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ |
309 | 1 | dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG")); // = EIQ |
310 | 1 | AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[4])); |
311 | 1 | cdna.setDataset(null); |
312 | ||
313 | 1 | assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna)); |
314 | ||
315 | // 3 mappings made, each from 1 to 1 sequence | |
316 | 1 | assertEquals(3, protein.getCodonFrames().size()); |
317 | 1 | assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size()); |
318 | 1 | assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size()); |
319 | 1 | assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size()); |
320 | ||
321 | // V12345 mapped to A22222 | |
322 | 1 | AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0)) |
323 | .get(0); | |
324 | 1 | assertEquals(1, acf.getdnaSeqs().length); |
325 | 1 | assertEquals(cdna.getSequenceAt(1).getDatasetSequence(), |
326 | acf.getdnaSeqs()[0]); | |
327 | 1 | Mapping[] protMappings = acf.getProtMappings(); |
328 | 1 | assertEquals(1, protMappings.length); |
329 | 1 | MapList mapList = protMappings[0].getMap(); |
330 | 1 | assertEquals(3, mapList.getFromRatio()); |
331 | 1 | assertEquals(1, mapList.getToRatio()); |
332 | 1 | assertTrue( |
333 | Arrays.equals(new int[] | |
334 | { 1, 9 }, mapList.getFromRanges().get(0))); | |
335 | 1 | assertEquals(1, mapList.getFromRanges().size()); |
336 | 1 | assertTrue( |
337 | Arrays.equals(new int[] | |
338 | { 1, 3 }, mapList.getToRanges().get(0))); | |
339 | 1 | assertEquals(1, mapList.getToRanges().size()); |
340 | ||
341 | // V12346 mapped to A33333 | |
342 | 1 | acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0); |
343 | 1 | assertEquals(1, acf.getdnaSeqs().length); |
344 | 1 | assertEquals(cdna.getSequenceAt(2).getDatasetSequence(), |
345 | acf.getdnaSeqs()[0]); | |
346 | ||
347 | // V12347 mapped to A11111 | |
348 | 1 | acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0); |
349 | 1 | assertEquals(1, acf.getdnaSeqs().length); |
350 | 1 | assertEquals(cdna.getSequenceAt(0).getDatasetSequence(), |
351 | acf.getdnaSeqs()[0]); | |
352 | ||
353 | // no mapping involving the 'extra' A44444 | |
354 | 1 | assertTrue(protein.getCodonFrame(cdna.getSequenceAt(3)).isEmpty()); |
355 | } | |
356 | ||
357 | /** | |
358 | * Test for the alignSequenceAs method that takes two sequences and a mapping. | |
359 | */ | |
360 | 1 | @Test(groups = { "Functional" }) |
361 | public void testAlignSequenceAs_withMapping_noIntrons() | |
362 | { | |
363 | 1 | MapList map = new MapList(new int[] { 1, 6 }, new int[] { 1, 2 }, 3, 1); |
364 | ||
365 | /* | |
366 | * No existing gaps in dna: | |
367 | */ | |
368 | 1 | checkAlignSequenceAs("GGGAAA", "-A-L-", false, false, map, |
369 | "---GGG---AAA"); | |
370 | ||
371 | /* | |
372 | * Now introduce gaps in dna but ignore them when realigning. | |
373 | */ | |
374 | 1 | checkAlignSequenceAs("-G-G-G-A-A-A-", "-A-L-", false, false, map, |
375 | "---GGG---AAA"); | |
376 | ||
377 | /* | |
378 | * Now include gaps in dna when realigning. First retaining 'mapped' gaps | |
379 | * only, i.e. those within the exon region. | |
380 | */ | |
381 | 1 | checkAlignSequenceAs("-G-G--G-A--A-A-", "-A-L-", true, false, map, |
382 | "---G-G--G---A--A-A"); | |
383 | ||
384 | /* | |
385 | * Include all gaps in dna when realigning (within and without the exon | |
386 | * region). The leading gap, and the gaps between codons, are subsumed by | |
387 | * the protein alignment gap. | |
388 | */ | |
389 | 1 | checkAlignSequenceAs("-G-GG--AA-A---", "-A-L-", true, true, map, |
390 | "---G-GG---AA-A---"); | |
391 | ||
392 | /* | |
393 | * Include only unmapped gaps in dna when realigning (outside the exon | |
394 | * region). The leading gap, and the gaps between codons, are subsumed by | |
395 | * the protein alignment gap. | |
396 | */ | |
397 | 1 | checkAlignSequenceAs("-G-GG--AA-A-", "-A-L-", false, true, map, |
398 | "---GGG---AAA---"); | |
399 | } | |
400 | ||
401 | /** | |
402 | * Test for the alignSequenceAs method that takes two sequences and a mapping. | |
403 | */ | |
404 | 1 | @Test(groups = { "Functional" }) |
405 | public void testAlignSequenceAs_withMapping_withIntrons() | |
406 | { | |
407 | /* | |
408 | * Exons at codon 2 (AAA) and 4 (TTT) | |
409 | */ | |
410 | 1 | MapList map = new MapList(new int[] { 4, 6, 10, 12 }, |
411 | new int[] | |
412 | { 1, 2 }, 3, 1); | |
413 | ||
414 | /* | |
415 | * Simple case: no gaps in dna | |
416 | */ | |
417 | 1 | checkAlignSequenceAs("GGGAAACCCTTTGGG", "--A-L-", false, false, map, |
418 | "GGG---AAACCCTTTGGG"); | |
419 | ||
420 | /* | |
421 | * Add gaps to dna - but ignore when realigning. | |
422 | */ | |
423 | 1 | checkAlignSequenceAs("-G-G-G--A--A---AC-CC-T-TT-GG-G-", "--A-L-", false, |
424 | false, map, "GGG---AAACCCTTTGGG"); | |
425 | ||
426 | /* | |
427 | * Add gaps to dna - include within exons only when realigning. | |
428 | */ | |
429 | 1 | checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-", true, |
430 | false, map, "GGG---A--A---ACCCT-TTGGG"); | |
431 | ||
432 | /* | |
433 | * Include gaps outside exons only when realigning. | |
434 | */ | |
435 | 1 | checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-", |
436 | false, true, map, "-G-G-GAAAC-CCTTT-GG-G-"); | |
437 | ||
438 | /* | |
439 | * Include gaps following first intron if we are 'preserving mapped gaps' | |
440 | */ | |
441 | 1 | checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-", true, |
442 | true, map, "-G-G-G--A--A---A-C-CC-T-TT-GG-G-"); | |
443 | ||
444 | /* | |
445 | * Include all gaps in dna when realigning. | |
446 | */ | |
447 | 1 | checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-", true, |
448 | true, map, "-G-G-G--A--A---A-C-CC-T-TT-GG-G-"); | |
449 | } | |
450 | ||
451 | /** | |
452 | * Test for the case where not all of the protein sequence is mapped to cDNA. | |
453 | */ | |
454 | 1 | @Test(groups = { "Functional" }) |
455 | public void testAlignSequenceAs_withMapping_withUnmappedProtein() | |
456 | { | |
457 | /* | |
458 | * Exons at codon 2 (AAA) and 4 (TTT) mapped to A and P | |
459 | */ | |
460 | 1 | final MapList map = new MapList(new int[] { 4, 6, 10, 12 }, |
461 | new int[] | |
462 | { 1, 1, 3, 3 }, 3, 1); | |
463 | ||
464 | /* | |
465 | * -L- 'aligns' ccc------ | |
466 | */ | |
467 | 1 | checkAlignSequenceAs("gggAAAcccTTTggg", "-A-L-P-", false, false, map, |
468 | "gggAAAccc------TTTggg"); | |
469 | } | |
470 | ||
471 | /** | |
472 | * Helper method that performs and verifies the method under test. | |
473 | * | |
474 | * @param alignee | |
475 | * the sequence to be realigned | |
476 | * @param alignModel | |
477 | * the sequence whose alignment is to be copied | |
478 | * @param preserveMappedGaps | |
479 | * @param preserveUnmappedGaps | |
480 | * @param map | |
481 | * @param expected | |
482 | */ | |
483 | 14 | protected void checkAlignSequenceAs(final String alignee, |
484 | final String alignModel, final boolean preserveMappedGaps, | |
485 | final boolean preserveUnmappedGaps, MapList map, | |
486 | final String expected) | |
487 | { | |
488 | 14 | SequenceI alignMe = new Sequence("Seq1", alignee); |
489 | 14 | alignMe.createDatasetSequence(); |
490 | 14 | SequenceI alignFrom = new Sequence("Seq2", alignModel); |
491 | 14 | alignFrom.createDatasetSequence(); |
492 | 14 | AlignedCodonFrame acf = new AlignedCodonFrame(); |
493 | 14 | acf.addMap(alignMe.getDatasetSequence(), alignFrom.getDatasetSequence(), |
494 | map); | |
495 | ||
496 | 14 | AlignmentUtils.alignSequenceAs(alignMe, alignFrom, acf, "---", '-', |
497 | preserveMappedGaps, preserveUnmappedGaps); | |
498 | 14 | assertEquals(expected, alignMe.getSequenceAsString()); |
499 | } | |
500 | ||
501 | /** | |
502 | * Test for the alignSequenceAs method where we preserve gaps in introns only. | |
503 | */ | |
504 | 1 | @Test(groups = { "Functional" }) |
505 | public void testAlignSequenceAs_keepIntronGapsOnly() | |
506 | { | |
507 | ||
508 | /* | |
509 | * Intron GGGAAA followed by exon CCCTTT | |
510 | */ | |
511 | 1 | MapList map = new MapList(new int[] { 7, 12 }, new int[] { 1, 2 }, 3, |
512 | 1); | |
513 | ||
514 | 1 | checkAlignSequenceAs("GG-G-AA-A-C-CC-T-TT", "AL", false, true, map, |
515 | "GG-G-AA-ACCCTTT"); | |
516 | } | |
517 | ||
518 | /** | |
519 | * Test the method that realigns protein to match mapped codon alignment. | |
520 | */ | |
521 | 1 | @Test(groups = { "Functional" }) |
522 | public void testAlignProteinAsDna() | |
523 | { | |
524 | // seq1 codons are [1,2,3] [4,5,6] [7,8,9] [10,11,12] | |
525 | 1 | SequenceI dna1 = new Sequence("Seq1", "TGCCATTACCAG-"); |
526 | // seq2 codons are [1,3,4] [5,6,7] [8,9,10] [11,12,13] | |
527 | 1 | SequenceI dna2 = new Sequence("Seq2", "T-GCCATTACCAG"); |
528 | // seq3 codons are [1,2,3] [4,5,7] [8,9,10] [11,12,13] | |
529 | 1 | SequenceI dna3 = new Sequence("Seq3", "TGCCA-TTACCAG"); |
530 | 1 | AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 }); |
531 | 1 | dna.setDataset(null); |
532 | ||
533 | // protein alignment will be realigned like dna | |
534 | 1 | SequenceI prot1 = new Sequence("Seq1", "CHYQ"); |
535 | 1 | SequenceI prot2 = new Sequence("Seq2", "CHYQ"); |
536 | 1 | SequenceI prot3 = new Sequence("Seq3", "CHYQ"); |
537 | 1 | SequenceI prot4 = new Sequence("Seq4", "R-QSV"); // unmapped, unchanged |
538 | 1 | AlignmentI protein = new Alignment( |
539 | new SequenceI[] | |
540 | { prot1, prot2, prot3, prot4 }); | |
541 | 1 | protein.setDataset(null); |
542 | ||
543 | 1 | MapList map = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, 3, |
544 | 1); | |
545 | 1 | AlignedCodonFrame acf = new AlignedCodonFrame(); |
546 | 1 | acf.addMap(dna1.getDatasetSequence(), prot1.getDatasetSequence(), map); |
547 | 1 | acf.addMap(dna2.getDatasetSequence(), prot2.getDatasetSequence(), map); |
548 | 1 | acf.addMap(dna3.getDatasetSequence(), prot3.getDatasetSequence(), map); |
549 | 1 | ArrayList<AlignedCodonFrame> acfs = new ArrayList<>(); |
550 | 1 | acfs.add(acf); |
551 | 1 | protein.setCodonFrames(acfs); |
552 | ||
553 | /* | |
554 | * Translated codon order is [1,2,3] [1,3,4] [4,5,6] [4,5,7] [5,6,7] [7,8,9] | |
555 | * [8,9,10] [10,11,12] [11,12,13] | |
556 | */ | |
557 | 1 | AlignmentUtils.alignProteinAsDna(protein, dna); |
558 | 1 | assertEquals("C-H--Y-Q-", prot1.getSequenceAsString()); |
559 | 1 | assertEquals("-C--H-Y-Q", prot2.getSequenceAsString()); |
560 | 1 | assertEquals("C--H--Y-Q", prot3.getSequenceAsString()); |
561 | 1 | assertEquals("R-QSV", prot4.getSequenceAsString()); |
562 | } | |
563 | ||
564 | /** | |
565 | * Test the method that tests whether a CDNA sequence translates to a protein | |
566 | * sequence | |
567 | */ | |
568 | 1 | @Test(groups = { "Functional" }) |
569 | public void testTranslatesAs() | |
570 | { | |
571 | // null arguments check | |
572 | 1 | assertFalse(AlignmentUtils.translatesAs(null, 0, null)); |
573 | 1 | assertFalse(AlignmentUtils.translatesAs(new char[] { 't' }, 0, null)); |
574 | 1 | assertFalse(AlignmentUtils.translatesAs(null, 0, new char[] { 'a' })); |
575 | ||
576 | // straight translation | |
577 | 1 | assertTrue(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0, |
578 | "FPKG".toCharArray())); | |
579 | // with extra start codon (not in protein) | |
580 | 1 | assertTrue(AlignmentUtils.translatesAs("atgtttcccaaaggg".toCharArray(), |
581 | 3, "FPKG".toCharArray())); | |
582 | // with stop codon1 (not in protein) | |
583 | 1 | assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtaa".toCharArray(), |
584 | 0, "FPKG".toCharArray())); | |
585 | // with stop codon1 (in protein as *) | |
586 | 1 | assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtaa".toCharArray(), |
587 | 0, "FPKG*".toCharArray())); | |
588 | // with stop codon2 (not in protein) | |
589 | 1 | assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtag".toCharArray(), |
590 | 0, "FPKG".toCharArray())); | |
591 | // with stop codon3 (not in protein) | |
592 | 1 | assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtga".toCharArray(), |
593 | 0, "FPKG".toCharArray())); | |
594 | // with start and stop codon1 | |
595 | 1 | assertTrue(AlignmentUtils.translatesAs( |
596 | "atgtttcccaaagggtaa".toCharArray(), 3, "FPKG".toCharArray())); | |
597 | // with start and stop codon1 (in protein as *) | |
598 | 1 | assertTrue(AlignmentUtils.translatesAs( |
599 | "atgtttcccaaagggtaa".toCharArray(), 3, "FPKG*".toCharArray())); | |
600 | // with start and stop codon2 | |
601 | 1 | assertTrue(AlignmentUtils.translatesAs( |
602 | "atgtttcccaaagggtag".toCharArray(), 3, "FPKG".toCharArray())); | |
603 | // with start and stop codon3 | |
604 | 1 | assertTrue(AlignmentUtils.translatesAs( |
605 | "atgtttcccaaagggtga".toCharArray(), 3, "FPKG".toCharArray())); | |
606 | ||
607 | // with embedded stop codons | |
608 | 1 | assertTrue(AlignmentUtils.translatesAs( |
609 | "atgtttTAGcccaaaTAAgggtga".toCharArray(), 3, | |
610 | "F*PK*G".toCharArray())); | |
611 | ||
612 | // wrong protein | |
613 | 1 | assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0, |
614 | "FPMG".toCharArray())); | |
615 | ||
616 | // truncated dna | |
617 | 1 | assertFalse(AlignmentUtils.translatesAs("tttcccaaagg".toCharArray(), 0, |
618 | "FPKG".toCharArray())); | |
619 | ||
620 | // truncated protein | |
621 | 1 | assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0, |
622 | "FPK".toCharArray())); | |
623 | ||
624 | // overlong dna (doesn't end in stop codon) | |
625 | 1 | assertFalse(AlignmentUtils.translatesAs("tttcccaaagggttt".toCharArray(), |
626 | 0, "FPKG".toCharArray())); | |
627 | ||
628 | // dna + stop codon + more | |
629 | 1 | assertFalse(AlignmentUtils.translatesAs( |
630 | "tttcccaaagggttaga".toCharArray(), 0, "FPKG".toCharArray())); | |
631 | ||
632 | // overlong protein | |
633 | 1 | assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0, |
634 | "FPKGQ".toCharArray())); | |
635 | } | |
636 | ||
637 | /** | |
638 | * Test mapping of protein to cDNA, for cases where the cDNA has start and/or | |
639 | * stop codons in addition to the protein coding sequence. | |
640 | * | |
641 | * @throws IOException | |
642 | */ | |
643 | 1 | @Test(groups = { "Functional" }) |
644 | public void testMapProteinAlignmentToCdna_withStartAndStopCodons() | |
645 | throws IOException | |
646 | { | |
647 | 1 | List<SequenceI> protseqs = new ArrayList<>(); |
648 | 1 | protseqs.add(new Sequence("UNIPROT|V12345", "EIQ")); |
649 | 1 | protseqs.add(new Sequence("UNIPROT|V12346", "EIQ")); |
650 | 1 | protseqs.add(new Sequence("UNIPROT|V12347", "SAR")); |
651 | 1 | AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3])); |
652 | 1 | protein.setDataset(null); |
653 | ||
654 | 1 | List<SequenceI> dnaseqs = new ArrayList<>(); |
655 | // start + SAR: | |
656 | 1 | dnaseqs.add(new Sequence("EMBL|A11111", "ATGTCAGCACGC")); |
657 | // = EIQ + stop | |
658 | 1 | dnaseqs.add(new Sequence("EMBL|A22222", "GAGATACAATAA")); |
659 | // = start +EIQ + stop | |
660 | 1 | dnaseqs.add(new Sequence("EMBL|A33333", "ATGGAAATCCAGTAG")); |
661 | 1 | dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG")); |
662 | 1 | AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[4])); |
663 | 1 | cdna.setDataset(null); |
664 | ||
665 | 1 | assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna)); |
666 | ||
667 | // 3 mappings made, each from 1 to 1 sequence | |
668 | 1 | assertEquals(3, protein.getCodonFrames().size()); |
669 | 1 | assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size()); |
670 | 1 | assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size()); |
671 | 1 | assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size()); |
672 | ||
673 | // V12345 mapped from A22222 | |
674 | 1 | AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0)) |
675 | .get(0); | |
676 | 1 | assertEquals(1, acf.getdnaSeqs().length); |
677 | 1 | assertEquals(cdna.getSequenceAt(1).getDatasetSequence(), |
678 | acf.getdnaSeqs()[0]); | |
679 | 1 | Mapping[] protMappings = acf.getProtMappings(); |
680 | 1 | assertEquals(1, protMappings.length); |
681 | 1 | MapList mapList = protMappings[0].getMap(); |
682 | 1 | assertEquals(3, mapList.getFromRatio()); |
683 | 1 | assertEquals(1, mapList.getToRatio()); |
684 | 1 | assertTrue( |
685 | Arrays.equals(new int[] | |
686 | { 1, 9 }, mapList.getFromRanges().get(0))); | |
687 | 1 | assertEquals(1, mapList.getFromRanges().size()); |
688 | 1 | assertTrue( |
689 | Arrays.equals(new int[] | |
690 | { 1, 3 }, mapList.getToRanges().get(0))); | |
691 | 1 | assertEquals(1, mapList.getToRanges().size()); |
692 | ||
693 | // V12346 mapped from A33333 starting position 4 | |
694 | 1 | acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0); |
695 | 1 | assertEquals(1, acf.getdnaSeqs().length); |
696 | 1 | assertEquals(cdna.getSequenceAt(2).getDatasetSequence(), |
697 | acf.getdnaSeqs()[0]); | |
698 | 1 | protMappings = acf.getProtMappings(); |
699 | 1 | assertEquals(1, protMappings.length); |
700 | 1 | mapList = protMappings[0].getMap(); |
701 | 1 | assertEquals(3, mapList.getFromRatio()); |
702 | 1 | assertEquals(1, mapList.getToRatio()); |
703 | 1 | assertTrue( |
704 | Arrays.equals(new int[] | |
705 | { 4, 12 }, mapList.getFromRanges().get(0))); | |
706 | 1 | assertEquals(1, mapList.getFromRanges().size()); |
707 | 1 | assertTrue( |
708 | Arrays.equals(new int[] | |
709 | { 1, 3 }, mapList.getToRanges().get(0))); | |
710 | 1 | assertEquals(1, mapList.getToRanges().size()); |
711 | ||
712 | // V12347 mapped to A11111 starting position 4 | |
713 | 1 | acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0); |
714 | 1 | assertEquals(1, acf.getdnaSeqs().length); |
715 | 1 | assertEquals(cdna.getSequenceAt(0).getDatasetSequence(), |
716 | acf.getdnaSeqs()[0]); | |
717 | 1 | protMappings = acf.getProtMappings(); |
718 | 1 | assertEquals(1, protMappings.length); |
719 | 1 | mapList = protMappings[0].getMap(); |
720 | 1 | assertEquals(3, mapList.getFromRatio()); |
721 | 1 | assertEquals(1, mapList.getToRatio()); |
722 | 1 | assertTrue( |
723 | Arrays.equals(new int[] | |
724 | { 4, 12 }, mapList.getFromRanges().get(0))); | |
725 | 1 | assertEquals(1, mapList.getFromRanges().size()); |
726 | 1 | assertTrue( |
727 | Arrays.equals(new int[] | |
728 | { 1, 3 }, mapList.getToRanges().get(0))); | |
729 | 1 | assertEquals(1, mapList.getToRanges().size()); |
730 | ||
731 | // no mapping involving the 'extra' A44444 | |
732 | 1 | assertTrue(protein.getCodonFrame(cdna.getSequenceAt(3)).isEmpty()); |
733 | } | |
734 | ||
735 | /** | |
736 | * Test mapping of protein to cDNA, for the case where we have some sequence | |
737 | * cross-references. Verify that 1-to-many mappings are made where | |
738 | * cross-references exist and sequences are mappable. | |
739 | * | |
740 | * @throws IOException | |
741 | */ | |
742 | 1 | @Test(groups = { "Functional" }) |
743 | public void testMapProteinAlignmentToCdna_withXrefs() throws IOException | |
744 | { | |
745 | 1 | List<SequenceI> protseqs = new ArrayList<>(); |
746 | 1 | protseqs.add(new Sequence("UNIPROT|V12345", "EIQ")); |
747 | 1 | protseqs.add(new Sequence("UNIPROT|V12346", "EIQ")); |
748 | 1 | protseqs.add(new Sequence("UNIPROT|V12347", "SAR")); |
749 | 1 | AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3])); |
750 | 1 | protein.setDataset(null); |
751 | ||
752 | 1 | List<SequenceI> dnaseqs = new ArrayList<>(); |
753 | 1 | dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR |
754 | 1 | dnaseqs.add(new Sequence("EMBL|A22222", "ATGGAGATACAA")); // = start + EIQ |
755 | 1 | dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ |
756 | 1 | dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG")); // = EIQ |
757 | 1 | dnaseqs.add(new Sequence("EMBL|A55555", "GAGATTCAG")); // = EIQ |
758 | 1 | AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[5])); |
759 | 1 | cdna.setDataset(null); |
760 | ||
761 | // Xref A22222 to V12345 (should get mapped) | |
762 | 1 | dnaseqs.get(1).addDBRef(new DBRefEntry("UNIPROT", "1", "V12345")); |
763 | // Xref V12345 to A44444 (should get mapped) | |
764 | 1 | protseqs.get(0).addDBRef(new DBRefEntry("EMBL", "1", "A44444")); |
765 | // Xref A33333 to V12347 (sequence mismatch - should not get mapped) | |
766 | 1 | dnaseqs.get(2).addDBRef(new DBRefEntry("UNIPROT", "1", "V12347")); |
767 | // as V12345 is mapped to A22222 and A44444, this leaves V12346 unmapped. | |
768 | // it should get paired up with the unmapped A33333 | |
769 | // A11111 should be mapped to V12347 | |
770 | // A55555 is spare and has no xref so is not mapped | |
771 | ||
772 | 1 | assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna)); |
773 | ||
774 | // 4 protein mappings made for 3 proteins, 2 to V12345, 1 each to V12346/7 | |
775 | 1 | assertEquals(3, protein.getCodonFrames().size()); |
776 | 1 | assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size()); |
777 | 1 | assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size()); |
778 | 1 | assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size()); |
779 | ||
780 | // one mapping for each of the first 4 cDNA sequences | |
781 | 1 | assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(0)).size()); |
782 | 1 | assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(1)).size()); |
783 | 1 | assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(2)).size()); |
784 | 1 | assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(3)).size()); |
785 | ||
786 | // V12345 mapped to A22222 and A44444 | |
787 | 1 | AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0)) |
788 | .get(0); | |
789 | 1 | assertEquals(2, acf.getdnaSeqs().length); |
790 | 1 | assertEquals(cdna.getSequenceAt(1).getDatasetSequence(), |
791 | acf.getdnaSeqs()[0]); | |
792 | 1 | assertEquals(cdna.getSequenceAt(3).getDatasetSequence(), |
793 | acf.getdnaSeqs()[1]); | |
794 | ||
795 | // V12346 mapped to A33333 | |
796 | 1 | acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0); |
797 | 1 | assertEquals(1, acf.getdnaSeqs().length); |
798 | 1 | assertEquals(cdna.getSequenceAt(2).getDatasetSequence(), |
799 | acf.getdnaSeqs()[0]); | |
800 | ||
801 | // V12347 mapped to A11111 | |
802 | 1 | acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0); |
803 | 1 | assertEquals(1, acf.getdnaSeqs().length); |
804 | 1 | assertEquals(cdna.getSequenceAt(0).getDatasetSequence(), |
805 | acf.getdnaSeqs()[0]); | |
806 | ||
807 | // no mapping involving the 'extra' A55555 | |
808 | 1 | assertTrue(protein.getCodonFrame(cdna.getSequenceAt(4)).isEmpty()); |
809 | } | |
810 | ||
811 | /** | |
812 | * Test mapping of protein to cDNA, for the case where we have some sequence | |
813 | * cross-references. Verify that once we have made an xref mapping we don't | |
814 | * also map un-xrefd sequeces. | |
815 | * | |
816 | * @throws IOException | |
817 | */ | |
818 | 1 | @Test(groups = { "Functional" }) |
819 | public void testMapProteinAlignmentToCdna_prioritiseXrefs() | |
820 | throws IOException | |
821 | { | |
822 | 1 | List<SequenceI> protseqs = new ArrayList<>(); |
823 | 1 | protseqs.add(new Sequence("UNIPROT|V12345", "EIQ")); |
824 | 1 | protseqs.add(new Sequence("UNIPROT|V12346", "EIQ")); |
825 | 1 | AlignmentI protein = new Alignment( |
826 | protseqs.toArray(new SequenceI[protseqs.size()])); | |
827 | 1 | protein.setDataset(null); |
828 | ||
829 | 1 | List<SequenceI> dnaseqs = new ArrayList<>(); |
830 | 1 | dnaseqs.add(new Sequence("EMBL|A11111", "GAAATCCAG")); // = EIQ |
831 | 1 | dnaseqs.add(new Sequence("EMBL|A22222", "GAAATTCAG")); // = EIQ |
832 | 1 | AlignmentI cdna = new Alignment( |
833 | dnaseqs.toArray(new SequenceI[dnaseqs.size()])); | |
834 | 1 | cdna.setDataset(null); |
835 | ||
836 | // Xref A22222 to V12345 (should get mapped) | |
837 | // A11111 should then be mapped to the unmapped V12346 | |
838 | 1 | dnaseqs.get(1).addDBRef(new DBRefEntry("UNIPROT", "1", "V12345")); |
839 | ||
840 | 1 | assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna)); |
841 | ||
842 | // 2 protein mappings made | |
843 | 1 | assertEquals(2, protein.getCodonFrames().size()); |
844 | 1 | assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size()); |
845 | 1 | assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size()); |
846 | ||
847 | // one mapping for each of the cDNA sequences | |
848 | 1 | assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(0)).size()); |
849 | 1 | assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(1)).size()); |
850 | ||
851 | // V12345 mapped to A22222 | |
852 | 1 | AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0)) |
853 | .get(0); | |
854 | 1 | assertEquals(1, acf.getdnaSeqs().length); |
855 | 1 | assertEquals(cdna.getSequenceAt(1).getDatasetSequence(), |
856 | acf.getdnaSeqs()[0]); | |
857 | ||
858 | // V12346 mapped to A11111 | |
859 | 1 | acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0); |
860 | 1 | assertEquals(1, acf.getdnaSeqs().length); |
861 | 1 | assertEquals(cdna.getSequenceAt(0).getDatasetSequence(), |
862 | acf.getdnaSeqs()[0]); | |
863 | } | |
864 | ||
865 | /** | |
866 | * Test the method that shows or hides sequence annotations by type(s) and | |
867 | * selection group. | |
868 | */ | |
869 | 1 | @Test(groups = { "Functional" }) |
870 | public void testShowOrHideSequenceAnnotations() | |
871 | { | |
872 | 1 | SequenceI seq1 = new Sequence("Seq1", "AAA"); |
873 | 1 | SequenceI seq2 = new Sequence("Seq2", "BBB"); |
874 | 1 | SequenceI seq3 = new Sequence("Seq3", "CCC"); |
875 | 1 | Annotation[] anns = new Annotation[] { new Annotation(2f) }; |
876 | 1 | AlignmentAnnotation ann1 = new AlignmentAnnotation("Structure", "ann1", |
877 | anns); | |
878 | 1 | ann1.setSequenceRef(seq1); |
879 | 1 | AlignmentAnnotation ann2 = new AlignmentAnnotation("Structure", "ann2", |
880 | anns); | |
881 | 1 | ann2.setSequenceRef(seq2); |
882 | 1 | AlignmentAnnotation ann3 = new AlignmentAnnotation("Structure", "ann3", |
883 | anns); | |
884 | 1 | AlignmentAnnotation ann4 = new AlignmentAnnotation("Temp", "ann4", |
885 | anns); | |
886 | 1 | ann4.setSequenceRef(seq1); |
887 | 1 | AlignmentAnnotation ann5 = new AlignmentAnnotation("Temp", "ann5", |
888 | anns); | |
889 | 1 | ann5.setSequenceRef(seq2); |
890 | 1 | AlignmentAnnotation ann6 = new AlignmentAnnotation("Temp", "ann6", |
891 | anns); | |
892 | 1 | AlignmentI al = new Alignment(new SequenceI[] { seq1, seq2, seq3 }); |
893 | 1 | al.addAnnotation(ann1); // Structure for Seq1 |
894 | 1 | al.addAnnotation(ann2); // Structure for Seq2 |
895 | 1 | al.addAnnotation(ann3); // Structure for no sequence |
896 | 1 | al.addAnnotation(ann4); // Temp for seq1 |
897 | 1 | al.addAnnotation(ann5); // Temp for seq2 |
898 | 1 | al.addAnnotation(ann6); // Temp for no sequence |
899 | 1 | List<String> types = new ArrayList<>(); |
900 | 1 | List<SequenceI> scope = new ArrayList<>(); |
901 | ||
902 | /* | |
903 | * Set all sequence related Structure to hidden (ann1, ann2) | |
904 | */ | |
905 | 1 | types.add("Structure"); |
906 | 1 | AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, false, |
907 | false); | |
908 | 1 | assertFalse(ann1.visible); |
909 | 1 | assertFalse(ann2.visible); |
910 | 1 | assertTrue(ann3.visible); // not sequence-related, not affected |
911 | 1 | assertTrue(ann4.visible); // not Structure, not affected |
912 | 1 | assertTrue(ann5.visible); // " |
913 | 1 | assertTrue(ann6.visible); // not sequence-related, not affected |
914 | ||
915 | /* | |
916 | * Set Temp in {seq1, seq3} to hidden | |
917 | */ | |
918 | 1 | types.clear(); |
919 | 1 | types.add("Temp"); |
920 | 1 | scope.add(seq1); |
921 | 1 | scope.add(seq3); |
922 | 1 | AlignmentUtils.showOrHideSequenceAnnotations(al, types, scope, false, |
923 | false); | |
924 | 1 | assertFalse(ann1.visible); // unchanged |
925 | 1 | assertFalse(ann2.visible); // unchanged |
926 | 1 | assertTrue(ann3.visible); // not sequence-related, not affected |
927 | 1 | assertFalse(ann4.visible); // Temp for seq1 hidden |
928 | 1 | assertTrue(ann5.visible); // not in scope, not affected |
929 | 1 | assertTrue(ann6.visible); // not sequence-related, not affected |
930 | ||
931 | /* | |
932 | * Set Temp in all sequences to hidden | |
933 | */ | |
934 | 1 | types.clear(); |
935 | 1 | types.add("Temp"); |
936 | 1 | scope.add(seq1); |
937 | 1 | scope.add(seq3); |
938 | 1 | AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, false, |
939 | false); | |
940 | 1 | assertFalse(ann1.visible); // unchanged |
941 | 1 | assertFalse(ann2.visible); // unchanged |
942 | 1 | assertTrue(ann3.visible); // not sequence-related, not affected |
943 | 1 | assertFalse(ann4.visible); // Temp for seq1 hidden |
944 | 1 | assertFalse(ann5.visible); // Temp for seq2 hidden |
945 | 1 | assertTrue(ann6.visible); // not sequence-related, not affected |
946 | ||
947 | /* | |
948 | * Set all types in {seq1, seq3} to visible | |
949 | */ | |
950 | 1 | types.clear(); |
951 | 1 | scope.clear(); |
952 | 1 | scope.add(seq1); |
953 | 1 | scope.add(seq3); |
954 | 1 | AlignmentUtils.showOrHideSequenceAnnotations(al, types, scope, true, |
955 | true); | |
956 | 1 | assertTrue(ann1.visible); // Structure for seq1 set visible |
957 | 1 | assertFalse(ann2.visible); // not in scope, unchanged |
958 | 1 | assertTrue(ann3.visible); // not sequence-related, not affected |
959 | 1 | assertTrue(ann4.visible); // Temp for seq1 set visible |
960 | 1 | assertFalse(ann5.visible); // not in scope, unchanged |
961 | 1 | assertTrue(ann6.visible); // not sequence-related, not affected |
962 | ||
963 | /* | |
964 | * Set all types in all scope to hidden | |
965 | */ | |
966 | 1 | AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, true, |
967 | false); | |
968 | 1 | assertFalse(ann1.visible); |
969 | 1 | assertFalse(ann2.visible); |
970 | 1 | assertTrue(ann3.visible); // not sequence-related, not affected |
971 | 1 | assertFalse(ann4.visible); |
972 | 1 | assertFalse(ann5.visible); |
973 | 1 | assertTrue(ann6.visible); // not sequence-related, not affected |
974 | } | |
975 | ||
976 | /** | |
977 | * Tests for the method that checks if one sequence cross-references another | |
978 | */ | |
979 | 1 | @Test(groups = { "Functional" }) |
980 | public void testHasCrossRef() | |
981 | { | |
982 | 1 | assertFalse(AlignmentUtils.hasCrossRef(null, null)); |
983 | 1 | SequenceI seq1 = new Sequence("EMBL|A12345", "ABCDEF"); |
984 | 1 | assertFalse(AlignmentUtils.hasCrossRef(seq1, null)); |
985 | 1 | assertFalse(AlignmentUtils.hasCrossRef(null, seq1)); |
986 | 1 | SequenceI seq2 = new Sequence("UNIPROT|V20192", "ABCDEF"); |
987 | 1 | assertFalse(AlignmentUtils.hasCrossRef(seq1, seq2)); |
988 | ||
989 | // different ref | |
990 | 1 | seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "v20193")); |
991 | 1 | assertFalse(AlignmentUtils.hasCrossRef(seq1, seq2)); |
992 | ||
993 | // case-insensitive; version number is ignored | |
994 | 1 | seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "v20192")); |
995 | 1 | assertTrue(AlignmentUtils.hasCrossRef(seq1, seq2)); |
996 | ||
997 | // right case! | |
998 | 1 | seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192")); |
999 | 1 | assertTrue(AlignmentUtils.hasCrossRef(seq1, seq2)); |
1000 | // test is one-way only | |
1001 | 1 | assertFalse(AlignmentUtils.hasCrossRef(seq2, seq1)); |
1002 | } | |
1003 | ||
1004 | /** | |
1005 | * Tests for the method that checks if either sequence cross-references the | |
1006 | * other | |
1007 | */ | |
1008 | 1 | @Test(groups = { "Functional" }) |
1009 | public void testHaveCrossRef() | |
1010 | { | |
1011 | 1 | assertFalse(AlignmentUtils.hasCrossRef(null, null)); |
1012 | 1 | SequenceI seq1 = new Sequence("EMBL|A12345", "ABCDEF"); |
1013 | 1 | assertFalse(AlignmentUtils.haveCrossRef(seq1, null)); |
1014 | 1 | assertFalse(AlignmentUtils.haveCrossRef(null, seq1)); |
1015 | 1 | SequenceI seq2 = new Sequence("UNIPROT|V20192", "ABCDEF"); |
1016 | 1 | assertFalse(AlignmentUtils.haveCrossRef(seq1, seq2)); |
1017 | ||
1018 | 1 | seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192")); |
1019 | 1 | assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2)); |
1020 | // next is true for haveCrossRef, false for hasCrossRef | |
1021 | 1 | assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1)); |
1022 | ||
1023 | // now the other way round | |
1024 | 1 | seq1.setDBRefs(null); |
1025 | 1 | seq2.addDBRef(new DBRefEntry("EMBL", "1", "A12345")); |
1026 | 1 | assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2)); |
1027 | 1 | assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1)); |
1028 | ||
1029 | // now both ways | |
1030 | 1 | seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192")); |
1031 | 1 | assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2)); |
1032 | 1 | assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1)); |
1033 | } | |
1034 | ||
/**
 * Test the method that extracts the cds-only part of a dna alignment.
 * <p>
 * Two dna sequences are each mapped to a peptide through discontiguous
 * codon ranges. After calling makeCdsAlignment the test checks the CDS
 * sequences produced, that they share the dna alignment's dataset, the
 * dbrefs (with mappings) added between CDS, peptide and dna, the codon frame
 * mappings held on the dataset, and the transfer of a sequence feature from
 * dna to CDS coordinates.
 */
@Test(groups = { "Functional" })
public void testMakeCdsAlignment()
{
  /*
   * scenario:
   *     dna1 --> [4, 6] [10,12]        --> pep1 
   *     dna2 --> [1, 3] [7, 9] [13,15] --> pep2
   */
  SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
  SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC");
  SequenceI pep1 = new Sequence("pep1", "GF");
  SequenceI pep2 = new Sequence("pep2", "GFP");
  pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "pep1"));
  pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "pep2"));
  dna1.createDatasetSequence();
  dna2.createDatasetSequence();
  pep1.createDatasetSequence();
  pep2.createDatasetSequence();
  AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });
  dna.setDataset(null);

  /*
   * put a variant feature on dna2 base 8
   * - should transfer to cds2 base 5
   *   (base 8 is the 2nd base of the 2nd mapped range [7, 9])
   */
  dna2.addSequenceFeature(
          new SequenceFeature("variant", "hgmd", 8, 8, 0f, null));

  /*
   * need a sourceDbRef if we are to construct dbrefs to the CDS
   * sequence from the dna contig sequences
   */
  DBRefEntry dbref = new DBRefEntry("ENSEMBL", "0", "dna1");
  dna1.getDatasetSequence().addDBRef(dbref);
  // sanity check: the ref just added is reported as the primary dbref
  org.testng.Assert.assertEquals(dbref, dna1.getPrimaryDBRefs().get(0));
  dbref = new DBRefEntry("ENSEMBL", "0", "dna2");
  dna2.getDatasetSequence().addDBRef(dbref);
  org.testng.Assert.assertEquals(dbref, dna2.getPrimaryDBRefs().get(0));

  /*
   * CDS sequences are 'discovered' from dna-to-protein mappings on the alignment
   * dataset (e.g. added from dbrefs by CrossRef.findXrefSequences)
   */
  // dna1 bases 4-6 and 10-12 (2 codons) map to pep1 residues 1-2
  MapList mapfordna1 = new MapList(new int[] { 4, 6, 10, 12 },
          new int[]
          { 1, 2 }, 3, 1);
  AlignedCodonFrame acf = new AlignedCodonFrame();
  acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(),
          mapfordna1);
  dna.addCodonFrame(acf);
  // dna2 bases 1-3, 7-9 and 13-15 (3 codons) map to pep2 residues 1-3
  MapList mapfordna2 = new MapList(new int[] { 1, 3, 7, 9, 13, 15 },
          new int[]
          { 1, 3 }, 3, 1);
  acf = new AlignedCodonFrame();
  acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(),
          mapfordna2);
  dna.addCodonFrame(acf);

  /*
   * In this case, mappings originally came from matching Uniprot accessions 
   * - so need an xref on dna involving those regions. 
   * These are normally constructed from CDS annotation
   */
  DBRefEntry dna1xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep1",
          new Mapping(mapfordna1));
  dna1.addDBRef(dna1xref);
  assertEquals(2, dna1.getDBRefs().size()); // to self and to pep1
  DBRefEntry dna2xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep2",
          new Mapping(mapfordna2));
  dna2.addDBRef(dna2xref);
  assertEquals(2, dna2.getDBRefs().size()); // to self and to pep2

  /*
   * execute method under test:
   */
  AlignmentI cds = AlignmentUtils
          .makeCdsAlignment(new SequenceI[]
          { dna1, dna2 }, dna.getDataset(), null);

  /*
   * verify cds sequences
   * (the upper case, mapped bases of dna1 and dna2 respectively)
   */
  assertEquals(2, cds.getSequences().size());
  assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
  assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString());

  /*
   * verify shared, extended alignment dataset
   */
  assertSame(dna.getDataset(), cds.getDataset());
  SequenceI cds1Dss = cds.getSequenceAt(0).getDatasetSequence();
  SequenceI cds2Dss = cds.getSequenceAt(1).getDatasetSequence();
  assertTrue(dna.getDataset().getSequences().contains(cds1Dss));
  assertTrue(dna.getDataset().getSequences().contains(cds2Dss));

  /*
   * verify CDS has a dbref with mapping to peptide
   * (expected as the first of its two dbrefs)
   */
  assertNotNull(cds1Dss.getDBRefs());
  assertEquals(2, cds1Dss.getDBRefs().size());
  dbref = cds1Dss.getDBRefs().get(0);
  assertEquals(dna1xref.getSource(), dbref.getSource());
  // version is via ensembl's primary ref
  assertEquals(dna1xref.getVersion(), dbref.getVersion());
  assertEquals(dna1xref.getAccessionId(), dbref.getAccessionId());
  assertNotNull(dbref.getMap());
  assertSame(pep1.getDatasetSequence(), dbref.getMap().getTo());
  // CDS is contiguous, so its 6 bases map directly to pep1 residues 1-2
  MapList cdsMapping = new MapList(new int[] { 1, 6 }, new int[] { 1, 2 },
          3, 1);
  assertEquals(cdsMapping, dbref.getMap().getMap());

  /*
   * verify peptide has added a dbref with reverse mapping to CDS
   */
  assertNotNull(pep1.getDBRefs());
  // FIXME pep1.getDBRefs() is 1 - is that the correct behaviour ?
  // NOTE(review): the assertion below expects 2 dbrefs, and index 1 is then
  // read as the reverse mapping to CDS, so the FIXME above looks stale -
  // confirm and remove it
  assertEquals(2, pep1.getDBRefs().size());
  dbref = pep1.getDBRefs().get(1);
  assertEquals("ENSEMBL", dbref.getSource());
  assertEquals("0", dbref.getVersion());
  assertEquals("CDS|dna1", dbref.getAccessionId());
  assertNotNull(dbref.getMap());
  assertSame(cds1Dss, dbref.getMap().getTo());
  assertEquals(cdsMapping.getInverse(), dbref.getMap().getMap());

  /*
   * verify cDNA has added a dbref with mapping to CDS
   * (expected as the third dbref, after the two asserted before the call)
   */
  assertEquals(3, dna1.getDBRefs().size());
  DBRefEntry dbRefEntry = dna1.getDBRefs().get(2);
  assertSame(cds1Dss, dbRefEntry.getMap().getTo());
  MapList dnaToCdsMapping = new MapList(new int[] { 4, 6, 10, 12 },
          new int[]
          { 1, 6 }, 1, 1);
  assertEquals(dnaToCdsMapping, dbRefEntry.getMap().getMap());
  assertEquals(3, dna2.getDBRefs().size());
  dbRefEntry = dna2.getDBRefs().get(2);
  assertSame(cds2Dss, dbRefEntry.getMap().getTo());
  dnaToCdsMapping = new MapList(new int[] { 1, 3, 7, 9, 13, 15 },
          new int[]
          { 1, 9 }, 1, 1);
  assertEquals(dnaToCdsMapping, dbRefEntry.getMap().getMap());

  /*
   * verify CDS has added a dbref with mapping to cDNA
   * (expected as the second of its two dbrefs)
   */
  assertEquals(2, cds1Dss.getDBRefs().size());
  dbRefEntry = cds1Dss.getDBRefs().get(1);
  assertSame(dna1.getDatasetSequence(), dbRefEntry.getMap().getTo());
  MapList cdsToDnaMapping = new MapList(new int[] { 1, 6 },
          new int[]
          { 4, 6, 10, 12 }, 1, 1);
  assertEquals(cdsToDnaMapping, dbRefEntry.getMap().getMap());
  assertEquals(2, cds2Dss.getDBRefs().size());
  dbRefEntry = cds2Dss.getDBRefs().get(1);
  assertSame(dna2.getDatasetSequence(), dbRefEntry.getMap().getTo());
  cdsToDnaMapping = new MapList(new int[] { 1, 9 },
          new int[]
          { 1, 3, 7, 9, 13, 15 }, 1, 1);
  assertEquals(cdsToDnaMapping, dbRefEntry.getMap().getMap());

  /*
   * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide
   * the mappings are on the shared alignment dataset
   * 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep) 
   */
  List<AlignedCodonFrame> cdsMappings = cds.getDataset().getCodonFrames();
  assertEquals(6, cdsMappings.size());

  /*
   * verify that mapping sets for dna and cds alignments are different
   * [not current behaviour - all mappings are on the alignment dataset]
   * (the checks below are disabled for that reason)
   */
  // select -> subselect type to test.
  // Assert.assertNotSame(dna.getCodonFrames(), cds.getCodonFrames());
  // assertEquals(4, dna.getCodonFrames().size());
  // assertEquals(4, cds.getCodonFrames().size());

  /*
   * Two mappings involve pep1 (dna to pep1, cds to pep1)
   * Mapping from pep1 to GGGTTT in first new exon sequence
   */
  List<AlignedCodonFrame> pep1Mappings = MappingUtils
          .findMappingsForSequence(pep1, cdsMappings);
  assertEquals(2, pep1Mappings.size());
  // narrow down to the single mapping that also involves the first CDS
  List<AlignedCodonFrame> mappings = MappingUtils
          .findMappingsForSequence(cds.getSequenceAt(0), pep1Mappings);
  assertEquals(1, mappings.size());

  // map G to GGG
  SearchResultsI sr = MappingUtils.buildSearchResults(pep1, 1, mappings);
  assertEquals(1, sr.getResults().size());
  SearchResultMatchI m = sr.getResults().get(0);
  assertSame(cds1Dss, m.getSequence());
  assertEquals(1, m.getStart());
  assertEquals(3, m.getEnd());
  // map F to TTT
  sr = MappingUtils.buildSearchResults(pep1, 2, mappings);
  m = sr.getResults().get(0);
  assertSame(cds1Dss, m.getSequence());
  assertEquals(4, m.getStart());
  assertEquals(6, m.getEnd());

  /*
   * Two mappings involve pep2 (dna to pep2, cds to pep2)
   * Verify mapping from pep2 to GGGTTTCCC in second new exon sequence
   */
  List<AlignedCodonFrame> pep2Mappings = MappingUtils
          .findMappingsForSequence(pep2, cdsMappings);
  assertEquals(2, pep2Mappings.size());
  // narrow down to the single mapping that also involves the second CDS
  mappings = MappingUtils.findMappingsForSequence(cds.getSequenceAt(1),
          pep2Mappings);
  assertEquals(1, mappings.size());
  // map G to GGG
  sr = MappingUtils.buildSearchResults(pep2, 1, mappings);
  assertEquals(1, sr.getResults().size());
  m = sr.getResults().get(0);
  assertSame(cds2Dss, m.getSequence());
  assertEquals(1, m.getStart());
  assertEquals(3, m.getEnd());
  // map F to TTT
  sr = MappingUtils.buildSearchResults(pep2, 2, mappings);
  m = sr.getResults().get(0);
  assertSame(cds2Dss, m.getSequence());
  assertEquals(4, m.getStart());
  assertEquals(6, m.getEnd());
  // map P to CCC
  sr = MappingUtils.buildSearchResults(pep2, 3, mappings);
  m = sr.getResults().get(0);
  assertSame(cds2Dss, m.getSequence());
  assertEquals(7, m.getStart());
  assertEquals(9, m.getEnd());

  /*
   * check cds2 acquired a variant feature in position 5
   */
  List<SequenceFeature> sfs = cds2Dss.getSequenceFeatures();
  assertNotNull(sfs);
  assertEquals(1, sfs.size());
  assertEquals("variant", sfs.get(0).type);
  assertEquals(5, sfs.get(0).begin);
  assertEquals(5, sfs.get(0).end);
}
1281 | ||
/**
 * Test the method that makes a cds-only alignment from a DNA sequence and its
 * product mappings, for the case where there are multiple exon mappings to
 * different protein products.
 * <p>
 * One dna sequence is mapped to three peptides through three different
 * pairs of codon ranges. The test expects one CDS sequence per peptide, all
 * held in the dna alignment's dataset, with mappings from the dna to each
 * peptide and to each CDS.
 */
@Test(groups = { "Functional" })
public void testMakeCdsAlignment_multipleProteins()
{
  SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
  SequenceI pep1 = new Sequence("pep1", "GF"); // GGGTTT
  SequenceI pep2 = new Sequence("pep2", "KP"); // aaaccc
  SequenceI pep3 = new Sequence("pep3", "KF"); // aaaTTT
  dna1.createDatasetSequence();
  pep1.createDatasetSequence();
  pep2.createDatasetSequence();
  pep3.createDatasetSequence();
  pep1.getDatasetSequence()
          .addDBRef(new DBRefEntry("EMBLCDS", "2", "A12345"));
  pep2.getDatasetSequence()
          .addDBRef(new DBRefEntry("EMBLCDS", "3", "A12346"));
  pep3.getDatasetSequence()
          .addDBRef(new DBRefEntry("EMBLCDS", "4", "A12347"));

  /*
   * Create the DNA alignment that the CDS alignment is derived from
   * (the CDS alignment itself is made by the method under test, below)
   */
  AlignmentI dna = new Alignment(new SequenceI[] { dna1 });
  dna.setDataset(null);

  /*
   * Make the mappings from dna to protein
   */
  // map ...GGG...TTT to GF
  MapList map = new MapList(new int[] { 4, 6, 10, 12 },
          new int[]
          { 1, 2 }, 3, 1);
  AlignedCodonFrame acf = new AlignedCodonFrame();
  acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
  dna.addCodonFrame(acf);

  // map aaa...ccc to KP
  map = new MapList(new int[] { 1, 3, 7, 9 }, new int[] { 1, 2 }, 3, 1);
  acf = new AlignedCodonFrame();
  acf.addMap(dna1.getDatasetSequence(), pep2.getDatasetSequence(), map);
  dna.addCodonFrame(acf);

  // map aaa......TTT to KF
  map = new MapList(new int[] { 1, 3, 10, 12 }, new int[] { 1, 2 }, 3, 1);
  acf = new AlignedCodonFrame();
  acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map);
  dna.addCodonFrame(acf);

  /*
   * execute method under test
   */
  AlignmentI cdsal = AlignmentUtils
          .makeCdsAlignment(new SequenceI[]
          { dna1 }, dna.getDataset(), null);

  /*
   * Verify we have 3 cds sequences, mapped to pep1/2/3 respectively
   */
  List<SequenceI> cds = cdsal.getSequences();
  assertEquals(3, cds.size());

  /*
   * verify shared, extended alignment dataset
   */
  assertSame(cdsal.getDataset(), dna.getDataset());
  assertTrue(dna.getDataset().getSequences()
          .contains(cds.get(0).getDatasetSequence()));
  assertTrue(dna.getDataset().getSequences()
          .contains(cds.get(1).getDatasetSequence()));
  assertTrue(dna.getDataset().getSequences()
          .contains(cds.get(2).getDatasetSequence()));

  /*
   * verify aligned cds sequences and their xrefs
   * (all three CDS sequences are expected to have the same name,
   * "CDS|dna1"; the commented-out lines are disabled checks of
   * accession-based names and EMBLCDS dbrefs)
   */
  SequenceI cdsSeq = cds.get(0);
  assertEquals("GGGTTT", cdsSeq.getSequenceAsString());
  // assertEquals("dna1|A12345", cdsSeq.getName());
  assertEquals("CDS|dna1", cdsSeq.getName());
  // assertEquals(1, cdsSeq.getDBRefs().length);
  // DBRefEntry cdsRef = cdsSeq.getDBRefs()[0];
  // assertEquals("EMBLCDS", cdsRef.getSource());
  // assertEquals("2", cdsRef.getVersion());
  // assertEquals("A12345", cdsRef.getAccessionId());

  cdsSeq = cds.get(1);
  assertEquals("aaaccc", cdsSeq.getSequenceAsString());
  // assertEquals("dna1|A12346", cdsSeq.getName());
  assertEquals("CDS|dna1", cdsSeq.getName());
  // assertEquals(1, cdsSeq.getDBRefs().length);
  // cdsRef = cdsSeq.getDBRefs()[0];
  // assertEquals("EMBLCDS", cdsRef.getSource());
  // assertEquals("3", cdsRef.getVersion());
  // assertEquals("A12346", cdsRef.getAccessionId());

  cdsSeq = cds.get(2);
  assertEquals("aaaTTT", cdsSeq.getSequenceAsString());
  // assertEquals("dna1|A12347", cdsSeq.getName());
  assertEquals("CDS|dna1", cdsSeq.getName());
  // assertEquals(1, cdsSeq.getDBRefs().length);
  // cdsRef = cdsSeq.getDBRefs()[0];
  // assertEquals("EMBLCDS", cdsRef.getSource());
  // assertEquals("4", cdsRef.getVersion());
  // assertEquals("A12347", cdsRef.getAccessionId());

  /*
   * Verify there are mappings from each cds sequence to its protein product
   * and also to its dna source
   */
  List<AlignedCodonFrame> newMappings = cdsal.getCodonFrames();

  /*
   * 6 mappings involve dna1 (to pep1/2/3, cds1/2/3) 
   */
  List<AlignedCodonFrame> dnaMappings = MappingUtils
          .findMappingsForSequence(dna1, newMappings);
  assertEquals(6, dnaMappings.size());

  /*
   * dna1 to pep1
   */
  List<AlignedCodonFrame> mappings = MappingUtils
          .findMappingsForSequence(pep1, dnaMappings);
  assertEquals(1, mappings.size());
  assertEquals(1, mappings.get(0).getMappings().size());
  assertSame(pep1.getDatasetSequence(),
          mappings.get(0).getMappings().get(0).getMapping().getTo());

  /*
   * dna1 to cds1
   */
  List<AlignedCodonFrame> dnaToCds1Mappings = MappingUtils
          .findMappingsForSequence(cds.get(0), dnaMappings);
  Mapping mapping = dnaToCds1Mappings.get(0).getMappings().get(0)
          .getMapping();
  assertSame(cds.get(0).getDatasetSequence(), mapping.getTo());
  assertEquals("G(1) in CDS should map to G(4) in DNA", 4,
          mapping.getMap().getToPosition(1));

  /*
   * dna1 to pep2
   */
  mappings = MappingUtils.findMappingsForSequence(pep2, dnaMappings);
  assertEquals(1, mappings.size());
  assertEquals(1, mappings.get(0).getMappings().size());
  assertSame(pep2.getDatasetSequence(),
          mappings.get(0).getMappings().get(0).getMapping().getTo());

  /*
   * dna1 to cds2
   */
  List<AlignedCodonFrame> dnaToCds2Mappings = MappingUtils
          .findMappingsForSequence(cds.get(1), dnaMappings);
  mapping = dnaToCds2Mappings.get(0).getMappings().get(0).getMapping();
  assertSame(cds.get(1).getDatasetSequence(), mapping.getTo());
  assertEquals("c(4) in CDS should map to c(7) in DNA", 7,
          mapping.getMap().getToPosition(4));

  /*
   * dna1 to pep3
   */
  mappings = MappingUtils.findMappingsForSequence(pep3, dnaMappings);
  assertEquals(1, mappings.size());
  assertEquals(1, mappings.get(0).getMappings().size());
  assertSame(pep3.getDatasetSequence(),
          mappings.get(0).getMappings().get(0).getMapping().getTo());

  /*
   * dna1 to cds3
   */
  List<AlignedCodonFrame> dnaToCds3Mappings = MappingUtils
          .findMappingsForSequence(cds.get(2), dnaMappings);
  mapping = dnaToCds3Mappings.get(0).getMappings().get(0).getMapping();
  assertSame(cds.get(2).getDatasetSequence(), mapping.getTo());
  assertEquals("T(4) in CDS should map to T(10) in DNA", 10,
          mapping.getMap().getToPosition(4));
}
1463 | ||
1464 | 1 | @Test(groups = { "Functional" }) |
1465 | public void testIsMappable() | |
1466 | { | |
1467 | 1 | SequenceI dna1 = new Sequence("dna1", "cgCAGtgGT"); |
1468 | 1 | SequenceI aa1 = new Sequence("aa1", "RSG"); |
1469 | 1 | AlignmentI al1 = new Alignment(new SequenceI[] { dna1 }); |
1470 | 1 | AlignmentI al2 = new Alignment(new SequenceI[] { aa1 }); |
1471 | ||
1472 | 1 | assertFalse(AlignmentUtils.isMappable(null, null)); |
1473 | 1 | assertFalse(AlignmentUtils.isMappable(al1, null)); |
1474 | 1 | assertFalse(AlignmentUtils.isMappable(null, al1)); |
1475 | 1 | assertFalse(AlignmentUtils.isMappable(al1, al1)); |
1476 | 1 | assertFalse(AlignmentUtils.isMappable(al2, al2)); |
1477 | ||
1478 | 1 | assertTrue(AlignmentUtils.isMappable(al1, al2)); |
1479 | 1 | assertTrue(AlignmentUtils.isMappable(al2, al1)); |
1480 | } | |
1481 | ||
1482 | /** | |
1483 | * Test creating a mapping when the sequences involved do not start at residue | |
1484 | * 1 | |
1485 | * | |
1486 | * @throws IOException | |
1487 | */ | |
1488 | 1 | @Test(groups = { "Functional" }) |
1489 | public void testMapCdnaToProtein_forSubsequence() throws IOException | |
1490 | { | |
1491 | 1 | SequenceI prot = new Sequence("UNIPROT|V12345", "E-I--Q", 10, 12); |
1492 | 1 | prot.createDatasetSequence(); |
1493 | ||
1494 | 1 | SequenceI dna = new Sequence("EMBL|A33333", "GAA--AT-C-CAG", 40, 48); |
1495 | 1 | dna.createDatasetSequence(); |
1496 | ||
1497 | 1 | MapList map = AlignmentUtils.mapCdnaToProtein(prot, dna); |
1498 | 1 | assertEquals(10, map.getToLowest()); |
1499 | 1 | assertEquals(12, map.getToHighest()); |
1500 | 1 | assertEquals(40, map.getFromLowest()); |
1501 | 1 | assertEquals(48, map.getFromHighest()); |
1502 | } | |
1503 | ||
1504 | /** | |
1505 | * Test for the alignSequenceAs method where we have protein mapped to protein | |
1506 | */ | |
1507 | 1 | @Test(groups = { "Functional" }) |
1508 | public void testAlignSequenceAs_mappedProteinProtein() | |
1509 | { | |
1510 | ||
1511 | 1 | SequenceI alignMe = new Sequence("Match", "MGAASEV"); |
1512 | 1 | alignMe.createDatasetSequence(); |
1513 | 1 | SequenceI alignFrom = new Sequence("Query", "LQTGYMGAASEVMFSPTRR"); |
1514 | 1 | alignFrom.createDatasetSequence(); |
1515 | ||
1516 | 1 | AlignedCodonFrame acf = new AlignedCodonFrame(); |
1517 | // this is like a domain or motif match of part of a peptide sequence | |
1518 | 1 | MapList map = new MapList(new int[] { 6, 12 }, new int[] { 1, 7 }, 1, |
1519 | 1); | |
1520 | 1 | acf.addMap(alignFrom.getDatasetSequence(), alignMe.getDatasetSequence(), |
1521 | map); | |
1522 | ||
1523 | 1 | AlignmentUtils.alignSequenceAs(alignMe, alignFrom, acf, "-", '-', true, |
1524 | true); | |
1525 | 1 | assertEquals("-----MGAASEV-------", alignMe.getSequenceAsString()); |
1526 | } | |
1527 | ||
1528 | /** | |
1529 | * Test for the alignSequenceAs method where there are trailing unmapped | |
1530 | * residues in the model sequence | |
1531 | */ | |
1532 | 1 | @Test(groups = { "Functional" }) |
1533 | public void testAlignSequenceAs_withTrailingPeptide() | |
1534 | { | |
1535 | // map first 3 codons to KPF; G is a trailing unmapped residue | |
1536 | 1 | MapList map = new MapList(new int[] { 1, 9 }, new int[] { 1, 3 }, 3, 1); |
1537 | ||
1538 | 1 | checkAlignSequenceAs("AAACCCTTT", "K-PFG", true, true, map, |
1539 | "AAA---CCCTTT---"); | |
1540 | } | |
1541 | ||
1542 | /** | |
1543 | * Tests for transferring features between mapped sequences | |
1544 | */ | |
1545 | 1 | @Test(groups = { "Functional" }) |
1546 | public void testTransferFeatures() | |
1547 | { | |
1548 | 1 | SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt"); |
1549 | 1 | SequenceI cds = new Sequence("cds/10-15", "TAGGCC"); |
1550 | ||
1551 | // no overlap | |
1552 | 1 | dna.addSequenceFeature( |
1553 | new SequenceFeature("type1", "desc1", 1, 2, 1f, null)); | |
1554 | // partial overlap - to [1, 1] | |
1555 | 1 | dna.addSequenceFeature( |
1556 | new SequenceFeature("type2", "desc2", 3, 4, 2f, null)); | |
1557 | // exact overlap - to [1, 3] | |
1558 | 1 | dna.addSequenceFeature( |
1559 | new SequenceFeature("type3", "desc3", 4, 6, 3f, null)); | |
1560 | // spanning overlap - to [2, 5] | |
1561 | 1 | dna.addSequenceFeature( |
1562 | new SequenceFeature("type4", "desc4", 5, 11, 4f, null)); | |
1563 | // exactly overlaps whole mapped range [1, 6] | |
1564 | 1 | dna.addSequenceFeature( |
1565 | new SequenceFeature("type5", "desc5", 4, 12, 5f, null)); | |
1566 | // no overlap (internal) | |
1567 | 1 | dna.addSequenceFeature( |
1568 | new SequenceFeature("type6", "desc6", 7, 9, 6f, null)); | |
1569 | // no overlap (3' end) | |
1570 | 1 | dna.addSequenceFeature( |
1571 | new SequenceFeature("type7", "desc7", 13, 15, 7f, null)); | |
1572 | // overlap (3' end) - to [6, 6] | |
1573 | 1 | dna.addSequenceFeature( |
1574 | new SequenceFeature("type8", "desc8", 12, 12, 8f, null)); | |
1575 | // extended overlap - to [6, +] | |
1576 | 1 | dna.addSequenceFeature( |
1577 | new SequenceFeature("type9", "desc9", 12, 13, 9f, null)); | |
1578 | ||
1579 | 1 | MapList map = new MapList(new int[] { 4, 6, 10, 12 }, |
1580 | new int[] | |
1581 | { 1, 6 }, 1, 1); | |
1582 | ||
1583 | /* | |
1584 | * transferFeatures() will build 'partial overlap' for regions | |
1585 | * that partially overlap 5' or 3' (start or end) of target sequence | |
1586 | */ | |
1587 | 1 | AlignmentUtils.transferFeatures(dna, cds, map, null); |
1588 | 1 | List<SequenceFeature> sfs = cds.getSequenceFeatures(); |
1589 | 1 | assertEquals(6, sfs.size()); |
1590 | ||
1591 | 1 | SequenceFeature sf = sfs.get(0); |
1592 | 1 | assertEquals("type2", sf.getType()); |
1593 | 1 | assertEquals("desc2", sf.getDescription()); |
1594 | 1 | assertEquals(2f, sf.getScore()); |
1595 | 1 | assertEquals(1, sf.getBegin()); |
1596 | 1 | assertEquals(1, sf.getEnd()); |
1597 | ||
1598 | 1 | sf = sfs.get(1); |
1599 | 1 | assertEquals("type3", sf.getType()); |
1600 | 1 | assertEquals("desc3", sf.getDescription()); |
1601 | 1 | assertEquals(3f, sf.getScore()); |
1602 | 1 | assertEquals(1, sf.getBegin()); |
1603 | 1 | assertEquals(3, sf.getEnd()); |
1604 | ||
1605 | 1 | sf = sfs.get(2); |
1606 | 1 | assertEquals("type4", sf.getType()); |
1607 | 1 | assertEquals(2, sf.getBegin()); |
1608 | 1 | assertEquals(5, sf.getEnd()); |
1609 | ||
1610 | 1 | sf = sfs.get(3); |
1611 | 1 | assertEquals("type5", sf.getType()); |
1612 | 1 | assertEquals(1, sf.getBegin()); |
1613 | 1 | assertEquals(6, sf.getEnd()); |
1614 | ||
1615 | 1 | sf = sfs.get(4); |
1616 | 1 | assertEquals("type8", sf.getType()); |
1617 | 1 | assertEquals(6, sf.getBegin()); |
1618 | 1 | assertEquals(6, sf.getEnd()); |
1619 | ||
1620 | 1 | sf = sfs.get(5); |
1621 | 1 | assertEquals("type9", sf.getType()); |
1622 | 1 | assertEquals(6, sf.getBegin()); |
1623 | 1 | assertEquals(6, sf.getEnd()); |
1624 | } | |
1625 | ||
1626 | /** | |
1627 | * Tests for transferring features between mapped sequences, omitting the feature types passed as trailing arguments | |
1628 | */ | |
1629 | 1 | @Test(groups = { "Functional" }) |
1630 | public void testTransferFeatures_withOmit() | |
1631 | { | |
1632 | 1 | SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt"); |
1633 | 1 | SequenceI cds = new Sequence("cds/10-15", "TAGGCC"); |
1634 | ||
1635 | 1 | MapList map = new MapList(new int[] { 4, 6, 10, 12 }, |
1636 | new int[] | |
1637 | { 1, 6 }, 1, 1); | |
1638 | ||
1639 | // [5, 11] maps to [2, 5] | |
1640 | 1 | dna.addSequenceFeature( |
1641 | new SequenceFeature("type4", "desc4", 5, 11, 4f, null)); | |
1642 | // [4, 12] maps to [1, 6] | |
1643 | 1 | dna.addSequenceFeature( |
1644 | new SequenceFeature("type5", "desc5", 4, 12, 5f, null)); | |
1645 | // [12, 12] maps to [6, 6] | |
1646 | 1 | dna.addSequenceFeature( |
1647 | new SequenceFeature("type8", "desc8", 12, 12, 8f, null)); | |
1648 | ||
1649 | // "type4" and "type8" are the 'omit these' varargs | |
1650 | 1 | AlignmentUtils.transferFeatures(dna, cds, map, null, "type4", "type8"); |
1651 | 1 | List<SequenceFeature> sfs = cds.getSequenceFeatures(); |
1652 | 1 | assertEquals(1, sfs.size()); |
1653 | ||
1654 | 1 | SequenceFeature sf = sfs.get(0); |
1655 | 1 | assertEquals("type5", sf.getType()); |
1656 | 1 | assertEquals(1, sf.getBegin()); |
1657 | 1 | assertEquals(6, sf.getEnd()); |
1658 | } | |
1659 | ||
1660 | /** | |
1661 | * Tests for transferring features between mapped sequences, selecting a single feature type to transfer | |
1662 | */ | |
1663 | 1 | @Test(groups = { "Functional" }) |
1664 | public void testTransferFeatures_withSelect() | |
1665 | { | |
1666 | 1 | SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt"); |
1667 | 1 | SequenceI cds = new Sequence("cds/10-15", "TAGGCC"); |
1668 | ||
1669 | 1 | MapList map = new MapList(new int[] { 4, 6, 10, 12 }, |
1670 | new int[] | |
1671 | { 1, 6 }, 1, 1); | |
1672 | ||
1673 | // [5, 11] maps to [2, 5] | |
1674 | 1 | dna.addSequenceFeature( |
1675 | new SequenceFeature("type4", "desc4", 5, 11, 4f, null)); | |
1676 | // [4, 12] maps to [1, 6] | |
1677 | 1 | dna.addSequenceFeature( |
1678 | new SequenceFeature("type5", "desc5", 4, 12, 5f, null)); | |
1679 | // [12, 12] maps to [6, 6] | |
1680 | 1 | dna.addSequenceFeature( |
1681 | new SequenceFeature("type8", "desc8", 12, 12, 8f, null)); | |
1682 | ||
1683 | // "type5" is the 'select this type' argument | |
1684 | 1 | AlignmentUtils.transferFeatures(dna, cds, map, "type5"); |
1685 | 1 | List<SequenceFeature> sfs = cds.getSequenceFeatures(); |
1686 | 1 | assertEquals(1, sfs.size()); |
1687 | ||
1688 | 1 | SequenceFeature sf = sfs.get(0); |
1689 | 1 | assertEquals("type5", sf.getType()); |
1690 | 1 | assertEquals(1, sf.getBegin()); |
1691 | 1 | assertEquals(6, sf.getEnd()); |
1692 | } | |
1693 | ||
1694 | /** | |
1695 | * Test the method that extracts the cds-only part of a dna alignment, for the | |
1696 | * case where the cds should be aligned to match its nucleotide sequence. | |
1697 | */ | |
1698 | 1 | @Test(groups = { "Functional" }) |
1699 | public void testMakeCdsAlignment_alternativeTranscripts() | |
1700 | { | |
1701 | 1 | SequenceI dna1 = new Sequence("dna1", "aaaGGGCC-----CTTTaaaGGG"); |
1702 | // alternative transcript of same dna skips CCC codon | |
1703 | 1 | SequenceI dna2 = new Sequence("dna2", "aaaGGGCC-----cttTaaaGGG"); |
1704 | // dna3 has no mapping (protein product) so should be ignored here | |
1705 | 1 | SequenceI dna3 = new Sequence("dna3", "aaaGGGCCCCCGGGcttTaaaGGG"); |
1706 | 1 | SequenceI pep1 = new Sequence("pep1", "GPFG"); |
1707 | 1 | SequenceI pep2 = new Sequence("pep2", "GPG"); |
1708 | 1 | dna1.createDatasetSequence(); |
1709 | 1 | dna2.createDatasetSequence(); |
1710 | 1 | dna3.createDatasetSequence(); |
1711 | 1 | pep1.createDatasetSequence(); |
1712 | 1 | pep2.createDatasetSequence(); |
1713 | ||
1714 | 1 | AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 }); |
1715 | 1 | dna.setDataset(null); |
1716 | ||
1717 | 1 | MapList map = new MapList(new int[] { 4, 12, 16, 18 }, |
1718 | new int[] | |
1719 | { 1, 4 }, 3, 1); | |
1720 | 1 | AlignedCodonFrame acf = new AlignedCodonFrame(); |
1721 | 1 | acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map); |
1722 | 1 | dna.addCodonFrame(acf); |
1723 | 1 | map = new MapList(new int[] { 4, 8, 12, 12, 16, 18 }, |
1724 | new int[] | |
1725 | { 1, 3 }, 3, 1); | |
1726 | 1 | acf = new AlignedCodonFrame(); |
1727 | 1 | acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map); |
1728 | 1 | dna.addCodonFrame(acf); |
1729 | ||
1730 | 1 | AlignmentI cds = AlignmentUtils |
1731 | .makeCdsAlignment(new SequenceI[] | |
1732 | { dna1, dna2, dna3 }, dna.getDataset(), null); | |
1733 | 1 | List<SequenceI> cdsSeqs = cds.getSequences(); |
1734 | 1 | assertEquals(2, cdsSeqs.size()); |
1735 | 1 | assertEquals("GGGCCCTTTGGG", cdsSeqs.get(0).getSequenceAsString()); |
1736 | 1 | assertEquals("GGGCCTGGG", cdsSeqs.get(1).getSequenceAsString()); |
1737 | ||
1738 | /* | |
1739 | * verify shared, extended alignment dataset | |
1740 | */ | |
1741 | 1 | assertSame(dna.getDataset(), cds.getDataset()); |
1742 | 1 | assertTrue(dna.getDataset().getSequences() |
1743 | .contains(cdsSeqs.get(0).getDatasetSequence())); | |
1744 | 1 | assertTrue(dna.getDataset().getSequences() |
1745 | .contains(cdsSeqs.get(1).getDatasetSequence())); | |
1746 | ||
1747 | /* | |
1748 | * Verify 6 mappings: dna1 to cds1, cds1 to pep1, dna1 to pep1 | |
1749 | * and the same for dna2/cds2/pep2 | |
1750 | */ | |
1751 | 1 | List<AlignedCodonFrame> mappings = cds.getCodonFrames(); |
1752 | 1 | assertEquals(6, mappings.size()); |
1753 | ||
1754 | /* | |
1755 | * 2 mappings involve pep1 | |
1756 | */ | |
1757 | 1 | List<AlignedCodonFrame> pep1Mappings = MappingUtils |
1758 | .findMappingsForSequence(pep1, mappings); | |
1759 | 1 | assertEquals(2, pep1Mappings.size()); |
1760 | ||
1761 | /* | |
1762 | * Get mapping of pep1 to cds1 and verify it | |
1763 | * maps GPFG to 1-3,4-6,7-9,10-12 | |
1764 | */ | |
1765 | 1 | List<AlignedCodonFrame> pep1CdsMappings = MappingUtils |
1766 | .findMappingsForSequence(cds.getSequenceAt(0), pep1Mappings); | |
1767 | 1 | assertEquals(1, pep1CdsMappings.size()); |
1768 | 1 | SearchResultsI sr = MappingUtils.buildSearchResults(pep1, 1, |
1769 | pep1CdsMappings); | |
1770 | 1 | assertEquals(1, sr.getResults().size()); |
1771 | 1 | SearchResultMatchI m = sr.getResults().get(0); |
1772 | 1 | assertEquals(cds.getSequenceAt(0).getDatasetSequence(), |
1773 | m.getSequence()); | |
1774 | 1 | assertEquals(1, m.getStart()); |
1775 | 1 | assertEquals(3, m.getEnd()); |
1776 | 1 | sr = MappingUtils.buildSearchResults(pep1, 2, pep1CdsMappings); |
1777 | 1 | m = sr.getResults().get(0); |
1778 | 1 | assertEquals(4, m.getStart()); |
1779 | 1 | assertEquals(6, m.getEnd()); |
1780 | 1 | sr = MappingUtils.buildSearchResults(pep1, 3, pep1CdsMappings); |
1781 | 1 | m = sr.getResults().get(0); |
1782 | 1 | assertEquals(7, m.getStart()); |
1783 | 1 | assertEquals(9, m.getEnd()); |
1784 | 1 | sr = MappingUtils.buildSearchResults(pep1, 4, pep1CdsMappings); |
1785 | 1 | m = sr.getResults().get(0); |
1786 | 1 | assertEquals(10, m.getStart()); |
1787 | 1 | assertEquals(12, m.getEnd()); |
1788 | ||
1789 | /* | |
1790 | * Get mapping of pep2 to cds2 and verify it | |
1791 | * maps GPG in pep2 to 1-3,4-6,7-9 in second CDS sequence | |
1792 | */ | |
1793 | 1 | List<AlignedCodonFrame> pep2Mappings = MappingUtils |
1794 | .findMappingsForSequence(pep2, mappings); | |
1795 | 1 | assertEquals(2, pep2Mappings.size()); |
1796 | 1 | List<AlignedCodonFrame> pep2CdsMappings = MappingUtils |
1797 | .findMappingsForSequence(cds.getSequenceAt(1), pep2Mappings); | |
1798 | 1 | assertEquals(1, pep2CdsMappings.size()); |
1799 | 1 | sr = MappingUtils.buildSearchResults(pep2, 1, pep2CdsMappings); |
1800 | 1 | assertEquals(1, sr.getResults().size()); |
1801 | 1 | m = sr.getResults().get(0); |
1802 | 1 | assertEquals(cds.getSequenceAt(1).getDatasetSequence(), |
1803 | m.getSequence()); | |
1804 | 1 | assertEquals(1, m.getStart()); |
1805 | 1 | assertEquals(3, m.getEnd()); |
1806 | 1 | sr = MappingUtils.buildSearchResults(pep2, 2, pep2CdsMappings); |
1807 | 1 | m = sr.getResults().get(0); |
1808 | 1 | assertEquals(4, m.getStart()); |
1809 | 1 | assertEquals(6, m.getEnd()); |
1810 | 1 | sr = MappingUtils.buildSearchResults(pep2, 3, pep2CdsMappings); |
1811 | 1 | m = sr.getResults().get(0); |
1812 | 1 | assertEquals(7, m.getStart()); |
1813 | 1 | assertEquals(9, m.getEnd()); |
1814 | } | |
1815 | ||
1816 | /** | |
1817 | * Test the method that realigns protein to match mapped codon alignment. | |
1818 | */ | |
1819 | 1 | @Test(groups = { "Functional" }) |
1820 | public void testAlignProteinAsDna_incompleteStartCodon() | |
1821 | { | |
1822 | // seq1: incomplete start codon (not mapped), then [3, 11] | |
1823 | 1 | SequenceI dna1 = new Sequence("Seq1", "ccAAA-TTT-GGG-"); |
1824 | // seq2 codons are [4, 5], [8, 11] | |
1825 | 1 | SequenceI dna2 = new Sequence("Seq2", "ccaAA-ttT-GGG-"); |
1826 | // seq3 incomplete start codon at 'tt' | |
1827 | 1 | SequenceI dna3 = new Sequence("Seq3", "ccaaa-ttt-GGG-"); |
1828 | 1 | AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 }); |
1829 | 1 | dna.setDataset(null); |
1830 | ||
1831 | // prot1 has 'X' for incomplete start codon (not mapped) | |
1832 | 1 | SequenceI prot1 = new Sequence("Seq1", "XKFG"); // X for incomplete start |
1833 | 1 | SequenceI prot2 = new Sequence("Seq2", "NG"); |
1834 | 1 | SequenceI prot3 = new Sequence("Seq3", "XG"); // X for incomplete start |
1835 | 1 | AlignmentI protein = new Alignment( |
1836 | new SequenceI[] | |
1837 | { prot1, prot2, prot3 }); | |
1838 | 1 | protein.setDataset(null); |
1839 | ||
1840 | // map dna1 [3, 11] to prot1 [2, 4] KFG | |
1841 | 1 | MapList map = new MapList(new int[] { 3, 11 }, new int[] { 2, 4 }, 3, |
1842 | 1); | |
1843 | 1 | AlignedCodonFrame acf = new AlignedCodonFrame(); |
1844 | 1 | acf.addMap(dna1.getDatasetSequence(), prot1.getDatasetSequence(), map); |
1845 | ||
1846 | // map dna2 [4, 5] [8, 11] to prot2 [1, 2] NG | |
1847 | 1 | map = new MapList(new int[] { 4, 5, 8, 11 }, new int[] { 1, 2 }, 3, 1); |
1848 | 1 | acf.addMap(dna2.getDatasetSequence(), prot2.getDatasetSequence(), map); |
1849 | ||
1850 | // map dna3 [9, 11] to prot3 [2, 2] G | |
1851 | 1 | map = new MapList(new int[] { 9, 11 }, new int[] { 2, 2 }, 3, 1); |
1852 | 1 | acf.addMap(dna3.getDatasetSequence(), prot3.getDatasetSequence(), map); |
1853 | ||
1854 | 1 | ArrayList<AlignedCodonFrame> acfs = new ArrayList<>(); |
1855 | 1 | acfs.add(acf); |
1856 | 1 | protein.setCodonFrames(acfs); |
1857 | ||
1858 | /* | |
1859 | * verify X is included in the aligned proteins, and placed just | |
1860 | * before the first mapped residue | |
1861 | * CCT is between CCC and TTT | |
1862 | */ | |
1863 | 1 | AlignmentUtils.alignProteinAsDna(protein, dna); |
1864 | 1 | assertEquals("XK-FG", prot1.getSequenceAsString()); |
1865 | 1 | assertEquals("--N-G", prot2.getSequenceAsString()); |
1866 | 1 | assertEquals("---XG", prot3.getSequenceAsString()); |
1867 | } | |
1868 | ||
1869 | /** | |
1870 | * Tests for the method that maps the subset of a dna sequence that has CDS | |
1871 | * (or subtype) feature - case where the start codon is incomplete. | |
1872 | */ | |
1873 | 1 | @Test(groups = "Functional") |
1874 | public void testFindCdsPositions_fivePrimeIncomplete() | |
1875 | { | |
1876 | 1 | SequenceI dnaSeq = new Sequence("dna", "aaagGGCCCaaaTTTttt"); |
1877 | 1 | dnaSeq.createDatasetSequence(); |
1878 | 1 | SequenceI ds = dnaSeq.getDatasetSequence(); |
1879 | ||
1880 | // CDS for dna 5-6 (incomplete codon), 7-9 | |
1881 | 1 | SequenceFeature sf = new SequenceFeature("CDS", "", 5, 9, 0f, null); |
1882 | 1 | sf.setPhase("2"); // skip 2 bases to start of next codon |
1883 | 1 | ds.addSequenceFeature(sf); |
1884 | // CDS for dna 13-15 | |
1885 | 1 | sf = new SequenceFeature("CDS_predicted", "", 13, 15, 0f, null); |
1886 | 1 | ds.addSequenceFeature(sf); |
1887 | ||
1888 | 1 | List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq); |
1889 | ||
1890 | /* | |
1891 | * check the mapping starts with the first complete codon | |
1892 | */ | |
1893 | 1 | assertEquals(6, MappingUtils.getLength(ranges)); |
1894 | 1 | assertEquals(2, ranges.size()); |
1895 | 1 | assertEquals(7, ranges.get(0)[0]); |
1896 | 1 | assertEquals(9, ranges.get(0)[1]); |
1897 | 1 | assertEquals(13, ranges.get(1)[0]); |
1898 | 1 | assertEquals(15, ranges.get(1)[1]); |
1899 | } | |
1900 | ||
1901 | /** | |
1902 | * Tests for the method that maps the subset of a dna sequence that has CDS | |
1903 | * (or subtype) feature. | |
1904 | */ | |
1905 | 1 | @Test(groups = "Functional") |
1906 | public void testFindCdsPositions() | |
1907 | { | |
1908 | 1 | SequenceI dnaSeq = new Sequence("dna", "aaaGGGcccAAATTTttt"); |
1909 | 1 | dnaSeq.createDatasetSequence(); |
1910 | 1 | SequenceI ds = dnaSeq.getDatasetSequence(); |
1911 | ||
1912 | // CDS for dna 10-12 | |
1913 | 1 | SequenceFeature sf = new SequenceFeature("CDS_predicted", "", 10, 12, |
1914 | 0f, null); | |
1915 | 1 | sf.setStrand("+"); |
1916 | 1 | ds.addSequenceFeature(sf); |
1917 | // CDS for dna 4-6 | |
1918 | 1 | sf = new SequenceFeature("CDS", "", 4, 6, 0f, null); |
1919 | 1 | sf.setStrand("+"); |
1920 | 1 | ds.addSequenceFeature(sf); |
1921 | // exon feature should be ignored here | |
1922 | 1 | sf = new SequenceFeature("exon", "", 7, 9, 0f, null); |
1923 | 1 | ds.addSequenceFeature(sf); |
1924 | ||
1925 | 1 | List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq); |
1926 | /* | |
1927 | * verify ranges { [4-6], [10-12] } | |
1928 | * note CDS ranges are ordered ascending even if the CDS | |
1929 | * features are not | |
1930 | */ | |
1931 | 1 | assertEquals(6, MappingUtils.getLength(ranges)); |
1932 | 1 | assertEquals(2, ranges.size()); |
1933 | 1 | assertEquals(4, ranges.get(0)[0]); |
1934 | 1 | assertEquals(6, ranges.get(0)[1]); |
1935 | 1 | assertEquals(10, ranges.get(1)[0]); |
1936 | 1 | assertEquals(12, ranges.get(1)[1]); |
1937 | } | |
1938 | ||
1939 | /** | |
1940 | * Tests for the method that maps the subset of a dna sequence that has CDS | |
1941 | * (or subtype) feature, with CDS strand = '-' (reverse) | |
1942 | */ | |
1943 | // test turned off as currently findCdsPositions is not strand-dependent | |
1944 | // left in case it comes around again... | |
1945 | 0 | @Test(groups = "Functional", enabled = false) |
1946 | public void testFindCdsPositions_reverseStrand() | |
1947 | { | |
1948 | 0 | SequenceI dnaSeq = new Sequence("dna", "aaaGGGcccAAATTTttt"); |
1949 | 0 | dnaSeq.createDatasetSequence(); |
1950 | 0 | SequenceI ds = dnaSeq.getDatasetSequence(); |
1951 | ||
1952 | // CDS for dna 4-6 | |
1953 | 0 | SequenceFeature sf = new SequenceFeature("CDS", "", 4, 6, 0f, null); |
1954 | 0 | sf.setStrand("-"); |
1955 | 0 | ds.addSequenceFeature(sf); |
1956 | // exon feature should be ignored here | |
1957 | 0 | sf = new SequenceFeature("exon", "", 7, 9, 0f, null); |
1958 | 0 | ds.addSequenceFeature(sf); |
1959 | // CDS for dna 10-12 | |
1960 | 0 | sf = new SequenceFeature("CDS_predicted", "", 10, 12, 0f, null); |
1961 | 0 | sf.setStrand("-"); |
1962 | 0 | ds.addSequenceFeature(sf); |
1963 | ||
1964 | 0 | List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq); |
1965 | /* | |
1966 | * verify ranges { [12-10], [6-4] } | |
1967 | */ | |
1968 | 0 | assertEquals(6, MappingUtils.getLength(ranges)); |
1969 | 0 | assertEquals(2, ranges.size()); |
1970 | 0 | assertEquals(12, ranges.get(0)[0]); |
1971 | 0 | assertEquals(10, ranges.get(0)[1]); |
1972 | 0 | assertEquals(6, ranges.get(1)[0]); |
1973 | 0 | assertEquals(4, ranges.get(1)[1]); |
1974 | } | |
1975 | ||
1976 | /** | |
1977 | * Tests for the method that maps the subset of a dna sequence that has CDS | |
1978 | * (or subtype) feature - reverse strand case where the start codon is | |
1979 | * incomplete. | |
1980 | */ | |
1981 | 0 | @Test(groups = "Functional", enabled = false) |
1982 | // test turned off as currently findCdsPositions is not strand-dependent | |
1983 | // left in case it comes around again... | |
1984 | public void testFindCdsPositions_reverseStrandThreePrimeIncomplete() | |
1985 | { | |
1986 | 0 | SequenceI dnaSeq = new Sequence("dna", "aaagGGCCCaaaTTTttt"); |
1987 | 0 | dnaSeq.createDatasetSequence(); |
1988 | 0 | SequenceI ds = dnaSeq.getDatasetSequence(); |
1989 | ||
1990 | // CDS for dna 5-9 | |
1991 | 0 | SequenceFeature sf = new SequenceFeature("CDS", "", 5, 9, 0f, null); |
1992 | 0 | sf.setStrand("-"); |
1993 | 0 | ds.addSequenceFeature(sf); |
1994 | // CDS for dna 13-15 | |
1995 | 0 | sf = new SequenceFeature("CDS_predicted", "", 13, 15, 0f, null); |
1996 | 0 | sf.setStrand("-"); |
1997 | 0 | sf.setPhase("2"); // skip 2 bases to start of next codon |
1998 | 0 | ds.addSequenceFeature(sf); |
1999 | ||
2000 | 0 | List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq); |
2001 | ||
2002 | /* | |
2003 | * check the mapping starts with the first complete codon | |
2004 | * expect ranges [13, 13], [9, 5] | |
2005 | */ | |
2006 | 0 | assertEquals(6, MappingUtils.getLength(ranges)); |
2007 | 0 | assertEquals(2, ranges.size()); |
2008 | 0 | assertEquals(13, ranges.get(0)[0]); |
2009 | 0 | assertEquals(13, ranges.get(0)[1]); |
2010 | 0 | assertEquals(9, ranges.get(1)[0]); |
2011 | 0 | assertEquals(5, ranges.get(1)[1]); |
2012 | } | |
2013 | ||
2014 | 1 | @Test(groups = "Functional") |
2015 | public void testAlignAs_alternateTranscriptsUngapped() | |
2016 | { | |
2017 | 1 | SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa"); |
2018 | 1 | SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA"); |
2019 | 1 | AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 }); |
2020 | 1 | ((Alignment) dna).createDatasetAlignment(); |
2021 | 1 | SequenceI cds1 = new Sequence("cds1", "GGGTTT"); |
2022 | 1 | SequenceI cds2 = new Sequence("cds2", "CCCAAA"); |
2023 | 1 | AlignmentI cds = new Alignment(new SequenceI[] { cds1, cds2 }); |
2024 | 1 | ((Alignment) cds).createDatasetAlignment(); |
2025 | ||
2026 | 1 | AlignedCodonFrame acf = new AlignedCodonFrame(); |
2027 | 1 | MapList map = new MapList(new int[] { 4, 9 }, new int[] { 1, 6 }, 1, 1); |
2028 | 1 | acf.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(), map); |
2029 | 1 | map = new MapList(new int[] { 1, 3, 10, 12 }, new int[] { 1, 6 }, 1, 1); |
2030 | 1 | acf.addMap(dna2.getDatasetSequence(), cds2.getDatasetSequence(), map); |
2031 | ||
2032 | /* | |
2033 | * verify CDS alignment is as: | |
2034 | * cccGGGTTTaaa (cdna) | |
2035 | * CCCgggtttAAA (cdna) | |
2036 | * | |
2037 | * ---GGGTTT (cds; no trailing gaps, as asserted below) | |
2038 | * CCC------AAA (cds) | |
2039 | */ | |
2040 | 1 | dna.addCodonFrame(acf); |
2041 | 1 | AlignmentUtils.alignAs(cds, dna); |
2042 | 1 | assertEquals("---GGGTTT", cds.getSequenceAt(0).getSequenceAsString()); |
2043 | 1 | assertEquals("CCC------AAA", |
2044 | cds.getSequenceAt(1).getSequenceAsString()); | |
2045 | } | |
2046 | ||
2047 | 1 | @Test(groups = { "Functional" }) |
2048 | public void testAddMappedPositions() | |
2049 | { | |
2050 | 1 | SequenceI from = new Sequence("dna", "ggAA-ATcc-TT-g"); |
2051 | 1 | SequenceI seq1 = new Sequence("cds", "AAATTT"); |
2052 | 1 | from.createDatasetSequence(); |
2053 | 1 | seq1.createDatasetSequence(); |
2054 | 1 | Mapping mapping = new Mapping(seq1, |
2055 | new MapList(new int[] | |
2056 | { 3, 6, 9, 10 }, new int[] { 1, 6 }, 1, 1)); | |
2057 | 1 | Map<Integer, Map<SequenceI, Character>> map = new TreeMap<>(); |
2058 | 1 | AlignmentUtils.addMappedPositions(seq1, from, mapping, map); |
2059 | ||
2060 | /* | |
2061 | * verify map has seq1 residues in columns 3,4,6,7,11,12 | |
2062 | */ | |
2063 | 1 | assertEquals(6, map.size()); |
2064 | 1 | assertEquals('A', map.get(3).get(seq1).charValue()); |
2065 | 1 | assertEquals('A', map.get(4).get(seq1).charValue()); |
2066 | 1 | assertEquals('A', map.get(6).get(seq1).charValue()); |
2067 | 1 | assertEquals('T', map.get(7).get(seq1).charValue()); |
2068 | 1 | assertEquals('T', map.get(11).get(seq1).charValue()); |
2069 | 1 | assertEquals('T', map.get(12).get(seq1).charValue()); |
2070 | ||
2071 | /* | |
2072 | * | |
2073 | */ | |
2074 | } | |
2075 | ||
2076 | /** | |
2077 | * Test case where the mapping 'from' range includes a stop codon which is | |
2078 | * absent in the 'to' range | |
2079 | */ | |
2080 | 1 | @Test(groups = { "Functional" }) |
2081 | public void testAddMappedPositions_withStopCodon() | |
2082 | { | |
2083 | 1 | SequenceI from = new Sequence("dna", "ggAA-ATcc-TT-g"); |
2084 | 1 | SequenceI seq1 = new Sequence("cds", "AAATTT"); |
2085 | 1 | from.createDatasetSequence(); |
2086 | 1 | seq1.createDatasetSequence(); |
2087 | 1 | Mapping mapping = new Mapping(seq1, |
2088 | new MapList(new int[] | |
2089 | { 3, 6, 9, 10 }, new int[] { 1, 6 }, 1, 1)); | |
2090 | 1 | Map<Integer, Map<SequenceI, Character>> map = new TreeMap<>(); |
2091 | 1 | AlignmentUtils.addMappedPositions(seq1, from, mapping, map); |
2092 | ||
2093 | /* | |
2094 | * verify map has seq1 residues in columns 3,4,6,7,11,12 | |
2095 | */ | |
2096 | 1 | assertEquals(6, map.size()); |
2097 | 1 | assertEquals('A', map.get(3).get(seq1).charValue()); |
2098 | 1 | assertEquals('A', map.get(4).get(seq1).charValue()); |
2099 | 1 | assertEquals('A', map.get(6).get(seq1).charValue()); |
2100 | 1 | assertEquals('T', map.get(7).get(seq1).charValue()); |
2101 | 1 | assertEquals('T', map.get(11).get(seq1).charValue()); |
2102 | 1 | assertEquals('T', map.get(12).get(seq1).charValue()); |
2103 | } | |
2104 | ||
2105 | /** | |
2106 | * Test for the case where the products for which we want CDS are specified. | |
2107 | * This is to represent the case where EMBL has CDS mappings to both Uniprot | |
2108 | * and EMBLCDSPROTEIN. makeCdsAlignment() should only return the mappings for | |
2109 | * the protein sequences specified. | |
2110 | */ | |
2111 | 1 | @Test(groups = { "Functional" }) |
2112 | public void testMakeCdsAlignment_filterProducts() | |
2113 | { | |
2114 | 1 | SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa"); |
2115 | 1 | SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC"); |
2116 | 1 | SequenceI pep1 = new Sequence("Uniprot|pep1", "GF"); |
2117 | 1 | SequenceI pep2 = new Sequence("Uniprot|pep2", "GFP"); |
2118 | 1 | SequenceI pep3 = new Sequence("EMBL|pep3", "GF"); |
2119 | 1 | SequenceI pep4 = new Sequence("EMBL|pep4", "GFP"); |
2120 | 1 | dna1.createDatasetSequence(); |
2121 | 1 | dna2.createDatasetSequence(); |
2122 | 1 | pep1.createDatasetSequence(); |
2123 | 1 | pep2.createDatasetSequence(); |
2124 | 1 | pep3.createDatasetSequence(); |
2125 | 1 | pep4.createDatasetSequence(); |
2126 | 1 | AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 }); |
2127 | 1 | dna.setDataset(null); |
2128 | 1 | AlignmentI emblPeptides = new Alignment(new SequenceI[] { pep3, pep4 }); |
2129 | 1 | emblPeptides.setDataset(null); |
2130 | ||
2131 | 1 | AlignedCodonFrame acf = new AlignedCodonFrame(); |
2132 | 1 | MapList map = new MapList(new int[] { 4, 6, 10, 12 }, |
2133 | new int[] | |
2134 | { 1, 2 }, 3, 1); | |
2135 | 1 | acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map); |
2136 | 1 | acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map); |
2137 | 1 | dna.addCodonFrame(acf); |
2138 | ||
2139 | 1 | acf = new AlignedCodonFrame(); |
2140 | 1 | map = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, new int[] { 1, 3 }, |
2141 | 3, 1); | |
2142 | 1 | acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map); |
2143 | 1 | acf.addMap(dna2.getDatasetSequence(), pep4.getDatasetSequence(), map); |
2144 | 1 | dna.addCodonFrame(acf); |
2145 | ||
2146 | /* | |
2147 | * execute method under test to find CDS for EMBL peptides only | |
2148 | */ | |
2149 | 1 | AlignmentI cds = AlignmentUtils |
2150 | .makeCdsAlignment(new SequenceI[] | |
2151 | { dna1, dna2 }, dna.getDataset(), | |
2152 | emblPeptides.getSequencesArray()); | |
2153 | ||
2154 | 1 | assertEquals(2, cds.getSequences().size()); |
2155 | 1 | assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString()); |
2156 | 1 | assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString()); |
2157 | ||
2158 | /* | |
2159 | * verify shared, extended alignment dataset | |
2160 | */ | |
2161 | 1 | assertSame(dna.getDataset(), cds.getDataset()); |
2162 | 1 | assertTrue(dna.getDataset().getSequences() |
2163 | .contains(cds.getSequenceAt(0).getDatasetSequence())); | |
2164 | 1 | assertTrue(dna.getDataset().getSequences() |
2165 | .contains(cds.getSequenceAt(1).getDatasetSequence())); | |
2166 | ||
2167 | /* | |
2168 | * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide | |
2169 | * the mappings are on the shared alignment dataset | |
2170 | */ | |
2171 | 1 | List<AlignedCodonFrame> cdsMappings = cds.getDataset().getCodonFrames(); |
2172 | /* | |
2173 | * 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep) | |
2174 | */ | |
2175 | 1 | assertEquals(6, cdsMappings.size()); |
2176 | ||
2177 | /* | |
2178 | * verify that mapping sets for dna and cds alignments are different | |
2179 | * [not current behaviour - all mappings are on the alignment dataset] | |
2180 | */ | |
2181 | // select -> subselect type to test. | |
2182 | // Assert.assertNotSame(dna.getCodonFrames(), cds.getCodonFrames()); | |
2183 | // assertEquals(4, dna.getCodonFrames().size()); | |
2184 | // assertEquals(4, cds.getCodonFrames().size()); | |
2185 | ||
2186 | /* | |
2187 | * Two mappings involve pep3 (dna to pep3, cds to pep3) | |
2188 | * Mapping from pep3 to GGGTTT in first new exon sequence | |
2189 | */ | |
2190 | 1 | List<AlignedCodonFrame> pep3Mappings = MappingUtils |
2191 | .findMappingsForSequence(pep3, cdsMappings); | |
2192 | 1 | assertEquals(2, pep3Mappings.size()); |
2193 | 1 | List<AlignedCodonFrame> mappings = MappingUtils |
2194 | .findMappingsForSequence(cds.getSequenceAt(0), pep3Mappings); | |
2195 | 1 | assertEquals(1, mappings.size()); |
2196 | ||
2197 | // map G to GGG | |
2198 | 1 | SearchResultsI sr = MappingUtils.buildSearchResults(pep3, 1, mappings); |
2199 | 1 | assertEquals(1, sr.getResults().size()); |
2200 | 1 | SearchResultMatchI m = sr.getResults().get(0); |
2201 | 1 | assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence()); |
2202 | 1 | assertEquals(1, m.getStart()); |
2203 | 1 | assertEquals(3, m.getEnd()); |
2204 | // map F to TTT | |
2205 | 1 | sr = MappingUtils.buildSearchResults(pep3, 2, mappings); |
2206 | 1 | m = sr.getResults().get(0); |
2207 | 1 | assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence()); |
2208 | 1 | assertEquals(4, m.getStart()); |
2209 | 1 | assertEquals(6, m.getEnd()); |
2210 | ||
2211 | /* | |
2212 | * Two mappings involve pep4 (dna to pep4, cds to pep4) | |
2213 | * Verify mapping from pep4 to GGGTTTCCC in second new exon sequence | |
2214 | */ | |
2215 | 1 | List<AlignedCodonFrame> pep4Mappings = MappingUtils |
2216 | .findMappingsForSequence(pep4, cdsMappings); | |
2217 | 1 | assertEquals(2, pep4Mappings.size()); |
2218 | 1 | mappings = MappingUtils.findMappingsForSequence(cds.getSequenceAt(1), |
2219 | pep4Mappings); | |
2220 | 1 | assertEquals(1, mappings.size()); |
2221 | // map G to GGG | |
2222 | 1 | sr = MappingUtils.buildSearchResults(pep4, 1, mappings); |
2223 | 1 | assertEquals(1, sr.getResults().size()); |
2224 | 1 | m = sr.getResults().get(0); |
2225 | 1 | assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence()); |
2226 | 1 | assertEquals(1, m.getStart()); |
2227 | 1 | assertEquals(3, m.getEnd()); |
2228 | // map F to TTT | |
2229 | 1 | sr = MappingUtils.buildSearchResults(pep4, 2, mappings); |
2230 | 1 | m = sr.getResults().get(0); |
2231 | 1 | assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence()); |
2232 | 1 | assertEquals(4, m.getStart()); |
2233 | 1 | assertEquals(6, m.getEnd()); |
2234 | // map P to CCC | |
2235 | 1 | sr = MappingUtils.buildSearchResults(pep4, 3, mappings); |
2236 | 1 | m = sr.getResults().get(0); |
2237 | 1 | assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence()); |
2238 | 1 | assertEquals(7, m.getStart()); |
2239 | 1 | assertEquals(9, m.getEnd()); |
2240 | } | |
2241 | ||
2242 | /** | |
2243 | * Test the method that just copies aligned sequences, provided all sequences | |
2244 | * to be aligned share the aligned sequence's dataset | |
2245 | */ | |
2246 | 1 | @Test(groups = "Functional") |
2247 | public void testAlignAsSameSequences() | |
2248 | { | |
2249 | 1 | SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa"); |
2250 | 1 | SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA"); |
2251 | 1 | AlignmentI al1 = new Alignment(new SequenceI[] { dna1, dna2 }); |
2252 | 1 | ((Alignment) al1).createDatasetAlignment(); |
2253 | ||
2254 | 1 | SequenceI dna3 = new Sequence(dna1); |
2255 | 1 | SequenceI dna4 = new Sequence(dna2); |
2256 | 1 | assertSame(dna3.getDatasetSequence(), dna1.getDatasetSequence()); |
2257 | 1 | assertSame(dna4.getDatasetSequence(), dna2.getDatasetSequence()); |
2258 | 1 | String seq1 = "-cc-GG-GT-TT--aaa"; |
2259 | 1 | dna3.setSequence(seq1); |
2260 | 1 | String seq2 = "C--C-Cgg--gtt-tAA-A-"; |
2261 | 1 | dna4.setSequence(seq2); |
2262 | 1 | AlignmentI al2 = new Alignment(new SequenceI[] { dna3, dna4 }); |
2263 | 1 | ((Alignment) al2).createDatasetAlignment(); |
2264 | ||
2265 | /* | |
2266 | * alignment removes gapped columns (two internal, two trailing) | |
2267 | */ | |
2268 | 1 | assertTrue(AlignmentUtils.alignAsSameSequences(al1, al2)); |
2269 | 1 | String aligned1 = "-cc-GG-GTTT-aaa"; |
2270 | 1 | assertEquals(aligned1, al1.getSequenceAt(0).getSequenceAsString()); |
2271 | 1 | String aligned2 = "C--C-Cgg-gtttAAA"; |
2272 | 1 | assertEquals(aligned2, al1.getSequenceAt(1).getSequenceAsString()); |
2273 | ||
2274 | /* | |
2275 | * add another sequence to 'aligned' - should still succeed, since | |
2276 | * unaligned sequences still share a dataset with aligned sequences | |
2277 | */ | |
2278 | 1 | SequenceI dna5 = new Sequence("dna5", "CCCgggtttAAA"); |
2279 | 1 | dna5.createDatasetSequence(); |
2280 | 1 | al2.addSequence(dna5); |
2281 | 1 | assertTrue(AlignmentUtils.alignAsSameSequences(al1, al2)); |
2282 | 1 | assertEquals(aligned1, al1.getSequenceAt(0).getSequenceAsString()); |
2283 | 1 | assertEquals(aligned2, al1.getSequenceAt(1).getSequenceAsString()); |
2284 | ||
2285 | /* | |
2286 | * add another sequence to 'unaligned' - should fail, since now not | |
2287 | * all unaligned sequences share a dataset with aligned sequences | |
2288 | */ | |
2289 | 1 | SequenceI dna6 = new Sequence("dna6", "CCCgggtttAAA"); |
2290 | 1 | dna6.createDatasetSequence(); |
2291 | 1 | al1.addSequence(dna6); |
2292 | // JAL-2110 JBP Comment: what's the use case for this behaviour ? | |
2293 | 1 | assertFalse(AlignmentUtils.alignAsSameSequences(al1, al2)); |
2294 | } | |
2295 | ||
2296 | 1 | @Test(groups = "Functional") |
2297 | public void testAlignAsSameSequencesMultipleSubSeq() | |
2298 | { | |
2299 | 1 | SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa"); |
2300 | 1 | SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA"); |
2301 | 1 | SequenceI as1 = dna1.deriveSequence(); // cccGGGTTTaaa/1-12 |
2302 | 1 | SequenceI as2 = dna1.deriveSequence().getSubSequence(3, 7); // GGGT/4-7 |
2303 | 1 | SequenceI as3 = dna2.deriveSequence(); // CCCgggtttAAA/1-12 |
2304 | 1 | as1.insertCharAt(6, 5, '-'); |
2305 | 1 | assertEquals("cccGGG-----TTTaaa", as1.getSequenceAsString()); |
2306 | 1 | as2.insertCharAt(6, 5, '-'); |
2307 | 1 | assertEquals("GGGT-----", as2.getSequenceAsString()); |
2308 | 1 | as3.insertCharAt(3, 5, '-'); |
2309 | 1 | assertEquals("CCC-----gggtttAAA", as3.getSequenceAsString()); |
2310 | 1 | AlignmentI aligned = new Alignment(new SequenceI[] { as1, as2, as3 }); |
2311 | ||
2312 | // why do we need to cast this still ? | |
2313 | 1 | ((Alignment) aligned).createDatasetAlignment(); |
2314 | 1 | SequenceI uas1 = dna1.deriveSequence(); |
2315 | 1 | SequenceI uas2 = dna1.deriveSequence().getSubSequence(3, 7); |
2316 | 1 | SequenceI uas3 = dna2.deriveSequence(); |
2317 | 1 | AlignmentI tobealigned = new Alignment( |
2318 | new SequenceI[] | |
2319 | { uas1, uas2, uas3 }); | |
2320 | 1 | ((Alignment) tobealigned).createDatasetAlignment(); |
2321 | ||
2322 | /* | |
2323 | * alignAs lines up dataset sequences and removes empty columns (two) | |
2324 | */ | |
2325 | 1 | assertTrue(AlignmentUtils.alignAsSameSequences(tobealigned, aligned)); |
2326 | 1 | assertEquals("cccGGG---TTTaaa", uas1.getSequenceAsString()); |
2327 | 1 | assertEquals("GGGT", uas2.getSequenceAsString()); |
2328 | 1 | assertEquals("CCC---gggtttAAA", uas3.getSequenceAsString()); |
2329 | } | |
2330 | ||
2331 | 1 | @Test(groups = { "Functional" }) |
2332 | public void testTransferGeneLoci() | |
2333 | { | |
2334 | 1 | SequenceI from = new Sequence("transcript", |
2335 | "aaacccgggTTTAAACCCGGGtttaaacccgggttt"); | |
2336 | 1 | SequenceI to = new Sequence("CDS", "TTTAAACCCGGG"); |
2337 | 1 | MapList map = new MapList(new int[] { 1, 12 }, new int[] { 10, 21 }, 1, |
2338 | 1); | |
2339 | ||
2340 | /* | |
2341 | * first with nothing to transfer | |
2342 | */ | |
2343 | 1 | AlignmentUtils.transferGeneLoci(from, map, to); |
2344 | 1 | assertNull(to.getGeneLoci()); |
2345 | ||
2346 | /* | |
2347 | * next with gene loci set on 'from' sequence | |
2348 | */ | |
2349 | 1 | int[] exons = new int[] { 100, 105, 155, 164, 210, 229 }; |
2350 | 1 | MapList geneMap = new MapList(new int[] { 1, 36 }, exons, 1, 1); |
2351 | 1 | from.setGeneLoci("human", "GRCh38", "7", geneMap); |
2352 | 1 | AlignmentUtils.transferGeneLoci(from, map, to); |
2353 | ||
2354 | 1 | GeneLociI toLoci = to.getGeneLoci(); |
2355 | 1 | assertNotNull(toLoci); |
2356 | // DBRefEntry constructor upper-cases 'source' | |
2357 | 1 | assertEquals("HUMAN", toLoci.getSpeciesId()); |
2358 | 1 | assertEquals("GRCh38", toLoci.getAssemblyId()); |
2359 | 1 | assertEquals("7", toLoci.getChromosomeId()); |
2360 | ||
2361 | /* | |
2362 | * transcript 'exons' are 1-6, 7-16, 17-36 | |
2363 | * CDS 1:12 is transcript 10-21 | |
2364 | * transcript 'CDS' is 10-16, 17-21 | |
2365 | * which is 'gene' 158-164, 210-214 | |
2366 | */ | |
2367 | 1 | MapList toMap = toLoci.getMapping(); |
2368 | 1 | assertEquals(1, toMap.getFromRanges().size()); |
2369 | 1 | assertEquals(2, toMap.getFromRanges().get(0).length); |
2370 | 1 | assertEquals(1, toMap.getFromRanges().get(0)[0]); |
2371 | 1 | assertEquals(12, toMap.getFromRanges().get(0)[1]); |
2372 | 1 | assertEquals(2, toMap.getToRanges().size()); |
2373 | 1 | assertEquals(2, toMap.getToRanges().get(0).length); |
2374 | 1 | assertEquals(158, toMap.getToRanges().get(0)[0]); |
2375 | 1 | assertEquals(164, toMap.getToRanges().get(0)[1]); |
2376 | 1 | assertEquals(210, toMap.getToRanges().get(1)[0]); |
2377 | 1 | assertEquals(214, toMap.getToRanges().get(1)[1]); |
2378 | // or summarised as (but toString might change in future): | |
2379 | 1 | assertEquals("[ [1, 12] ] 1:1 to [ [158, 164] [210, 214] ]", |
2380 | toMap.toString()); | |
2381 | ||
2382 | /* | |
2383 | * an existing value is not overridden | |
2384 | */ | |
2385 | 1 | geneMap = new MapList(new int[] { 1, 36 }, new int[] { 36, 1 }, 1, 1); |
2386 | 1 | from.setGeneLoci("inhuman", "GRCh37", "6", geneMap); |
2387 | 1 | AlignmentUtils.transferGeneLoci(from, map, to); |
2388 | 1 | assertEquals("GRCh38", toLoci.getAssemblyId()); |
2389 | 1 | assertEquals("7", toLoci.getChromosomeId()); |
2390 | 1 | toMap = toLoci.getMapping(); |
2391 | 1 | assertEquals("[ [1, 12] ] 1:1 to [ [158, 164] [210, 214] ]", |
2392 | toMap.toString()); | |
2393 | } | |
2394 | ||
2395 | /** | |
2396 | * Tests for the method that maps nucleotide to protein based on CDS features | |
2397 | */ | |
2398 | 1 | @Test(groups = "Functional") |
2399 | public void testMapCdsToProtein() | |
2400 | { | |
2401 | 1 | SequenceI peptide = new Sequence("pep", "KLQ"); |
2402 | ||
2403 | /* | |
2404 | * Case 1: CDS 3 times length of peptide | |
2405 | * NB method only checks lengths match, not translation | |
2406 | */ | |
2407 | 1 | SequenceI dna = new Sequence("dna", "AACGacgtCTCCT"); |
2408 | 1 | dna.createDatasetSequence(); |
2409 | 1 | dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null)); |
2410 | 1 | dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 13, null)); |
2411 | 1 | MapList ml = AlignmentUtils.mapCdsToProtein(dna, peptide); |
2412 | 1 | assertEquals(3, ml.getFromRatio()); |
2413 | 1 | assertEquals(1, ml.getToRatio()); |
2414 | 1 | assertEquals("[[1, 3]]", |
2415 | Arrays.deepToString(ml.getToRanges().toArray())); | |
2416 | 1 | assertEquals("[[1, 4], [9, 13]]", |
2417 | Arrays.deepToString(ml.getFromRanges().toArray())); | |
2418 | ||
2419 | /* | |
2420 | * Case 2: CDS 3 times length of peptide + stop codon | |
2421 | * (note code does not currently check trailing codon is a stop codon) | |
2422 | */ | |
2423 | 1 | dna = new Sequence("dna", "AACGacgtCTCCTCCC"); |
2424 | 1 | dna.createDatasetSequence(); |
2425 | 1 | dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null)); |
2426 | 1 | dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 16, null)); |
2427 | 1 | ml = AlignmentUtils.mapCdsToProtein(dna, peptide); |
2428 | 1 | assertEquals(3, ml.getFromRatio()); |
2429 | 1 | assertEquals(1, ml.getToRatio()); |
2430 | 1 | assertEquals("[[1, 3]]", |
2431 | Arrays.deepToString(ml.getToRanges().toArray())); | |
2432 | 1 | assertEquals("[[1, 4], [9, 13]]", |
2433 | Arrays.deepToString(ml.getFromRanges().toArray())); | |
2434 | ||
2435 | /* | |
2436 | * Case 3: CDS longer than 3 * peptide + stop codon - no mapping is made | |
2437 | */ | |
2438 | 1 | dna = new Sequence("dna", "AACGacgtCTCCTTGATCA"); |
2439 | 1 | dna.createDatasetSequence(); |
2440 | 1 | dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null)); |
2441 | 1 | dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 19, null)); |
2442 | 1 | ml = AlignmentUtils.mapCdsToProtein(dna, peptide); |
2443 | 1 | assertNull(ml); |
2444 | ||
2445 | /* | |
2446 | * Case 4: CDS shorter than 3 * peptide - no mapping is made | |
2447 | */ | |
2448 | 1 | dna = new Sequence("dna", "AACGacgtCTCC"); |
2449 | 1 | dna.createDatasetSequence(); |
2450 | 1 | dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null)); |
2451 | 1 | dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 12, null)); |
2452 | 1 | ml = AlignmentUtils.mapCdsToProtein(dna, peptide); |
2453 | 1 | assertNull(ml); |
2454 | ||
2455 | /* | |
2456 | * Case 5: CDS 3 times length of peptide + part codon - mapping is truncated | |
2457 | */ | |
2458 | 1 | dna = new Sequence("dna", "AACGacgtCTCCTTG"); |
2459 | 1 | dna.createDatasetSequence(); |
2460 | 1 | dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null)); |
2461 | 1 | dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 15, null)); |
2462 | 1 | ml = AlignmentUtils.mapCdsToProtein(dna, peptide); |
2463 | 1 | assertEquals(3, ml.getFromRatio()); |
2464 | 1 | assertEquals(1, ml.getToRatio()); |
2465 | 1 | assertEquals("[[1, 3]]", |
2466 | Arrays.deepToString(ml.getToRanges().toArray())); | |
2467 | 1 | assertEquals("[[1, 4], [9, 13]]", |
2468 | Arrays.deepToString(ml.getFromRanges().toArray())); | |
2469 | ||
2470 | /* | |
2471 | * Case 6: incomplete start codon corresponding to X in peptide | |
2472 | */ | |
2473 | 1 | dna = new Sequence("dna", "ACGacgtCTCCTTGG"); |
2474 | 1 | dna.createDatasetSequence(); |
2475 | 1 | SequenceFeature sf = new SequenceFeature("CDS", "", 1, 3, null); |
2476 | 1 | sf.setPhase("2"); // skip 2 positions (AC) to start of next codon (GCT) |
2477 | 1 | dna.addSequenceFeature(sf); |
2478 | 1 | dna.addSequenceFeature(new SequenceFeature("CDS", "", 8, 15, null)); |
2479 | 1 | peptide = new Sequence("pep", "XLQ"); |
2480 | 1 | ml = AlignmentUtils.mapCdsToProtein(dna, peptide); |
2481 | 1 | assertEquals("[[2, 3]]", |
2482 | Arrays.deepToString(ml.getToRanges().toArray())); | |
2483 | 1 | assertEquals("[[3, 3], [8, 12]]", |
2484 | Arrays.deepToString(ml.getFromRanges().toArray())); | |
2485 | } | |
2486 | ||
2487 | /** | |
2488 | * Tests for the method that locates the CDS sequence that has a mapping to | |
2489 | * the given protein. That is, given a transcript-to-peptide mapping, find the | |
2490 | * cds-to-peptide mapping that relates to both, and return the CDS sequence. | |
2491 | */ | |
2492 | 1 | @Test(groups = "Functional") |
2493 | public void testFindCdsForProtein() | |
2494 | { | |
2495 | 1 | List<AlignedCodonFrame> mappings = new ArrayList<>(); |
2496 | 1 | AlignedCodonFrame acf1 = new AlignedCodonFrame(); |
2497 | 1 | mappings.add(acf1); |
2498 | ||
2499 | 1 | SequenceI dna1 = new Sequence("dna1", "cgatATcgGCTATCTATGacg"); |
2500 | 1 | dna1.createDatasetSequence(); |
2501 | ||
2502 | // NB we currently exclude STOP codon from CDS sequences | |
2503 | // the test would need to change if this changes in future | |
2504 | 1 | SequenceI cds1 = new Sequence("cds1", "ATGCTATCT"); |
2505 | 1 | cds1.createDatasetSequence(); |
2506 | ||
2507 | 1 | SequenceI pep1 = new Sequence("pep1", "MLS"); |
2508 | 1 | pep1.createDatasetSequence(); |
2509 | 1 | List<AlignedCodonFrame> seqMappings = new ArrayList<>(); |
2510 | 1 | MapList mapList = new MapList(new int[] { 5, 6, 9, 15 }, |
2511 | new int[] | |
2512 | { 1, 3 }, 3, 1); | |
2513 | 1 | Mapping dnaToPeptide = new Mapping(pep1.getDatasetSequence(), mapList); |
2514 | ||
2515 | // add dna to peptide mapping | |
2516 | 1 | seqMappings.add(acf1); |
2517 | 1 | acf1.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), |
2518 | mapList); | |
2519 | ||
2520 | /* | |
2521 | * first case - no dna-to-CDS mapping exists - search fails | |
2522 | */ | |
2523 | 1 | SequenceI seq = AlignmentUtils.findCdsForProtein(mappings, dna1, |
2524 | seqMappings, dnaToPeptide); | |
2525 | 1 | assertNull(seq); |
2526 | ||
2527 | /* | |
2528 | * second case - CDS-to-peptide mapping exists but no dna-to-CDS | |
2529 | * - search fails | |
2530 | */ | |
2531 | // todo this test fails if the mapping is added to acf1, not acf2 | |
2532 | // need to tidy up use of lists of mappings in AlignedCodonFrame | |
2533 | 1 | AlignedCodonFrame acf2 = new AlignedCodonFrame(); |
2534 | 1 | mappings.add(acf2); |
2535 | 1 | MapList cdsToPeptideMapping = new MapList(new int[] { 1, 9 }, |
2536 | new int[] | |
2537 | { 1, 3 }, 3, 1); | |
2538 | 1 | acf2.addMap(cds1.getDatasetSequence(), pep1.getDatasetSequence(), |
2539 | cdsToPeptideMapping); | |
2540 | 1 | assertNull(AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings, |
2541 | dnaToPeptide)); | |
2542 | ||
2543 | /* | |
2544 | * third case - add dna-to-CDS mapping - CDS is now found! | |
2545 | */ | |
2546 | 1 | MapList dnaToCdsMapping = new MapList(new int[] { 5, 6, 9, 15 }, |
2547 | new int[] | |
2548 | { 1, 9 }, 1, 1); | |
2549 | 1 | acf1.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(), |
2550 | dnaToCdsMapping); | |
2551 | 1 | seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings, |
2552 | dnaToPeptide); | |
2553 | 1 | assertSame(seq, cds1.getDatasetSequence()); |
2554 | } | |
2555 | ||
2556 | /** | |
2557 | * Tests for the method that locates the CDS sequence that has a mapping to | |
2558 | * the given protein. That is, given a transcript-to-peptide mapping, find the | |
2559 | * cds-to-peptide mapping that relates to both, and return the CDS sequence. | |
2560 | * This test is for the case where transcript and CDS are the same length. | |
2561 | */ | |
2562 | 1 | @Test(groups = "Functional") |
2563 | public void testFindCdsForProtein_noUTR() | |
2564 | { | |
2565 | 1 | List<AlignedCodonFrame> mappings = new ArrayList<>(); |
2566 | 1 | AlignedCodonFrame acf1 = new AlignedCodonFrame(); |
2567 | 1 | mappings.add(acf1); |
2568 | ||
2569 | 1 | SequenceI dna1 = new Sequence("dna1", "ATGCTATCTTAA"); |
2570 | 1 | dna1.createDatasetSequence(); |
2571 | ||
2572 | // NB we currently exclude STOP codon from CDS sequences | |
2573 | // the test would need to change if this changes in future | |
2574 | 1 | SequenceI cds1 = new Sequence("cds1", "ATGCTATCT"); |
2575 | 1 | cds1.createDatasetSequence(); |
2576 | ||
2577 | 1 | SequenceI pep1 = new Sequence("pep1", "MLS"); |
2578 | 1 | pep1.createDatasetSequence(); |
2579 | 1 | List<AlignedCodonFrame> seqMappings = new ArrayList<>(); |
2580 | 1 | MapList mapList = new MapList(new int[] { 1, 9 }, new int[] { 1, 3 }, 3, |
2581 | 1); | |
2582 | 1 | Mapping dnaToPeptide = new Mapping(pep1.getDatasetSequence(), mapList); |
2583 | ||
2584 | // add dna to peptide mapping | |
2585 | 1 | seqMappings.add(acf1); |
2586 | 1 | acf1.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), |
2587 | mapList); | |
2588 | ||
2589 | /* | |
2590 | * first case - transcript lacks CDS features - it appears to be | |
2591 | * the CDS sequence and is returned | |
2592 | */ | |
2593 | 1 | SequenceI seq = AlignmentUtils.findCdsForProtein(mappings, dna1, |
2594 | seqMappings, dnaToPeptide); | |
2595 | 1 | assertSame(seq, dna1.getDatasetSequence()); |
2596 | ||
2597 | /* | |
2598 | * second case - transcript has CDS feature - this means it is | |
2599 | * not returned as a match for CDS (CDS sequences don't have CDS features) | |
2600 | */ | |
2601 | 1 | dna1.addSequenceFeature( |
2602 | new SequenceFeature(SequenceOntologyI.CDS, "cds", 1, 12, null)); | |
2603 | 1 | seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings, |
2604 | dnaToPeptide); | |
2605 | 1 | assertNull(seq); |
2606 | ||
2607 | /* | |
2608 | * third case - CDS-to-peptide mapping exists but no dna-to-CDS | |
2609 | * - search fails | |
2610 | */ | |
2611 | // todo this test fails if the mapping is added to acf1, not acf2 | |
2612 | // need to tidy up use of lists of mappings in AlignedCodonFrame | |
2613 | 1 | AlignedCodonFrame acf2 = new AlignedCodonFrame(); |
2614 | 1 | mappings.add(acf2); |
2615 | 1 | MapList cdsToPeptideMapping = new MapList(new int[] { 1, 9 }, |
2616 | new int[] | |
2617 | { 1, 3 }, 3, 1); | |
2618 | 1 | acf2.addMap(cds1.getDatasetSequence(), pep1.getDatasetSequence(), |
2619 | cdsToPeptideMapping); | |
2620 | 1 | assertNull(AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings, |
2621 | dnaToPeptide)); | |
2622 | ||
2623 | /* | |
2624 | * fourth case - add dna-to-CDS mapping - CDS is now found! | |
2625 | */ | |
2626 | 1 | MapList dnaToCdsMapping = new MapList(new int[] { 1, 9 }, |
2627 | new int[] | |
2628 | { 1, 9 }, 1, 1); | |
2629 | 1 | acf1.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(), |
2630 | dnaToCdsMapping); | |
2631 | 1 | seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings, |
2632 | dnaToPeptide); | |
2633 | 1 | assertSame(seq, cds1.getDatasetSequence()); |
2634 | } | |
2635 | ||
2636 | 1 | @Test(groups = "Functional") |
2637 | public void testAddReferenceAnnotations() | |
2638 | { | |
2639 | 1 | SequenceI longseq = new Sequence("longA", "ASDASDASDASDAASDASDASDASDA"); |
2640 | 1 | Annotation[] aa = new Annotation[longseq.getLength()]; |
2641 | ||
2642 | 27 | for (int p = 0; p < aa.length; p++) |
2643 | { | |
2644 | 26 | aa[p] = new Annotation("P", "pos " + (p + 1), (char) 0, |
2645 | (float) p + 1); | |
2646 | } | |
2647 | 1 | AlignmentAnnotation refAnnot = new AlignmentAnnotation("LongSeqAnnot", |
2648 | "Annotations", aa); | |
2649 | 1 | refAnnot.setCalcId("Test"); |
2650 | 1 | longseq.addAlignmentAnnotation(refAnnot); |
2651 | 1 | verifyExpectedSequenceAnnotation(refAnnot); |
2652 | ||
2653 | 1 | Alignment ourAl = new Alignment( |
2654 | new SequenceI[] | |
2655 | { longseq.getSubSequence(5, 10), | |
2656 | longseq.getSubSequence(7, 12) }); | |
2657 | 1 | ourAl.createDatasetAlignment(); |
2658 | ||
2659 | // transfer annotation | |
2660 | 1 | SortedMap<String, String> tipEntries = new TreeMap<>(); |
2661 | 1 | Map<SequenceI, List<AlignmentAnnotation>> candidates = new LinkedHashMap<>(); |
2662 | ||
2663 | 1 | AlignmentUtils.findAddableReferenceAnnotations(ourAl.getSequences(), |
2664 | tipEntries, candidates, ourAl); | |
2665 | 1 | AlignmentUtils.addReferenceAnnotations(candidates, ourAl, null); |
2666 | ||
2667 | 1 | assertNotNull(ourAl.getAlignmentAnnotation()); |
2668 | 1 | assertEquals(ourAl.getAlignmentAnnotation().length, 2); |
2669 | ||
2670 | 1 | for (AlignmentAnnotation alan : ourAl.getAlignmentAnnotation()) |
2671 | { | |
2672 | 2 | verifyExpectedSequenceAnnotation(alan); |
2673 | } | |
2674 | // Everything above works for 2.11.3 and 2.11.2.x. | |
2675 | // now simulate copy/paste to new alignment | |
2676 | 1 | SequenceI[] newSeqAl = new SequenceI[2]; |
2677 | // copy sequences but no annotation | |
2678 | 1 | newSeqAl[0] = new Sequence(ourAl.getSequenceAt(0), |
2679 | ourAl.getSequenceAt(0).getAnnotation()); | |
2680 | 1 | newSeqAl[1] = new Sequence(ourAl.getSequenceAt(1), |
2681 | ourAl.getSequenceAt(1).getAnnotation()); | |
2682 | ||
2683 | 1 | Alignment newAl = new Alignment(newSeqAl); |
2684 | // delete annotation | |
2685 | 1 | for (SequenceI sq : newAl.getSequences()) |
2686 | { | |
2687 | 2 | sq.setAlignmentAnnotation(new AlignmentAnnotation[0]); |
2688 | } | |
2689 | // JAL-4182 scenario test | |
2690 | 1 | SequenceGroup sg = new SequenceGroup(Arrays.asList(newSeqAl)); |
2691 | 1 | sg.setStartRes(0); |
2692 | 1 | sg.setEndRes(newAl.getWidth()); |
2693 | 1 | AlignmentUtils.addReferenceAnnotationTo(newAl, newSeqAl[0], |
2694 | newSeqAl[0].getDatasetSequence().getAnnotation()[0], sg); | |
2695 | 1 | AlignmentUtils.addReferenceAnnotationTo(newAl, newSeqAl[1], |
2696 | newSeqAl[1].getDatasetSequence().getAnnotation()[0], sg); | |
2697 | 1 | for (AlignmentAnnotation alan : newAl.getAlignmentAnnotation()) |
2698 | { | |
2699 | 2 | verifyExpectedSequenceAnnotation(alan); |
2700 | } | |
2701 | } | |
2702 | ||
2703 | /** | |
2704 | * helper - tests annotation is mapped to position it was originally created | |
2705 | * for | |
2706 | * | |
2707 | * @param alan | |
2708 | */ | |
2709 | 5 | private void verifyExpectedSequenceAnnotation(AlignmentAnnotation alan) |
2710 | { | |
2711 | 51 | for (int c = 0; c < alan.annotations.length; c++) |
2712 | { | |
2713 | 46 | Annotation a = alan.annotations[c]; |
2714 | 46 | if (a != null) |
2715 | { | |
2716 | 46 | assertEquals("Misaligned annotation at " + c, |
2717 | (float) alan.sequenceRef.findPosition(c), a.value); | |
2718 | } | |
2719 | else | |
2720 | { | |
2721 | 0 | assertTrue("Unexpected Null at position " + c, |
2722 | c >= alan.sequenceRef.getLength() | |
2723 | || Comparison.isGap(alan.sequenceRef.getCharAt(c))); | |
2724 | } | |
2725 | } | |
2726 | } | |
2727 | ||
2728 | 1 | @Test(groups = "Functional") |
2729 | public void testAddReferenceContactMap() | |
2730 | { | |
2731 | 1 | SequenceI sq = new Sequence("a", "SSSQ"); |
2732 | 1 | ContactMatrixI cm = new SeqDistanceContactMatrix(4); |
2733 | 1 | AlignmentAnnotation cm_aan = sq.addContactList(cm); |
2734 | 1 | cm_aan.description = cm_aan.description + " cm1"; |
2735 | 1 | SequenceI dssq = sq.createDatasetSequence(); |
2736 | ||
2737 | // remove annotation on our non-dataset sequence | |
2738 | 1 | sq.removeAlignmentAnnotation(sq.getAnnotation()[0]); |
2739 | // test transfer | |
2740 | 1 | Alignment al = new Alignment(new SequenceI[] { sq }); |
2741 | 1 | SortedMap<String, String> tipEntries = new TreeMap<>(); |
2742 | 1 | Map<SequenceI, List<AlignmentAnnotation>> candidates = new LinkedHashMap<>(); |
2743 | ||
2744 | 1 | AlignmentUtils.findAddableReferenceAnnotations(al.getSequences(), |
2745 | tipEntries, candidates, al); | |
2746 | 1 | AlignmentUtils.addReferenceAnnotations(candidates, al, null); |
2747 | 1 | assertTrue("No contact map annotation transferred", |
2748 | al.getAlignmentAnnotation() != null | |
2749 | && al.getAlignmentAnnotation().length == 1); | |
2750 | 1 | AlignmentAnnotation alan = al.findAnnotations(sq, null, cm_aan.label) |
2751 | .iterator().next(); | |
2752 | 1 | ContactMatrixI t_cm = al.getContactMatrixFor(alan); |
2753 | 1 | assertNotNull("No contact map for the transferred annotation row.", |
2754 | t_cm); | |
2755 | 1 | assertTrue(t_cm instanceof SeqDistanceContactMatrix); |
2756 | 1 | assertTrue(((SeqDistanceContactMatrix) t_cm).hasReferenceSeq()); |
2757 | ||
2758 | 1 | ContactListI cl = al.getContactListFor(alan, 1); |
2759 | 1 | assertNotNull( |
2760 | "No contact matrix recovered after reference annotation transfer", | |
2761 | cl); | |
2762 | // semantics of sequence associated contact list is slightly tricky - column | |
2763 | // 3 in alignment should have data | |
2764 | 1 | cl = al.getContactListFor(alan, 3); |
2765 | 1 | assertNotNull( |
2766 | "Contact matrix should have data for last position in sequence", | |
2767 | cl); | |
2768 | ||
2769 | 1 | ContactMatrixI cm2 = new SeqDistanceContactMatrix(4); |
2770 | 1 | dssq.addContactList(cm2); |
2771 | 1 | tipEntries = new TreeMap<>(); |
2772 | 1 | candidates = new LinkedHashMap<>(); |
2773 | ||
2774 | 1 | AlignmentUtils.findAddableReferenceAnnotations(al.getSequences(), |
2775 | tipEntries, candidates, al); | |
2776 | 1 | AlignmentUtils.addReferenceAnnotations(candidates, al, null); |
2777 | 1 | assertTrue("Expected two contact map annotation transferred", |
2778 | al.getAlignmentAnnotation() != null | |
2779 | && al.getAlignmentAnnotation().length == 2); | |
2780 | ||
2781 | } | |
2782 | ||
2783 | 5 | @Test( |
2784 | groups = "Functional", | |
2785 | dataProvider = "SecondaryStructureAnnotations") | |
2786 | public void testSecondaryStructurePresentAndSources( | |
2787 | AlignmentAnnotation[] annotations, boolean expectedSSPresent, | |
2788 | ArrayList<String> expectedSSSources) | |
2789 | { | |
2790 | 5 | Assert.assertEquals(expectedSSPresent, |
2791 | AlignmentUtils.isSecondaryStructurePresent(annotations)); | |
2792 | } | |
2793 | ||
2794 | 1 | @DataProvider(name = "SecondaryStructureAnnotations") |
2795 | public static Object[][] provideSecondaryStructureAnnotations() | |
2796 | { | |
2797 | 1 | AlignmentAnnotation ann1 = new AlignmentAnnotation( |
2798 | "Secondary Structure", "Secondary Structure", | |
2799 | new Annotation[] {}); | |
2800 | 1 | AlignmentAnnotation ann2 = new AlignmentAnnotation("jnetpred", |
2801 | "jnetpred", new Annotation[] {}); | |
2802 | 1 | AlignmentAnnotation ann3 = new AlignmentAnnotation("Temp", "Temp", |
2803 | new Annotation[] {}); | |
2804 | 1 | AlignmentAnnotation ann4 = new AlignmentAnnotation("Temp", "Temp", |
2805 | new Annotation[] {}); | |
2806 | ||
2807 | 1 | List<String> ssSources1 = new ArrayList<>( |
2808 | Arrays.asList("3D Structures")); | |
2809 | 1 | List<String> ssSources2 = new ArrayList<>(Arrays.asList("JPred")); |
2810 | 1 | List<String> ssSources3 = new ArrayList<>( |
2811 | Arrays.asList("3D Structures", "JPred")); | |
2812 | 1 | List<String> ssSources4 = new ArrayList<>(); |
2813 | ||
2814 | 1 | return new Object[][] { |
2815 | { new AlignmentAnnotation[] | |
2816 | { ann1, ann3, ann4 }, true, ssSources1 }, | |
2817 | { new AlignmentAnnotation[] | |
2818 | { ann2, ann3, ann4 }, true, ssSources2 }, | |
2819 | { new AlignmentAnnotation[] | |
2820 | { ann3, ann4 }, false, ssSources4 }, | |
2821 | { new AlignmentAnnotation[] {}, false, ssSources4 }, | |
2822 | { new AlignmentAnnotation[] | |
2823 | { ann1, ann2, ann3, ann4 }, true, ssSources3 } }; | |
2824 | } | |
2825 | ||
2826 | 0 | @Test(dataProvider = "SecondaryStructureAnnotationColours") |
2827 | public void testSecondaryStructureAnnotationColour(char symbol, | |
2828 | Color expectedColor) | |
2829 | { | |
2830 | 0 | Color actualColor = AlignmentUtils |
2831 | .getSecondaryStructureAnnotationColour(symbol); | |
2832 | 0 | Assert.assertEquals(actualColor, expectedColor); |
2833 | } | |
2834 | ||
2835 | 0 | @DataProvider(name = "SecondaryStructureAnnotationColours") |
2836 | public static Object[][] provideSecondaryStructureAnnotationColours() | |
2837 | { | |
2838 | 0 | return new Object[][] { { 'C', Color.gray }, { 'E', Color.green }, |
2839 | { 'H', Color.red }, | |
2840 | { '-', Color.white } }; | |
2841 | } | |
2842 | ||
2843 | 0 | @Test(dataProvider = "SSAnnotationPresence") |
2844 | public void testIsSSAnnotationPresent( | |
2845 | Map<SequenceI, List<AlignmentAnnotation>> annotations, | |
2846 | boolean expectedPresence) | |
2847 | { | |
2848 | 0 | boolean actualPresence = AlignmentUtils |
2849 | .isSSAnnotationPresent(annotations); | |
2850 | 0 | Assert.assertEquals(actualPresence, expectedPresence); |
2851 | } | |
2852 | ||
2853 | 0 | @DataProvider(name = "SSAnnotationPresence") |
2854 | public static Object[][] provideSSAnnotationPresence() | |
2855 | { | |
2856 | 0 | Map<SequenceI, List<AlignmentAnnotation>> annotations1 = new HashMap<>(); |
2857 | 0 | SequenceI seq1 = new Sequence("Seq1", "ASD---ASD---ASD", 37, 45); |
2858 | 0 | List<AlignmentAnnotation> annotationsList1 = new ArrayList<>(); |
2859 | 0 | annotationsList1.add(new AlignmentAnnotation("Secondary Structure", |
2860 | "Secondary Structure", new Annotation[] {})); | |
2861 | 0 | annotations1.put(seq1, annotationsList1); // Annotation present secondary |
2862 | // structure for seq1 | |
2863 | ||
2864 | 0 | Map<SequenceI, List<AlignmentAnnotation>> annotations2 = new HashMap<>(); |
2865 | 0 | SequenceI seq2 = new Sequence("Seq2", "ASD---ASD------", 37, 42); |
2866 | 0 | List<AlignmentAnnotation> annotationsList2 = new ArrayList<>(); |
2867 | 0 | annotationsList2.add(new AlignmentAnnotation("Other Annotation", |
2868 | "Other Annotation", new Annotation[] {})); | |
2869 | 0 | annotations2.put(seq2, annotationsList2); // Annotation not related to any |
2870 | // of secondary structure for seq2 | |
2871 | ||
2872 | 0 | Map<SequenceI, List<AlignmentAnnotation>> annotations3 = new HashMap<>(); |
2873 | // Empty annotation map | |
2874 | ||
2875 | 0 | Map<SequenceI, List<AlignmentAnnotation>> annotations4 = new HashMap<>(); |
2876 | 0 | SequenceI seq4 = new Sequence("Seq4", "ASD---ASD---AS-", 37, 44); |
2877 | 0 | List<AlignmentAnnotation> annotationsList4 = new ArrayList<>(); |
2878 | 0 | annotationsList4.add(new AlignmentAnnotation("jnetpred", "jnetpred", |
2879 | new Annotation[] {})); | |
2880 | 0 | annotations4.put(seq4, annotationsList4); // Annotation present from JPred |
2881 | // for seq4 | |
2882 | ||
2883 | 0 | return new Object[][] { { annotations1, true }, // Annotations present |
2884 | // secondary structure | |
2885 | // present | |
2886 | { annotations2, false }, // No annotations related to any of the | |
2887 | // secondary structure present | |
2888 | { annotations3, false }, // Empty annotation map | |
2889 | { annotations4, true }, // Annotations present from JPred secondary | |
2890 | // structure present | |
2891 | }; | |
2892 | } | |
2893 | ||
2894 | 0 | @DataProvider(name = "SSSourceFromAnnotationDescription") |
2895 | public static Object[][] provideSSSourceFromAnnotationDescription() | |
2896 | { | |
2897 | 0 | Map<SequenceI, List<AlignmentAnnotation>> annotations1 = new HashMap<>(); |
2898 | 0 | SequenceI seq1 = new Sequence("Seq1", "ASD---ASD---ASD", 37, 45); |
2899 | 0 | List<AlignmentAnnotation> annotationsList1 = new ArrayList<>(); |
2900 | 0 | annotationsList1.add(new AlignmentAnnotation("jnetpred", "JPred Output", |
2901 | new Annotation[] {})); | |
2902 | 0 | annotations1.put(seq1, annotationsList1); // Annotation present from JPred |
2903 | // for seq1 | |
2904 | ||
2905 | 0 | Map<SequenceI, List<AlignmentAnnotation>> annotations2 = new HashMap<>(); |
2906 | 0 | SequenceI seq2 = new Sequence("Seq2", "ASD---ASD------", 37, 42); |
2907 | 0 | List<AlignmentAnnotation> annotationsList2 = new ArrayList<>(); |
2908 | 0 | annotationsList2.add(new AlignmentAnnotation("Secondary Structure", |
2909 | "Secondary Structure for af-q43517-f1A", new Annotation[] {})); | |
2910 | 0 | annotations2.put(seq2, annotationsList2); // Annotation present secondary |
2911 | // structure from Alphafold for | |
2912 | // seq2 | |
2913 | ||
2914 | 0 | Map<SequenceI, List<AlignmentAnnotation>> annotations3 = new HashMap<>(); |
2915 | // Empty annotation map | |
2916 | ||
2917 | 0 | Map<SequenceI, List<AlignmentAnnotation>> annotations4 = new HashMap<>(); |
2918 | 0 | SequenceI seq4 = new Sequence("Seq4", "ASD---ASD---AS-", 37, 44); |
2919 | 0 | List<AlignmentAnnotation> annotationsList4 = new ArrayList<>(); |
2920 | 0 | annotationsList4.add(new AlignmentAnnotation("Secondary Structure", |
2921 | "Secondary Structure for 4zhpA", new Annotation[] {})); | |
2922 | 0 | annotations4.put(seq4, annotationsList4); // Annotation present secondary |
2923 | // structure from pdb for seq4 | |
2924 | ||
2925 | 0 | Map<SequenceI, List<AlignmentAnnotation>> annotations5 = new HashMap<>(); |
2926 | 0 | SequenceI seq5 = new Sequence("Seq5", "ASD---ASD---AS-", 37, 44); |
2927 | 0 | List<AlignmentAnnotation> annotationsList5 = new ArrayList<>(); |
2928 | 0 | annotationsList5.add(new AlignmentAnnotation("Secondary Structure", |
2929 | "Secondary Structure for p09911_54-147__3a7wzn.1.p3502557454997462030P", | |
2930 | new Annotation[] {})); | |
2931 | 0 | annotations5.put(seq5, annotationsList5); // Annotation present secondary |
2932 | // structure from Swiss model for | |
2933 | // seq5 | |
2934 | ||
2935 | // JPred Output - JPred | |
2936 | // Secondary Structure for af-q43517-f1A - Alphafold | |
2937 | // Secondary Structure for 4zhpA - Experimental | |
2938 | // Secondary Structure for p09911_54-147__3a7wzn.1.p3502557454997462030P - | |
2939 | // Swiss Model | |
2940 | ||
2941 | 0 | return new Object[][] { { annotations1, "JPred" }, |
2942 | { annotations2, "Alphafold" }, | |
2943 | { annotations3, null }, | |
2944 | { annotations4, "PDB" }, | |
2945 | { annotations5, "Swiss Model" } }; | |
2946 | } | |
2947 | ||
2948 | } |