Clover icon

jalviewX

  1. Project Clover database Wed Oct 31 2018 15:13:58 GMT
  2. Package jalview.analysis

File AlignmentUtilsTests.java

 

Code metrics

8
1,379
46
1
2,950
1,860
50
0.04
29.98
46
1.09

Classes

Class Line # Actions
AlignmentUtilsTests 66 1,379 50 92
0.935799 93.6%
 

Contributing tests

This file is covered by 39 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.analysis;
22   
23    import static org.testng.AssertJUnit.assertEquals;
24    import static org.testng.AssertJUnit.assertFalse;
25    import static org.testng.AssertJUnit.assertNotNull;
26    import static org.testng.AssertJUnit.assertNull;
27    import static org.testng.AssertJUnit.assertSame;
28    import static org.testng.AssertJUnit.assertTrue;
29   
30    import jalview.analysis.AlignmentUtils.DnaVariant;
31    import jalview.datamodel.AlignedCodonFrame;
32    import jalview.datamodel.Alignment;
33    import jalview.datamodel.AlignmentAnnotation;
34    import jalview.datamodel.AlignmentI;
35    import jalview.datamodel.Annotation;
36    import jalview.datamodel.DBRefEntry;
37    import jalview.datamodel.GeneLociI;
38    import jalview.datamodel.Mapping;
39    import jalview.datamodel.SearchResultMatchI;
40    import jalview.datamodel.SearchResultsI;
41    import jalview.datamodel.Sequence;
42    import jalview.datamodel.SequenceFeature;
43    import jalview.datamodel.SequenceI;
44    import jalview.datamodel.features.SequenceFeatures;
45    import jalview.gui.JvOptionPane;
46    import jalview.io.AppletFormatAdapter;
47    import jalview.io.DataSourceType;
48    import jalview.io.FileFormat;
49    import jalview.io.FileFormatI;
50    import jalview.io.FormatAdapter;
51    import jalview.io.gff.SequenceOntologyI;
52    import jalview.util.MapList;
53    import jalview.util.MappingUtils;
54   
55    import java.io.IOException;
56    import java.util.ArrayList;
57    import java.util.Arrays;
58    import java.util.LinkedHashMap;
59    import java.util.List;
60    import java.util.Map;
61    import java.util.TreeMap;
62   
63    import org.testng.annotations.BeforeClass;
64    import org.testng.annotations.Test;
65   
 
66    public class AlignmentUtilsTests
67    {
// Shared 39-residue sequence named "short"; testExpandContext derives its subsequences from it.
68    private static Sequence ts = new Sequence("short",
69    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm");
70   
 
/**
 * Class-level set-up (BeforeClass, alwaysRun = true): switches JvOptionPane
 * to non-interactive mode and sets its mock response to CANCEL_OPTION.
 * Presumably this stops any dialog raised by the code under test from
 * blocking the run - confirm against JvOptionPane.
 */
71  1 toggle @BeforeClass(alwaysRun = true)
72    public void setUpJvOptionPane()
73    {
74  1 JvOptionPane.setInteractiveMode(false);
75  1 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
76    }
77   
 
/**
 * Test for expandContext using flank sizes from -1 to 24 on an alignment of
 * five subsequences of the shared sequence ts, taken at start offsets 4, 6,
 * 8, 10 and 12. Only two flank sizes are asserted on: -1 (each expanded
 * sequence, with gaps removed, must equal its dataset sequence ignoring
 * case) and 24 (the last sequence starts with "abc" and each earlier one has
 * two more leading gaps). For all other flank sizes the result is only
 * printed to stdout.
 */
78  1 toggle @Test(groups = { "Functional" })
79    public void testExpandContext()
80    {
81  1 AlignmentI al = new Alignment(new Sequence[] {});
82  6 for (int i = 4; i < 14; i += 2)
83    {
84  5 SequenceI s1 = ts.deriveSequence().getSubSequence(i, i + 7);
85  5 al.addSequence(s1);
86    }
87  1 System.out.println(new AppletFormatAdapter().formatSequences(
88    FileFormat.Clustal,
89    al, true));
90  27 for (int flnk = -1; flnk < 25; flnk++)
91    {
92  26 AlignmentI exp = AlignmentUtils.expandContext(al, flnk);
93  26 System.out.println("\nFlank size: " + flnk);
94  26 System.out.println(new AppletFormatAdapter().formatSequences(
95    FileFormat.Clustal, exp, true));
96  26 if (flnk == -1)
97    {
98    /*
99    * Full expansion to complete sequences
100    */
101  1 for (SequenceI sq : exp.getSequences())
102    {
// strip gap characters before comparing with the (ungapped) dataset sequence
103  5 String ung = sq.getSequenceAsString().replaceAll("-+", "");
104  5 final String errorMsg = "Flanking sequence not the same as original dataset sequence.\n"
105    + ung
106    + "\n"
107    + sq.getDatasetSequence().getSequenceAsString();
108  5 assertTrue(errorMsg, ung.equalsIgnoreCase(sq.getDatasetSequence()
109    .getSequenceAsString()));
110    }
111    }
112  25 else if (flnk == 24)
113    {
114    /*
115    * Last sequence is fully expanded, others have leading gaps to match
116    */
117  1 assertTrue(exp.getSequenceAt(4).getSequenceAsString()
118    .startsWith("abc"));
119  1 assertTrue(exp.getSequenceAt(3).getSequenceAsString()
120    .startsWith("--abc"));
121  1 assertTrue(exp.getSequenceAt(2).getSequenceAsString()
122    .startsWith("----abc"));
123  1 assertTrue(exp.getSequenceAt(1).getSequenceAsString()
124    .startsWith("------abc"));
125  1 assertTrue(exp.getSequenceAt(0).getSequenceAsString()
126    .startsWith("--------abc"));
127    }
128    }
129    }
130   
131    /**
132    * Test that annotations are correctly adjusted by expandContext
    *
    * A 3-element annotation (values 4, 5, 6) is attached to the subsequence
    * DEF of ABCDEFGHI. After full expansion (flank -1) the annotations array
    * is expected to have 9 entries with the values at indices 3-5 and nulls
    * elsewhere, while lookups by sequence position (4, 5, 6) give the same
    * values as before expansion.
133    */
 
134  1 toggle @Test(groups = { "Functional" })
135    public void testExpandContext_annotation()
136    {
137  1 AlignmentI al = new Alignment(new Sequence[] {});
138  1 SequenceI ds = new Sequence("Seq1", "ABCDEFGHI");
139    // subsequence DEF:
140  1 SequenceI seq1 = ds.deriveSequence().getSubSequence(3, 6);
141  1 al.addSequence(seq1);
142   
143    /*
144    * Annotate DEF with 4/5/6 respectively
145    */
146  1 Annotation[] anns = new Annotation[] { new Annotation(4),
147    new Annotation(5), new Annotation(6) };
148  1 AlignmentAnnotation ann = new AlignmentAnnotation("SS",
149    "secondary structure", anns);
150  1 seq1.addAlignmentAnnotation(ann);
151   
152    /*
153    * The annotations array should match aligned positions
154    */
155  1 assertEquals(3, ann.annotations.length);
156  1 assertEquals(4, ann.annotations[0].value, 0.001);
157  1 assertEquals(5, ann.annotations[1].value, 0.001);
158  1 assertEquals(6, ann.annotations[2].value, 0.001);
159   
160    /*
161    * Check annotation to sequence position mappings before expanding the
162    * sequence; these are set up in Sequence.addAlignmentAnnotation ->
163    * Annotation.setSequenceRef -> createSequenceMappings
164    */
165  1 assertNull(ann.getAnnotationForPosition(1));
166  1 assertNull(ann.getAnnotationForPosition(2));
167  1 assertNull(ann.getAnnotationForPosition(3));
168  1 assertEquals(4, ann.getAnnotationForPosition(4).value, 0.001);
169  1 assertEquals(5, ann.getAnnotationForPosition(5).value, 0.001);
170  1 assertEquals(6, ann.getAnnotationForPosition(6).value, 0.001);
171  1 assertNull(ann.getAnnotationForPosition(7));
172  1 assertNull(ann.getAnnotationForPosition(8));
173  1 assertNull(ann.getAnnotationForPosition(9));
174   
175    /*
176    * Expand the subsequence to the full sequence abcDEFghi
177    */
178  1 AlignmentI expanded = AlignmentUtils.expandContext(al, -1);
179  1 assertEquals("abcDEFghi", expanded.getSequenceAt(0)
180    .getSequenceAsString());
181   
182    /*
183    * Confirm the alignment and sequence have the same SS annotation,
184    * referencing the expanded sequence
185    */
// note: 'ann' is re-pointed at the expanded sequence's annotation from here on
186  1 ann = expanded.getSequenceAt(0).getAnnotation()[0];
187  1 assertSame(ann, expanded.getAlignmentAnnotation()[0]);
188  1 assertSame(expanded.getSequenceAt(0), ann.sequenceRef);
189   
190    /*
191    * The annotations array should have null values except for annotated
192    * positions
193    */
194  1 assertNull(ann.annotations[0]);
195  1 assertNull(ann.annotations[1]);
196  1 assertNull(ann.annotations[2]);
197  1 assertEquals(4, ann.annotations[3].value, 0.001);
198  1 assertEquals(5, ann.annotations[4].value, 0.001);
199  1 assertEquals(6, ann.annotations[5].value, 0.001);
200  1 assertNull(ann.annotations[6]);
201  1 assertNull(ann.annotations[7]);
202  1 assertNull(ann.annotations[8]);
203   
204    /*
205    * sequence position mappings should be unchanged
206    */
207  1 assertNull(ann.getAnnotationForPosition(1));
208  1 assertNull(ann.getAnnotationForPosition(2));
209  1 assertNull(ann.getAnnotationForPosition(3));
210  1 assertEquals(4, ann.getAnnotationForPosition(4).value, 0.001);
211  1 assertEquals(5, ann.getAnnotationForPosition(5).value, 0.001);
212  1 assertEquals(6, ann.getAnnotationForPosition(6).value, 0.001);
213  1 assertNull(ann.getAnnotationForPosition(7));
214  1 assertNull(ann.getAnnotationForPosition(8));
215  1 assertNull(ann.getAnnotationForPosition(9));
216    }
217   
218    /**
219    * Test method that returns a map of lists of sequences by sequence name.
    * The Fasta input has two entries named Seq1Name and one named Seq2Name;
    * sequences sharing a name are expected in one list, in input order.
220    *
221    * @throws IOException
222    */
 
223  1 toggle @Test(groups = { "Functional" })
224    public void testGetSequencesByName() throws IOException
225    {
226  1 final String data = ">Seq1Name\nKQYL\n" + ">Seq2Name\nRFPW\n"
227    + ">Seq1Name\nABCD\n";
228  1 AlignmentI al = loadAlignment(data, FileFormat.Fasta);
229  1 Map<String, List<SequenceI>> map = AlignmentUtils
230    .getSequencesByName(al);
231  1 assertEquals(2, map.keySet().size());
232  1 assertEquals(2, map.get("Seq1Name").size());
233  1 assertEquals("KQYL", map.get("Seq1Name").get(0).getSequenceAsString());
234  1 assertEquals("ABCD", map.get("Seq1Name").get(1).getSequenceAsString());
235  1 assertEquals(1, map.get("Seq2Name").size());
236  1 assertEquals("RFPW", map.get("Seq2Name").get(0).getSequenceAsString());
237    }
238   
239    /**
240    * Helper method to load an alignment and ensure dataset sequences are set up.
241    *
242    * @param data
    *          the alignment text, handed to FormatAdapter.readFile as pasted
    *          data (DataSourceType.PASTE)
243    * @param format
244    *          the file format to read the data as
245    * @return the alignment read, after setDataset(null) has been called on it
246    * @throws IOException
247    */
 
248  1 toggle protected AlignmentI loadAlignment(final String data, FileFormatI format)
249    throws IOException
250    {
251  1 AlignmentI a = new FormatAdapter().readFile(data,
252    DataSourceType.PASTE, format);
253  1 a.setDataset(null);
254  1 return a;
255    }
256   
257    /**
258    * Test mapping of protein to cDNA, for the case where we have no sequence
259    * cross-references, so mappings are made first-served 1-1 where sequences
260    * translate.
    *
    * Expected pairing, as asserted below: V12345-A22222, V12346-A33333,
    * V12347-A11111; the fourth cDNA (A44444) is left without a mapping.
261    *
262    * @throws IOException
263    */
 
264  1 toggle @Test(groups = { "Functional" })
265    public void testMapProteinAlignmentToCdna_noXrefs() throws IOException
266    {
267  1 List<SequenceI> protseqs = new ArrayList<>();
268  1 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
269  1 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
270  1 protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
271  1 AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
272  1 protein.setDataset(null);
273   
274  1 List<SequenceI> dnaseqs = new ArrayList<>();
275  1 dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR
276  1 dnaseqs.add(new Sequence("EMBL|A22222", "GAGATACAA")); // = EIQ
277  1 dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ
278  1 dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG")); // = EIQ
279  1 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[4]));
280  1 cdna.setDataset(null);
281   
282  1 assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
283   
284    // 3 mappings made, each from 1 to 1 sequence
285  1 assertEquals(3, protein.getCodonFrames().size());
286  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
287  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
288  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
289   
290    // V12345 mapped to A22222
291  1 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
292    .get(0);
293  1 assertEquals(1, acf.getdnaSeqs().length);
294  1 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
295    acf.getdnaSeqs()[0]);
296  1 Mapping[] protMappings = acf.getProtMappings();
297  1 assertEquals(1, protMappings.length);
298  1 MapList mapList = protMappings[0].getMap();
// 3:1 ratio, dna positions 1-9 to protein positions 1-3
299  1 assertEquals(3, mapList.getFromRatio());
300  1 assertEquals(1, mapList.getToRatio());
301  1 assertTrue(Arrays.equals(new int[] { 1, 9 }, mapList.getFromRanges()
302    .get(0)));
303  1 assertEquals(1, mapList.getFromRanges().size());
304  1 assertTrue(Arrays.equals(new int[] { 1, 3 },
305    mapList.getToRanges().get(0)));
306  1 assertEquals(1, mapList.getToRanges().size());
307   
308    // V12346 mapped to A33333
309  1 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
310  1 assertEquals(1, acf.getdnaSeqs().length);
311  1 assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
312    acf.getdnaSeqs()[0]);
313   
314    // V12347 mapped to A11111
315  1 acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
316  1 assertEquals(1, acf.getdnaSeqs().length);
317  1 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
318    acf.getdnaSeqs()[0]);
319   
320    // no mapping involving the 'extra' A44444
321  1 assertTrue(protein.getCodonFrame(cdna.getSequenceAt(3)).isEmpty());
322    }
323   
324    /**
325    * Test for the alignSequenceAs method that takes two sequences and a mapping.
    *
    * Here dna positions 1-6 map to protein positions 1-2 (ratio 3:1), so the
    * whole dna is coding. Each case goes through checkAlignSequenceAs; the
    * two boolean arguments are preserveMappedGaps and preserveUnmappedGaps
    * in that order, and the last argument is the expected realigned dna.
326    */
 
327  1 toggle @Test(groups = { "Functional" })
328    public void testAlignSequenceAs_withMapping_noIntrons()
329    {
330  1 MapList map = new MapList(new int[] { 1, 6 }, new int[] { 1, 2 }, 3, 1);
331   
332    /*
333    * No existing gaps in dna:
334    */
335  1 checkAlignSequenceAs("GGGAAA", "-A-L-", false, false, map,
336    "---GGG---AAA");
337   
338    /*
339    * Now introduce gaps in dna but ignore them when realigning.
340    */
341  1 checkAlignSequenceAs("-G-G-G-A-A-A-", "-A-L-", false, false, map,
342    "---GGG---AAA");
343   
344    /*
345    * Now include gaps in dna when realigning. First retaining 'mapped' gaps
346    * only, i.e. those within the exon region.
347    */
348  1 checkAlignSequenceAs("-G-G--G-A--A-A-", "-A-L-", true, false, map,
349    "---G-G--G---A--A-A");
350   
351    /*
352    * Include all gaps in dna when realigning (within and without the exon
353    * region). The leading gap, and the gaps between codons, are subsumed by
354    * the protein alignment gap.
355    */
356  1 checkAlignSequenceAs("-G-GG--AA-A---", "-A-L-", true, true, map,
357    "---G-GG---AA-A---");
358   
359    /*
360    * Include only unmapped gaps in dna when realigning (outside the exon
361    * region). The leading gap, and the gaps between codons, are subsumed by
362    * the protein alignment gap.
363    */
364  1 checkAlignSequenceAs("-G-GG--AA-A-", "-A-L-", false, true, map,
365    "---GGG---AAA---");
366    }
367   
368    /**
369    * Test for the alignSequenceAs method that takes two sequences and a mapping.
    *
    * Here only dna positions 4-6 and 10-12 map to protein positions 1-2; the
    * remaining dna positions are unmapped. The two boolean arguments to
    * checkAlignSequenceAs are preserveMappedGaps and preserveUnmappedGaps in
    * that order.
370    */
 
371  1 toggle @Test(groups = { "Functional" })
372    public void testAlignSequenceAs_withMapping_withIntrons()
373    {
374    /*
375    * Exons at codon 2 (AAA) and 4 (TTT)
376    */
377  1 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
378    new int[] { 1, 2 }, 3, 1);
379   
380    /*
381    * Simple case: no gaps in dna
382    */
383  1 checkAlignSequenceAs("GGGAAACCCTTTGGG", "--A-L-", false, false, map,
384    "GGG---AAACCCTTTGGG");
385   
386    /*
387    * Add gaps to dna - but ignore when realigning.
388    */
389  1 checkAlignSequenceAs("-G-G-G--A--A---AC-CC-T-TT-GG-G-", "--A-L-",
390    false, false, map, "GGG---AAACCCTTTGGG");
391   
392    /*
393    * Add gaps to dna - include within exons only when realigning.
394    */
395  1 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
396    true, false, map, "GGG---A--A---ACCCT-TTGGG");
397   
398    /*
399    * Include gaps outside exons only when realigning.
400    */
401  1 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
402    false, true, map, "-G-G-GAAAC-CCTTT-GG-G-");
403   
404    /*
405    * Include gaps following first intron if we are 'preserving mapped gaps'
406    */
407  1 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
408    true, true, map, "-G-G-G--A--A---A-C-CC-T-TT-GG-G-");
409   
// NOTE(review): the call below repeats the previous case exactly (same
// inputs, same flags, same expected result), so it adds no extra coverage.
410    /*
411    * Include all gaps in dna when realigning.
412    */
413  1 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
414    true, true, map, "-G-G-G--A--A---A-C-CC-T-TT-GG-G-");
415    }
416   
417    /**
418    * Test for the case where not all of the protein sequence is mapped to cDNA.
    * The map covers protein positions 1 and 3 only, so position 2 (L in the
    * model "-A-L-P-") has no dna mapped to it.
419    */
 
420  1 toggle @Test(groups = { "Functional" })
421    public void testAlignSequenceAs_withMapping_withUnmappedProtein()
422    {
423    /*
424    * Exons at codon 2 (AAA) and 4 (TTT) mapped to A and P
425    */
426  1 final MapList map = new MapList(new int[] { 4, 6, 10, 12 }, new int[] {
427    1, 1, 3, 3 }, 3, 1);
428   
429    /*
430    * -L- 'aligns' ccc------
431    */
432  1 checkAlignSequenceAs("gggAAAcccTTTggg", "-A-L-P-", false, false, map,
433    "gggAAAccc------TTTggg");
434    }
435   
436    /**
437    * Helper method that performs and verifies the method under test.
    * It wraps both strings in Sequence objects with dataset sequences, links
    * the two dataset sequences through an AlignedCodonFrame built from the
    * given map, calls AlignmentUtils.alignSequenceAs and then asserts on the
    * resulting sequence string of the alignee.
438    *
439    * @param alignee
440    * the sequence to be realigned
441    * @param alignModel
442    * the sequence whose alignment is to be copied
443    * @param preserveMappedGaps
    * passed straight through to alignSequenceAs
444    * @param preserveUnmappedGaps
    * passed straight through to alignSequenceAs
445    * @param map
    * the mapping added from the alignee's dataset sequence to the
    * alignModel's dataset sequence
446    * @param expected
    * the sequence string the alignee should have after realignment
447    */
 
448  14 toggle protected void checkAlignSequenceAs(final String alignee,
449    final String alignModel, final boolean preserveMappedGaps,
450    final boolean preserveUnmappedGaps, MapList map,
451    final String expected)
452    {
453  14 SequenceI alignMe = new Sequence("Seq1", alignee);
454  14 alignMe.createDatasetSequence();
455  14 SequenceI alignFrom = new Sequence("Seq2", alignModel);
456  14 alignFrom.createDatasetSequence();
457  14 AlignedCodonFrame acf = new AlignedCodonFrame();
458  14 acf.addMap(alignMe.getDatasetSequence(),
459    alignFrom.getDatasetSequence(), map);
460   
// "---" and '-' are presumably the gap string and gap character to use -
// confirm against the alignSequenceAs signature
461  14 AlignmentUtils.alignSequenceAs(alignMe, alignFrom, acf, "---", '-',
462    preserveMappedGaps, preserveUnmappedGaps);
463  14 assertEquals(expected, alignMe.getSequenceAsString());
464    }
465   
466    /**
467    * Test for the alignSequenceAs method where we preserve gaps in introns only.
    * Only dna positions 7-12 are mapped (to protein positions 1-2), and the
    * call uses preserveMappedGaps = false, preserveUnmappedGaps = true.
468    */
 
469  1 toggle @Test(groups = { "Functional" })
470    public void testAlignSequenceAs_keepIntronGapsOnly()
471    {
472   
473    /*
474    * Intron GGGAAA followed by exon CCCTTT
475    */
476  1 MapList map = new MapList(new int[] { 7, 12 }, new int[] { 1, 2 }, 3, 1);
477   
478  1 checkAlignSequenceAs("GG-G-AA-A-C-CC-T-TT", "AL", false, true, map,
479    "GG-G-AA-ACCCTTT");
480    }
481   
482    /**
483    * Test the method that realigns protein to match mapped codon alignment.
    * Three dna sequences (each gapped differently) are mapped 3:1 to three
    * identical proteins through one shared AlignedCodonFrame; a fourth
    * protein has no mapping and is expected to come back unchanged.
484    */
 
485  1 toggle @Test(groups = { "Functional" })
486    public void testAlignProteinAsDna()
487    {
488    // seq1 codons are [1,2,3] [4,5,6] [7,8,9] [10,11,12]
489  1 SequenceI dna1 = new Sequence("Seq1", "TGCCATTACCAG-");
490    // seq2 codons are [1,3,4] [5,6,7] [8,9,10] [11,12,13]
491  1 SequenceI dna2 = new Sequence("Seq2", "T-GCCATTACCAG");
492    // seq3 codons are [1,2,3] [4,5,7] [8,9,10] [11,12,13]
493  1 SequenceI dna3 = new Sequence("Seq3", "TGCCA-TTACCAG");
494  1 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
495  1 dna.setDataset(null);
496   
497    // protein alignment will be realigned like dna
498  1 SequenceI prot1 = new Sequence("Seq1", "CHYQ");
499  1 SequenceI prot2 = new Sequence("Seq2", "CHYQ");
500  1 SequenceI prot3 = new Sequence("Seq3", "CHYQ");
501  1 SequenceI prot4 = new Sequence("Seq4", "R-QSV"); // unmapped, unchanged
502  1 AlignmentI protein = new Alignment(new SequenceI[] { prot1, prot2,
503    prot3, prot4 });
504  1 protein.setDataset(null);
505   
// the same MapList (dna 1-12 to protein 1-4) is reused for all three pairs
506  1 MapList map = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, 3, 1);
507  1 AlignedCodonFrame acf = new AlignedCodonFrame();
508  1 acf.addMap(dna1.getDatasetSequence(), prot1.getDatasetSequence(), map);
509  1 acf.addMap(dna2.getDatasetSequence(), prot2.getDatasetSequence(), map);
510  1 acf.addMap(dna3.getDatasetSequence(), prot3.getDatasetSequence(), map);
511  1 ArrayList<AlignedCodonFrame> acfs = new ArrayList<>();
512  1 acfs.add(acf);
513  1 protein.setCodonFrames(acfs);
514   
515    /*
516    * Translated codon order is [1,2,3] [1,3,4] [4,5,6] [4,5,7] [5,6,7] [7,8,9]
517    * [8,9,10] [10,11,12] [11,12,13]
518    */
519  1 AlignmentUtils.alignProteinAsDna(protein, dna);
// nine distinct codon positions above, hence nine columns in each realigned protein
520  1 assertEquals("C-H--Y-Q-", prot1.getSequenceAsString());
521  1 assertEquals("-C--H-Y-Q", prot2.getSequenceAsString());
522  1 assertEquals("C--H--Y-Q", prot3.getSequenceAsString());
523  1 assertEquals("R-QSV", prot4.getSequenceAsString());
524    }
525   
526    /**
527    * Test the method that tests whether a CDNA sequence translates to a protein
528    * sequence
    *
    * The arguments to translatesAs are the dna characters, an integer (0, or
    * 3 in the cases whose dna begins with atg), and the protein characters.
    * Positive cases cover optional start and stop codons, with the stop
    * either absent from the protein or present as '*'; negative cases cover
    * nulls, mismatches and length disagreements.
529    */
 
530  1 toggle @Test(groups = { "Functional" })
531    public void testTranslatesAs()
532    {
533    // null arguments check
534  1 assertFalse(AlignmentUtils.translatesAs(null, 0, null));
535  1 assertFalse(AlignmentUtils.translatesAs(new char[] { 't' }, 0, null));
536  1 assertFalse(AlignmentUtils.translatesAs(null, 0, new char[] { 'a' }));
537   
538    // straight translation
539  1 assertTrue(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0,
540    "FPKG".toCharArray()));
541    // with extra start codon (not in protein)
542  1 assertTrue(AlignmentUtils.translatesAs("atgtttcccaaaggg".toCharArray(),
543    3, "FPKG".toCharArray()));
544    // with stop codon1 (not in protein)
545  1 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtaa".toCharArray(),
546    0, "FPKG".toCharArray()));
547    // with stop codon1 (in protein as *)
548  1 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtaa".toCharArray(),
549    0, "FPKG*".toCharArray()));
550    // with stop codon2 (not in protein)
551  1 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtag".toCharArray(),
552    0, "FPKG".toCharArray()));
553    // with stop codon3 (not in protein)
554  1 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtga".toCharArray(),
555    0, "FPKG".toCharArray()));
556    // with start and stop codon1
557  1 assertTrue(AlignmentUtils.translatesAs(
558    "atgtttcccaaagggtaa".toCharArray(), 3, "FPKG".toCharArray()));
559    // with start and stop codon1 (in protein as *)
560  1 assertTrue(AlignmentUtils.translatesAs(
561    "atgtttcccaaagggtaa".toCharArray(), 3, "FPKG*".toCharArray()));
562    // with start and stop codon2
563  1 assertTrue(AlignmentUtils.translatesAs(
564    "atgtttcccaaagggtag".toCharArray(), 3, "FPKG".toCharArray()));
565    // with start and stop codon3
566  1 assertTrue(AlignmentUtils.translatesAs(
567    "atgtttcccaaagggtga".toCharArray(), 3, "FPKG".toCharArray()));
568   
569    // with embedded stop codons
570  1 assertTrue(AlignmentUtils.translatesAs(
571    "atgtttTAGcccaaaTAAgggtga".toCharArray(), 3,
572    "F*PK*G".toCharArray()));
573   
574    // wrong protein
575  1 assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(),
576    0, "FPMG".toCharArray()));
577   
578    // truncated dna
579  1 assertFalse(AlignmentUtils.translatesAs("tttcccaaagg".toCharArray(), 0,
580    "FPKG".toCharArray()));
581   
582    // truncated protein
583  1 assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(),
584    0, "FPK".toCharArray()));
585   
586    // overlong dna (doesn't end in stop codon)
587  1 assertFalse(AlignmentUtils.translatesAs(
588    "tttcccaaagggttt".toCharArray(), 0, "FPKG".toCharArray()));
589   
// NOTE(review): in the input below the codon after ggg is "tta", followed
// by "ga"; tta is not one of the stop codons (taa, tag, tga) used by the
// other cases in this test, so the case may not exercise what its comment
// describes - confirm the intended input.
590    // dna + stop codon + more
591  1 assertFalse(AlignmentUtils.translatesAs(
592    "tttcccaaagggttaga".toCharArray(), 0, "FPKG".toCharArray()));
593   
594    // overlong protein
595  1 assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(),
596    0, "FPKGQ".toCharArray()));
597    }
598   
599    /**
600    * Test mapping of protein to cDNA, for cases where the cDNA has start and/or
601    * stop codons in addition to the protein coding sequence.
    *
    * Expected mapped dna ranges, as asserted below: A22222 1-9 (trailing stop
    * codon excluded), A33333 4-12 (start and stop codons excluded) and
    * A11111 4-12 (start codon excluded). A44444 is left without a mapping.
602    *
603    * @throws IOException
604    */
 
605  1 toggle @Test(groups = { "Functional" })
606    public void testMapProteinAlignmentToCdna_withStartAndStopCodons()
607    throws IOException
608    {
609  1 List<SequenceI> protseqs = new ArrayList<>();
610  1 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
611  1 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
612  1 protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
613  1 AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
614  1 protein.setDataset(null);
615   
616  1 List<SequenceI> dnaseqs = new ArrayList<>();
617    // start + SAR:
618  1 dnaseqs.add(new Sequence("EMBL|A11111", "ATGTCAGCACGC"));
619    // = EIQ + stop
620  1 dnaseqs.add(new Sequence("EMBL|A22222", "GAGATACAATAA"));
621    // = start +EIQ + stop
622  1 dnaseqs.add(new Sequence("EMBL|A33333", "ATGGAAATCCAGTAG"));
623  1 dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG"));
624  1 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[4]));
625  1 cdna.setDataset(null);
626   
627  1 assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
628   
629    // 3 mappings made, each from 1 to 1 sequence
630  1 assertEquals(3, protein.getCodonFrames().size());
631  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
632  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
633  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
634   
635    // V12345 mapped from A22222
636  1 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
637    .get(0);
638  1 assertEquals(1, acf.getdnaSeqs().length);
639  1 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
640    acf.getdnaSeqs()[0]);
641  1 Mapping[] protMappings = acf.getProtMappings();
642  1 assertEquals(1, protMappings.length);
643  1 MapList mapList = protMappings[0].getMap();
644  1 assertEquals(3, mapList.getFromRatio());
645  1 assertEquals(1, mapList.getToRatio());
646  1 assertTrue(Arrays.equals(new int[] { 1, 9 }, mapList.getFromRanges()
647    .get(0)));
648  1 assertEquals(1, mapList.getFromRanges().size());
649  1 assertTrue(Arrays.equals(new int[] { 1, 3 },
650    mapList.getToRanges().get(0)));
651  1 assertEquals(1, mapList.getToRanges().size());
652   
653    // V12346 mapped from A33333 starting position 4
654  1 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
655  1 assertEquals(1, acf.getdnaSeqs().length);
656  1 assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
657    acf.getdnaSeqs()[0]);
658  1 protMappings = acf.getProtMappings();
659  1 assertEquals(1, protMappings.length);
660  1 mapList = protMappings[0].getMap();
661  1 assertEquals(3, mapList.getFromRatio());
662  1 assertEquals(1, mapList.getToRatio());
663  1 assertTrue(Arrays.equals(new int[] { 4, 12 }, mapList.getFromRanges()
664    .get(0)));
665  1 assertEquals(1, mapList.getFromRanges().size());
666  1 assertTrue(Arrays.equals(new int[] { 1, 3 },
667    mapList.getToRanges().get(0)));
668  1 assertEquals(1, mapList.getToRanges().size());
669   
670    // V12347 mapped to A11111 starting position 4
671  1 acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
672  1 assertEquals(1, acf.getdnaSeqs().length);
673  1 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
674    acf.getdnaSeqs()[0]);
675  1 protMappings = acf.getProtMappings();
676  1 assertEquals(1, protMappings.length);
677  1 mapList = protMappings[0].getMap();
678  1 assertEquals(3, mapList.getFromRatio());
679  1 assertEquals(1, mapList.getToRatio());
680  1 assertTrue(Arrays.equals(new int[] { 4, 12 }, mapList.getFromRanges()
681    .get(0)));
682  1 assertEquals(1, mapList.getFromRanges().size());
683  1 assertTrue(Arrays.equals(new int[] { 1, 3 },
684    mapList.getToRanges().get(0)));
685  1 assertEquals(1, mapList.getToRanges().size());
686   
687    // no mapping involving the 'extra' A44444
688  1 assertTrue(protein.getCodonFrame(cdna.getSequenceAt(3)).isEmpty());
689    }
690   
691    /**
692    * Test mapping of protein to cDNA, for the case where we have some sequence
693    * cross-references. Verify that 1-to-many mappings are made where
694    * cross-references exist and sequences are mappable.
    *
    * Expected outcome, as asserted below: V12345 maps to both A22222 and
    * A44444 (within a single codon frame), V12346 to A33333, V12347 to
    * A11111, and A55555 is left without a mapping.
695    *
696    * @throws IOException
697    */
 
698  1 toggle @Test(groups = { "Functional" })
699    public void testMapProteinAlignmentToCdna_withXrefs() throws IOException
700    {
701  1 List<SequenceI> protseqs = new ArrayList<>();
702  1 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
703  1 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
704  1 protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
705  1 AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
706  1 protein.setDataset(null);
707   
708  1 List<SequenceI> dnaseqs = new ArrayList<>();
709  1 dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR
710  1 dnaseqs.add(new Sequence("EMBL|A22222", "ATGGAGATACAA")); // = start + EIQ
711  1 dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ
712  1 dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG")); // = EIQ
713  1 dnaseqs.add(new Sequence("EMBL|A55555", "GAGATTCAG")); // = EIQ
714  1 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[5]));
715  1 cdna.setDataset(null);
716   
717    // Xref A22222 to V12345 (should get mapped)
718  1 dnaseqs.get(1).addDBRef(new DBRefEntry("UNIPROT", "1", "V12345"));
719    // Xref V12345 to A44444 (should get mapped)
720  1 protseqs.get(0).addDBRef(new DBRefEntry("EMBL", "1", "A44444"));
721    // Xref A33333 to V12347 (sequence mismatch - should not get mapped)
722  1 dnaseqs.get(2).addDBRef(new DBRefEntry("UNIPROT", "1", "V12347"));
723    // as V12345 is mapped to A22222 and A44444, this leaves V12346 unmapped.
724    // it should get paired up with the unmapped A33333
725    // A11111 should be mapped to V12347
726    // A55555 is spare and has no xref so is not mapped
727   
728  1 assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
729   
730    // 4 protein mappings made for 3 proteins, 2 to V12345, 1 each to V12346/7
// (these sit in 3 codon frames, one per protein: the frame for V12345 holds
// both of its dna sequences, which is why the count asserted here is 3)
731  1 assertEquals(3, protein.getCodonFrames().size());
732  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
733  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
734  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
735   
736    // one mapping for each of the first 4 cDNA sequences
737  1 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(0)).size());
738  1 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(1)).size());
739  1 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(2)).size());
740  1 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(3)).size());
741   
742    // V12345 mapped to A22222 and A44444
743  1 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
744    .get(0);
745  1 assertEquals(2, acf.getdnaSeqs().length);
746  1 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
747    acf.getdnaSeqs()[0]);
748  1 assertEquals(cdna.getSequenceAt(3).getDatasetSequence(),
749    acf.getdnaSeqs()[1]);
750   
751    // V12346 mapped to A33333
752  1 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
753  1 assertEquals(1, acf.getdnaSeqs().length);
754  1 assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
755    acf.getdnaSeqs()[0]);
756   
757    // V12347 mapped to A11111
758  1 acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
759  1 assertEquals(1, acf.getdnaSeqs().length);
760  1 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
761    acf.getdnaSeqs()[0]);
762   
763    // no mapping involving the 'extra' A55555
764  1 assertTrue(protein.getCodonFrame(cdna.getSequenceAt(4)).isEmpty());
765    }
766   
767    /**
768    * Test mapping of protein to cDNA, for the case where we have some sequence
769    * cross-references. Verify that once we have made an xref mapping we don't
770    * also map un-xrefd sequeces.
771    *
772    * @throws IOException
773    */
 
774  1 toggle @Test(groups = { "Functional" })
775    public void testMapProteinAlignmentToCdna_prioritiseXrefs()
776    throws IOException
777    {
778  1 List<SequenceI> protseqs = new ArrayList<>();
779  1 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
780  1 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
781  1 AlignmentI protein = new Alignment(
782    protseqs.toArray(new SequenceI[protseqs.size()]));
783  1 protein.setDataset(null);
784   
785  1 List<SequenceI> dnaseqs = new ArrayList<>();
786  1 dnaseqs.add(new Sequence("EMBL|A11111", "GAAATCCAG")); // = EIQ
787  1 dnaseqs.add(new Sequence("EMBL|A22222", "GAAATTCAG")); // = EIQ
788  1 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[dnaseqs
789    .size()]));
790  1 cdna.setDataset(null);
791   
792    // Xref A22222 to V12345 (should get mapped)
793    // A11111 should then be mapped to the unmapped V12346
794  1 dnaseqs.get(1).addDBRef(new DBRefEntry("UNIPROT", "1", "V12345"));
795   
796  1 assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
797   
798    // 2 protein mappings made
799  1 assertEquals(2, protein.getCodonFrames().size());
800  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
801  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
802   
803    // one mapping for each of the cDNA sequences
804  1 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(0)).size());
805  1 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(1)).size());
806   
807    // V12345 mapped to A22222
808  1 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
809    .get(0);
810  1 assertEquals(1, acf.getdnaSeqs().length);
811  1 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
812    acf.getdnaSeqs()[0]);
813   
814    // V12346 mapped to A11111
815  1 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
816  1 assertEquals(1, acf.getdnaSeqs().length);
817  1 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
818    acf.getdnaSeqs()[0]);
819    }
820   
821    /**
822    * Test the method that shows or hides sequence annotations by type(s) and
823    * selection group.
824    */
 
825  1 toggle @Test(groups = { "Functional" })
826    public void testShowOrHideSequenceAnnotations()
827    {
828  1 SequenceI seq1 = new Sequence("Seq1", "AAA");
829  1 SequenceI seq2 = new Sequence("Seq2", "BBB");
830  1 SequenceI seq3 = new Sequence("Seq3", "CCC");
831  1 Annotation[] anns = new Annotation[] { new Annotation(2f) };
832  1 AlignmentAnnotation ann1 = new AlignmentAnnotation("Structure", "ann1",
833    anns);
834  1 ann1.setSequenceRef(seq1);
835  1 AlignmentAnnotation ann2 = new AlignmentAnnotation("Structure", "ann2",
836    anns);
837  1 ann2.setSequenceRef(seq2);
838  1 AlignmentAnnotation ann3 = new AlignmentAnnotation("Structure", "ann3",
839    anns);
840  1 AlignmentAnnotation ann4 = new AlignmentAnnotation("Temp", "ann4", anns);
841  1 ann4.setSequenceRef(seq1);
842  1 AlignmentAnnotation ann5 = new AlignmentAnnotation("Temp", "ann5", anns);
843  1 ann5.setSequenceRef(seq2);
844  1 AlignmentAnnotation ann6 = new AlignmentAnnotation("Temp", "ann6", anns);
845  1 AlignmentI al = new Alignment(new SequenceI[] { seq1, seq2, seq3 });
846  1 al.addAnnotation(ann1); // Structure for Seq1
847  1 al.addAnnotation(ann2); // Structure for Seq2
848  1 al.addAnnotation(ann3); // Structure for no sequence
849  1 al.addAnnotation(ann4); // Temp for seq1
850  1 al.addAnnotation(ann5); // Temp for seq2
851  1 al.addAnnotation(ann6); // Temp for no sequence
852  1 List<String> types = new ArrayList<>();
853  1 List<SequenceI> scope = new ArrayList<>();
854   
855    /*
856    * Set all sequence related Structure to hidden (ann1, ann2)
857    */
858  1 types.add("Structure");
859  1 AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, false,
860    false);
861  1 assertFalse(ann1.visible);
862  1 assertFalse(ann2.visible);
863  1 assertTrue(ann3.visible); // not sequence-related, not affected
864  1 assertTrue(ann4.visible); // not Structure, not affected
865  1 assertTrue(ann5.visible); // "
866  1 assertTrue(ann6.visible); // not sequence-related, not affected
867   
868    /*
869    * Set Temp in {seq1, seq3} to hidden
870    */
871  1 types.clear();
872  1 types.add("Temp");
873  1 scope.add(seq1);
874  1 scope.add(seq3);
875  1 AlignmentUtils.showOrHideSequenceAnnotations(al, types, scope, false,
876    false);
877  1 assertFalse(ann1.visible); // unchanged
878  1 assertFalse(ann2.visible); // unchanged
879  1 assertTrue(ann3.visible); // not sequence-related, not affected
880  1 assertFalse(ann4.visible); // Temp for seq1 hidden
881  1 assertTrue(ann5.visible); // not in scope, not affected
882  1 assertTrue(ann6.visible); // not sequence-related, not affected
883   
884    /*
885    * Set Temp in all sequences to hidden
886    */
887  1 types.clear();
888  1 types.add("Temp");
889  1 scope.add(seq1);
890  1 scope.add(seq3);
891  1 AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, false,
892    false);
893  1 assertFalse(ann1.visible); // unchanged
894  1 assertFalse(ann2.visible); // unchanged
895  1 assertTrue(ann3.visible); // not sequence-related, not affected
896  1 assertFalse(ann4.visible); // Temp for seq1 hidden
897  1 assertFalse(ann5.visible); // Temp for seq2 hidden
898  1 assertTrue(ann6.visible); // not sequence-related, not affected
899   
900    /*
901    * Set all types in {seq1, seq3} to visible
902    */
903  1 types.clear();
904  1 scope.clear();
905  1 scope.add(seq1);
906  1 scope.add(seq3);
907  1 AlignmentUtils.showOrHideSequenceAnnotations(al, types, scope, true,
908    true);
909  1 assertTrue(ann1.visible); // Structure for seq1 set visible
910  1 assertFalse(ann2.visible); // not in scope, unchanged
911  1 assertTrue(ann3.visible); // not sequence-related, not affected
912  1 assertTrue(ann4.visible); // Temp for seq1 set visible
913  1 assertFalse(ann5.visible); // not in scope, unchanged
914  1 assertTrue(ann6.visible); // not sequence-related, not affected
915   
916    /*
917    * Set all types in all scope to hidden
918    */
919  1 AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, true,
920    false);
921  1 assertFalse(ann1.visible);
922  1 assertFalse(ann2.visible);
923  1 assertTrue(ann3.visible); // not sequence-related, not affected
924  1 assertFalse(ann4.visible);
925  1 assertFalse(ann5.visible);
926  1 assertTrue(ann6.visible); // not sequence-related, not affected
927    }
928   
929    /**
930    * Tests for the method that checks if one sequence cross-references another
931    */
 
932  1 toggle @Test(groups = { "Functional" })
933    public void testHasCrossRef()
934    {
935  1 assertFalse(AlignmentUtils.hasCrossRef(null, null));
936  1 SequenceI seq1 = new Sequence("EMBL|A12345", "ABCDEF");
937  1 assertFalse(AlignmentUtils.hasCrossRef(seq1, null));
938  1 assertFalse(AlignmentUtils.hasCrossRef(null, seq1));
939  1 SequenceI seq2 = new Sequence("UNIPROT|V20192", "ABCDEF");
940  1 assertFalse(AlignmentUtils.hasCrossRef(seq1, seq2));
941   
942    // different ref
943  1 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "v20193"));
944  1 assertFalse(AlignmentUtils.hasCrossRef(seq1, seq2));
945   
946    // case-insensitive; version number is ignored
947  1 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "v20192"));
948  1 assertTrue(AlignmentUtils.hasCrossRef(seq1, seq2));
949   
950    // right case!
951  1 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));
952  1 assertTrue(AlignmentUtils.hasCrossRef(seq1, seq2));
953    // test is one-way only
954  1 assertFalse(AlignmentUtils.hasCrossRef(seq2, seq1));
955    }
956   
957    /**
958    * Tests for the method that checks if either sequence cross-references the
959    * other
960    */
 
961  1 toggle @Test(groups = { "Functional" })
962    public void testHaveCrossRef()
963    {
964  1 assertFalse(AlignmentUtils.hasCrossRef(null, null));
965  1 SequenceI seq1 = new Sequence("EMBL|A12345", "ABCDEF");
966  1 assertFalse(AlignmentUtils.haveCrossRef(seq1, null));
967  1 assertFalse(AlignmentUtils.haveCrossRef(null, seq1));
968  1 SequenceI seq2 = new Sequence("UNIPROT|V20192", "ABCDEF");
969  1 assertFalse(AlignmentUtils.haveCrossRef(seq1, seq2));
970   
971  1 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));
972  1 assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));
973    // next is true for haveCrossRef, false for hasCrossRef
974  1 assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));
975   
976    // now the other way round
977  1 seq1.setDBRefs(null);
978  1 seq2.addDBRef(new DBRefEntry("EMBL", "1", "A12345"));
979  1 assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));
980  1 assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));
981   
982    // now both ways
983  1 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));
984  1 assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));
985  1 assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));
986    }
987   
988    /**
989    * Test the method that extracts the cds-only part of a dna alignment.
990    */
 
991  1 toggle @Test(groups = { "Functional" })
992    public void testMakeCdsAlignment()
993    {
994    /*
995    * scenario:
996    * dna1 --> [4, 6] [10,12] --> pep1
997    * dna2 --> [1, 3] [7, 9] [13,15] --> pep2
998    */
999  1 SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
1000  1 SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC");
1001  1 SequenceI pep1 = new Sequence("pep1", "GF");
1002  1 SequenceI pep2 = new Sequence("pep2", "GFP");
1003  1 pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "pep1"));
1004  1 pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "pep2"));
1005  1 dna1.createDatasetSequence();
1006  1 dna2.createDatasetSequence();
1007  1 pep1.createDatasetSequence();
1008  1 pep2.createDatasetSequence();
1009  1 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });
1010  1 dna.setDataset(null);
1011   
1012    /*
1013    * put a variant feature on dna2 base 8
1014    * - should transfer to cds2 base 5
1015    */
1016  1 dna2.addSequenceFeature(new SequenceFeature("variant", "hgmd", 8, 8,
1017    0f, null));
1018   
1019    /*
1020    * need a sourceDbRef if we are to construct dbrefs to the CDS
1021    * sequence from the dna contig sequences
1022    */
1023  1 DBRefEntry dbref = new DBRefEntry("ENSEMBL", "0", "dna1");
1024  1 dna1.getDatasetSequence().addDBRef(dbref);
1025  1 org.testng.Assert.assertEquals(dbref, dna1.getPrimaryDBRefs().get(0));
1026  1 dbref = new DBRefEntry("ENSEMBL", "0", "dna2");
1027  1 dna2.getDatasetSequence().addDBRef(dbref);
1028  1 org.testng.Assert.assertEquals(dbref, dna2.getPrimaryDBRefs().get(0));
1029   
1030    /*
1031    * CDS sequences are 'discovered' from dna-to-protein mappings on the alignment
1032    * dataset (e.g. added from dbrefs by CrossRef.findXrefSequences)
1033    */
1034  1 MapList mapfordna1 = new MapList(new int[] { 4, 6, 10, 12 }, new int[] {
1035    1, 2 }, 3, 1);
1036  1 AlignedCodonFrame acf = new AlignedCodonFrame();
1037  1 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(),
1038    mapfordna1);
1039  1 dna.addCodonFrame(acf);
1040  1 MapList mapfordna2 = new MapList(new int[] { 1, 3, 7, 9, 13, 15 },
1041    new int[] { 1, 3 }, 3, 1);
1042  1 acf = new AlignedCodonFrame();
1043  1 acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(),
1044    mapfordna2);
1045  1 dna.addCodonFrame(acf);
1046   
1047    /*
1048    * In this case, mappings originally came from matching Uniprot accessions
1049    * - so need an xref on dna involving those regions.
1050    * These are normally constructed from CDS annotation
1051    */
1052  1 DBRefEntry dna1xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep1",
1053    new Mapping(mapfordna1));
1054  1 dna1.addDBRef(dna1xref);
1055  1 assertEquals(2, dna1.getDBRefs().length); // to self and to pep1
1056  1 DBRefEntry dna2xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep2",
1057    new Mapping(mapfordna2));
1058  1 dna2.addDBRef(dna2xref);
1059  1 assertEquals(2, dna2.getDBRefs().length); // to self and to pep2
1060   
1061    /*
1062    * execute method under test:
1063    */
1064  1 AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
1065    dna1, dna2 }, dna.getDataset(), null);
1066   
1067    /*
1068    * verify cds sequences
1069    */
1070  1 assertEquals(2, cds.getSequences().size());
1071  1 assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
1072  1 assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString());
1073   
1074    /*
1075    * verify shared, extended alignment dataset
1076    */
1077  1 assertSame(dna.getDataset(), cds.getDataset());
1078  1 SequenceI cds1Dss = cds.getSequenceAt(0).getDatasetSequence();
1079  1 SequenceI cds2Dss = cds.getSequenceAt(1).getDatasetSequence();
1080  1 assertTrue(dna.getDataset().getSequences().contains(cds1Dss));
1081  1 assertTrue(dna.getDataset().getSequences().contains(cds2Dss));
1082   
1083    /*
1084    * verify CDS has a dbref with mapping to peptide
1085    */
1086  1 assertNotNull(cds1Dss.getDBRefs());
1087  1 assertEquals(2, cds1Dss.getDBRefs().length);
1088  1 dbref = cds1Dss.getDBRefs()[0];
1089  1 assertEquals(dna1xref.getSource(), dbref.getSource());
1090    // version is via ensembl's primary ref
1091  1 assertEquals(dna1xref.getVersion(), dbref.getVersion());
1092  1 assertEquals(dna1xref.getAccessionId(), dbref.getAccessionId());
1093  1 assertNotNull(dbref.getMap());
1094  1 assertSame(pep1.getDatasetSequence(), dbref.getMap().getTo());
1095  1 MapList cdsMapping = new MapList(new int[] { 1, 6 },
1096    new int[] { 1, 2 }, 3, 1);
1097  1 assertEquals(cdsMapping, dbref.getMap().getMap());
1098   
1099    /*
1100    * verify peptide has added a dbref with reverse mapping to CDS
1101    */
1102  1 assertNotNull(pep1.getDBRefs());
1103    // FIXME pep1.getDBRefs() is 1 - is that the correct behaviour ?
1104  1 assertEquals(2, pep1.getDBRefs().length);
1105  1 dbref = pep1.getDBRefs()[1];
1106  1 assertEquals("ENSEMBL", dbref.getSource());
1107  1 assertEquals("0", dbref.getVersion());
1108  1 assertEquals("CDS|dna1", dbref.getAccessionId());
1109  1 assertNotNull(dbref.getMap());
1110  1 assertSame(cds1Dss, dbref.getMap().getTo());
1111  1 assertEquals(cdsMapping.getInverse(), dbref.getMap().getMap());
1112   
1113    /*
1114    * verify cDNA has added a dbref with mapping to CDS
1115    */
1116  1 assertEquals(3, dna1.getDBRefs().length);
1117  1 DBRefEntry dbRefEntry = dna1.getDBRefs()[2];
1118  1 assertSame(cds1Dss, dbRefEntry.getMap().getTo());
1119  1 MapList dnaToCdsMapping = new MapList(new int[] { 4, 6, 10, 12 },
1120    new int[] { 1, 6 }, 1, 1);
1121  1 assertEquals(dnaToCdsMapping, dbRefEntry.getMap().getMap());
1122  1 assertEquals(3, dna2.getDBRefs().length);
1123  1 dbRefEntry = dna2.getDBRefs()[2];
1124  1 assertSame(cds2Dss, dbRefEntry.getMap().getTo());
1125  1 dnaToCdsMapping = new MapList(new int[] { 1, 3, 7, 9, 13, 15 },
1126    new int[] { 1, 9 }, 1, 1);
1127  1 assertEquals(dnaToCdsMapping, dbRefEntry.getMap().getMap());
1128   
1129    /*
1130    * verify CDS has added a dbref with mapping to cDNA
1131    */
1132  1 assertEquals(2, cds1Dss.getDBRefs().length);
1133  1 dbRefEntry = cds1Dss.getDBRefs()[1];
1134  1 assertSame(dna1.getDatasetSequence(), dbRefEntry.getMap().getTo());
1135  1 MapList cdsToDnaMapping = new MapList(new int[] { 1, 6 }, new int[] {
1136    4, 6, 10, 12 }, 1, 1);
1137  1 assertEquals(cdsToDnaMapping, dbRefEntry.getMap().getMap());
1138  1 assertEquals(2, cds2Dss.getDBRefs().length);
1139  1 dbRefEntry = cds2Dss.getDBRefs()[1];
1140  1 assertSame(dna2.getDatasetSequence(), dbRefEntry.getMap().getTo());
1141  1 cdsToDnaMapping = new MapList(new int[] { 1, 9 }, new int[] { 1, 3, 7,
1142    9, 13, 15 }, 1, 1);
1143  1 assertEquals(cdsToDnaMapping, dbRefEntry.getMap().getMap());
1144   
1145    /*
1146    * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide
1147    * the mappings are on the shared alignment dataset
1148    * 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep)
1149    */
1150  1 List<AlignedCodonFrame> cdsMappings = cds.getDataset().getCodonFrames();
1151  1 assertEquals(6, cdsMappings.size());
1152   
1153    /*
1154    * verify that mapping sets for dna and cds alignments are different
1155    * [not current behaviour - all mappings are on the alignment dataset]
1156    */
1157    // select -> subselect type to test.
1158    // Assert.assertNotSame(dna.getCodonFrames(), cds.getCodonFrames());
1159    // assertEquals(4, dna.getCodonFrames().size());
1160    // assertEquals(4, cds.getCodonFrames().size());
1161   
1162    /*
1163    * Two mappings involve pep1 (dna to pep1, cds to pep1)
1164    * Mapping from pep1 to GGGTTT in first new exon sequence
1165    */
1166  1 List<AlignedCodonFrame> pep1Mappings = MappingUtils
1167    .findMappingsForSequence(pep1, cdsMappings);
1168  1 assertEquals(2, pep1Mappings.size());
1169  1 List<AlignedCodonFrame> mappings = MappingUtils
1170    .findMappingsForSequence(cds.getSequenceAt(0), pep1Mappings);
1171  1 assertEquals(1, mappings.size());
1172   
1173    // map G to GGG
1174  1 SearchResultsI sr = MappingUtils.buildSearchResults(pep1, 1, mappings);
1175  1 assertEquals(1, sr.getResults().size());
1176  1 SearchResultMatchI m = sr.getResults().get(0);
1177  1 assertSame(cds1Dss, m.getSequence());
1178  1 assertEquals(1, m.getStart());
1179  1 assertEquals(3, m.getEnd());
1180    // map F to TTT
1181  1 sr = MappingUtils.buildSearchResults(pep1, 2, mappings);
1182  1 m = sr.getResults().get(0);
1183  1 assertSame(cds1Dss, m.getSequence());
1184  1 assertEquals(4, m.getStart());
1185  1 assertEquals(6, m.getEnd());
1186   
1187    /*
1188    * Two mappings involve pep2 (dna to pep2, cds to pep2)
1189    * Verify mapping from pep2 to GGGTTTCCC in second new exon sequence
1190    */
1191  1 List<AlignedCodonFrame> pep2Mappings = MappingUtils
1192    .findMappingsForSequence(pep2, cdsMappings);
1193  1 assertEquals(2, pep2Mappings.size());
1194  1 mappings = MappingUtils.findMappingsForSequence(cds.getSequenceAt(1),
1195    pep2Mappings);
1196  1 assertEquals(1, mappings.size());
1197    // map G to GGG
1198  1 sr = MappingUtils.buildSearchResults(pep2, 1, mappings);
1199  1 assertEquals(1, sr.getResults().size());
1200  1 m = sr.getResults().get(0);
1201  1 assertSame(cds2Dss, m.getSequence());
1202  1 assertEquals(1, m.getStart());
1203  1 assertEquals(3, m.getEnd());
1204    // map F to TTT
1205  1 sr = MappingUtils.buildSearchResults(pep2, 2, mappings);
1206  1 m = sr.getResults().get(0);
1207  1 assertSame(cds2Dss, m.getSequence());
1208  1 assertEquals(4, m.getStart());
1209  1 assertEquals(6, m.getEnd());
1210    // map P to CCC
1211  1 sr = MappingUtils.buildSearchResults(pep2, 3, mappings);
1212  1 m = sr.getResults().get(0);
1213  1 assertSame(cds2Dss, m.getSequence());
1214  1 assertEquals(7, m.getStart());
1215  1 assertEquals(9, m.getEnd());
1216   
1217    /*
1218    * check cds2 acquired a variant feature in position 5
1219    */
1220  1 List<SequenceFeature> sfs = cds2Dss.getSequenceFeatures();
1221  1 assertNotNull(sfs);
1222  1 assertEquals(1, sfs.size());
1223  1 assertEquals("variant", sfs.get(0).type);
1224  1 assertEquals(5, sfs.get(0).begin);
1225  1 assertEquals(5, sfs.get(0).end);
1226    }
1227   
1228    /**
1229    * Test the method that makes a cds-only alignment from a DNA sequence and its
1230    * product mappings, for the case where there are multiple exon mappings to
1231    * different protein products.
1232    */
 
1233  1 toggle @Test(groups = { "Functional" })
1234    public void testMakeCdsAlignment_multipleProteins()
1235    {
1236  1 SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
1237  1 SequenceI pep1 = new Sequence("pep1", "GF"); // GGGTTT
1238  1 SequenceI pep2 = new Sequence("pep2", "KP"); // aaaccc
1239  1 SequenceI pep3 = new Sequence("pep3", "KF"); // aaaTTT
1240  1 dna1.createDatasetSequence();
1241  1 pep1.createDatasetSequence();
1242  1 pep2.createDatasetSequence();
1243  1 pep3.createDatasetSequence();
1244  1 pep1.getDatasetSequence().addDBRef(
1245    new DBRefEntry("EMBLCDS", "2", "A12345"));
1246  1 pep2.getDatasetSequence().addDBRef(
1247    new DBRefEntry("EMBLCDS", "3", "A12346"));
1248  1 pep3.getDatasetSequence().addDBRef(
1249    new DBRefEntry("EMBLCDS", "4", "A12347"));
1250   
1251    /*
1252    * Create the CDS alignment
1253    */
1254  1 AlignmentI dna = new Alignment(new SequenceI[] { dna1 });
1255  1 dna.setDataset(null);
1256   
1257    /*
1258    * Make the mappings from dna to protein
1259    */
1260    // map ...GGG...TTT to GF
1261  1 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1262    new int[] { 1, 2 }, 3, 1);
1263  1 AlignedCodonFrame acf = new AlignedCodonFrame();
1264  1 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
1265  1 dna.addCodonFrame(acf);
1266   
1267    // map aaa...ccc to KP
1268  1 map = new MapList(new int[] { 1, 3, 7, 9 }, new int[] { 1, 2 }, 3, 1);
1269  1 acf = new AlignedCodonFrame();
1270  1 acf.addMap(dna1.getDatasetSequence(), pep2.getDatasetSequence(), map);
1271  1 dna.addCodonFrame(acf);
1272   
1273    // map aaa......TTT to KF
1274  1 map = new MapList(new int[] { 1, 3, 10, 12 }, new int[] { 1, 2 }, 3, 1);
1275  1 acf = new AlignedCodonFrame();
1276  1 acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map);
1277  1 dna.addCodonFrame(acf);
1278   
1279    /*
1280    * execute method under test
1281    */
1282  1 AlignmentI cdsal = AlignmentUtils.makeCdsAlignment(
1283    new SequenceI[] { dna1 }, dna.getDataset(), null);
1284   
1285    /*
1286    * Verify we have 3 cds sequences, mapped to pep1/2/3 respectively
1287    */
1288  1 List<SequenceI> cds = cdsal.getSequences();
1289  1 assertEquals(3, cds.size());
1290   
1291    /*
1292    * verify shared, extended alignment dataset
1293    */
1294  1 assertSame(cdsal.getDataset(), dna.getDataset());
1295  1 assertTrue(dna.getDataset().getSequences()
1296    .contains(cds.get(0).getDatasetSequence()));
1297  1 assertTrue(dna.getDataset().getSequences()
1298    .contains(cds.get(1).getDatasetSequence()));
1299  1 assertTrue(dna.getDataset().getSequences()
1300    .contains(cds.get(2).getDatasetSequence()));
1301   
1302    /*
1303    * verify aligned cds sequences and their xrefs
1304    */
1305  1 SequenceI cdsSeq = cds.get(0);
1306  1 assertEquals("GGGTTT", cdsSeq.getSequenceAsString());
1307    // assertEquals("dna1|A12345", cdsSeq.getName());
1308  1 assertEquals("CDS|dna1", cdsSeq.getName());
1309    // assertEquals(1, cdsSeq.getDBRefs().length);
1310    // DBRefEntry cdsRef = cdsSeq.getDBRefs()[0];
1311    // assertEquals("EMBLCDS", cdsRef.getSource());
1312    // assertEquals("2", cdsRef.getVersion());
1313    // assertEquals("A12345", cdsRef.getAccessionId());
1314   
1315  1 cdsSeq = cds.get(1);
1316  1 assertEquals("aaaccc", cdsSeq.getSequenceAsString());
1317    // assertEquals("dna1|A12346", cdsSeq.getName());
1318  1 assertEquals("CDS|dna1", cdsSeq.getName());
1319    // assertEquals(1, cdsSeq.getDBRefs().length);
1320    // cdsRef = cdsSeq.getDBRefs()[0];
1321    // assertEquals("EMBLCDS", cdsRef.getSource());
1322    // assertEquals("3", cdsRef.getVersion());
1323    // assertEquals("A12346", cdsRef.getAccessionId());
1324   
1325  1 cdsSeq = cds.get(2);
1326  1 assertEquals("aaaTTT", cdsSeq.getSequenceAsString());
1327    // assertEquals("dna1|A12347", cdsSeq.getName());
1328  1 assertEquals("CDS|dna1", cdsSeq.getName());
1329    // assertEquals(1, cdsSeq.getDBRefs().length);
1330    // cdsRef = cdsSeq.getDBRefs()[0];
1331    // assertEquals("EMBLCDS", cdsRef.getSource());
1332    // assertEquals("4", cdsRef.getVersion());
1333    // assertEquals("A12347", cdsRef.getAccessionId());
1334   
1335    /*
1336    * Verify there are mappings from each cds sequence to its protein product
1337    * and also to its dna source
1338    */
1339  1 List<AlignedCodonFrame> newMappings = cdsal.getCodonFrames();
1340   
1341    /*
1342    * 6 mappings involve dna1 (to pep1/2/3, cds1/2/3)
1343    */
1344  1 List<AlignedCodonFrame> dnaMappings = MappingUtils
1345    .findMappingsForSequence(dna1, newMappings);
1346  1 assertEquals(6, dnaMappings.size());
1347   
1348    /*
1349    * dna1 to pep1
1350    */
1351  1 List<AlignedCodonFrame> mappings = MappingUtils
1352    .findMappingsForSequence(pep1, dnaMappings);
1353  1 assertEquals(1, mappings.size());
1354  1 assertEquals(1, mappings.get(0).getMappings().size());
1355  1 assertSame(pep1.getDatasetSequence(), mappings.get(0).getMappings()
1356    .get(0).getMapping().getTo());
1357   
1358    /*
1359    * dna1 to cds1
1360    */
1361  1 List<AlignedCodonFrame> dnaToCds1Mappings = MappingUtils
1362    .findMappingsForSequence(cds.get(0), dnaMappings);
1363  1 Mapping mapping = dnaToCds1Mappings.get(0).getMappings().get(0)
1364    .getMapping();
1365  1 assertSame(cds.get(0).getDatasetSequence(), mapping.getTo());
1366  1 assertEquals("G(1) in CDS should map to G(4) in DNA", 4, mapping
1367    .getMap().getToPosition(1));
1368   
1369    /*
1370    * dna1 to pep2
1371    */
1372  1 mappings = MappingUtils.findMappingsForSequence(pep2, dnaMappings);
1373  1 assertEquals(1, mappings.size());
1374  1 assertEquals(1, mappings.get(0).getMappings().size());
1375  1 assertSame(pep2.getDatasetSequence(), mappings.get(0).getMappings()
1376    .get(0).getMapping().getTo());
1377   
1378    /*
1379    * dna1 to cds2
1380    */
1381  1 List<AlignedCodonFrame> dnaToCds2Mappings = MappingUtils
1382    .findMappingsForSequence(cds.get(1), dnaMappings);
1383  1 mapping = dnaToCds2Mappings.get(0).getMappings().get(0).getMapping();
1384  1 assertSame(cds.get(1).getDatasetSequence(), mapping.getTo());
1385  1 assertEquals("c(4) in CDS should map to c(7) in DNA", 7, mapping
1386    .getMap().getToPosition(4));
1387   
1388    /*
1389    * dna1 to pep3
1390    */
1391  1 mappings = MappingUtils.findMappingsForSequence(pep3, dnaMappings);
1392  1 assertEquals(1, mappings.size());
1393  1 assertEquals(1, mappings.get(0).getMappings().size());
1394  1 assertSame(pep3.getDatasetSequence(), mappings.get(0).getMappings()
1395    .get(0).getMapping().getTo());
1396   
1397    /*
1398    * dna1 to cds3
1399    */
1400  1 List<AlignedCodonFrame> dnaToCds3Mappings = MappingUtils
1401    .findMappingsForSequence(cds.get(2), dnaMappings);
1402  1 mapping = dnaToCds3Mappings.get(0).getMappings().get(0).getMapping();
1403  1 assertSame(cds.get(2).getDatasetSequence(), mapping.getTo());
1404  1 assertEquals("T(4) in CDS should map to T(10) in DNA", 10, mapping
1405    .getMap().getToPosition(4));
1406    }
1407   
 
1408  1 toggle @Test(groups = { "Functional" })
1409    public void testIsMappable()
1410    {
1411  1 SequenceI dna1 = new Sequence("dna1", "cgCAGtgGT");
1412  1 SequenceI aa1 = new Sequence("aa1", "RSG");
1413  1 AlignmentI al1 = new Alignment(new SequenceI[] { dna1 });
1414  1 AlignmentI al2 = new Alignment(new SequenceI[] { aa1 });
1415   
1416  1 assertFalse(AlignmentUtils.isMappable(null, null));
1417  1 assertFalse(AlignmentUtils.isMappable(al1, null));
1418  1 assertFalse(AlignmentUtils.isMappable(null, al1));
1419  1 assertFalse(AlignmentUtils.isMappable(al1, al1));
1420  1 assertFalse(AlignmentUtils.isMappable(al2, al2));
1421   
1422  1 assertTrue(AlignmentUtils.isMappable(al1, al2));
1423  1 assertTrue(AlignmentUtils.isMappable(al2, al1));
1424    }
1425   
1426    /**
1427    * Test creating a mapping when the sequences involved do not start at residue
1428    * 1
1429    *
1430    * @throws IOException
1431    */
 
1432  1 toggle @Test(groups = { "Functional" })
1433    public void testMapCdnaToProtein_forSubsequence() throws IOException
1434    {
1435  1 SequenceI prot = new Sequence("UNIPROT|V12345", "E-I--Q", 10, 12);
1436  1 prot.createDatasetSequence();
1437   
1438  1 SequenceI dna = new Sequence("EMBL|A33333", "GAA--AT-C-CAG", 40, 48);
1439  1 dna.createDatasetSequence();
1440   
1441  1 MapList map = AlignmentUtils.mapCdnaToProtein(prot, dna);
1442  1 assertEquals(10, map.getToLowest());
1443  1 assertEquals(12, map.getToHighest());
1444  1 assertEquals(40, map.getFromLowest());
1445  1 assertEquals(48, map.getFromHighest());
1446    }
1447   
1448    /**
1449    * Test for the alignSequenceAs method where we have protein mapped to protein
1450    */
 
1451  1 toggle @Test(groups = { "Functional" })
1452    public void testAlignSequenceAs_mappedProteinProtein()
1453    {
1454   
1455  1 SequenceI alignMe = new Sequence("Match", "MGAASEV");
1456  1 alignMe.createDatasetSequence();
1457  1 SequenceI alignFrom = new Sequence("Query", "LQTGYMGAASEVMFSPTRR");
1458  1 alignFrom.createDatasetSequence();
1459   
1460  1 AlignedCodonFrame acf = new AlignedCodonFrame();
1461    // this is like a domain or motif match of part of a peptide sequence
1462  1 MapList map = new MapList(new int[] { 6, 12 }, new int[] { 1, 7 }, 1, 1);
1463  1 acf.addMap(alignFrom.getDatasetSequence(),
1464    alignMe.getDatasetSequence(), map);
1465   
1466  1 AlignmentUtils.alignSequenceAs(alignMe, alignFrom, acf, "-", '-', true,
1467    true);
1468  1 assertEquals("-----MGAASEV-------", alignMe.getSequenceAsString());
1469    }
1470   
1471    /**
1472    * Test for the alignSequenceAs method where there are trailing unmapped
1473    * residues in the model sequence
1474    */
 
1475  1 toggle @Test(groups = { "Functional" })
1476    public void testAlignSequenceAs_withTrailingPeptide()
1477    {
1478    // map first 3 codons to KPF; G is a trailing unmapped residue
1479  1 MapList map = new MapList(new int[] { 1, 9 }, new int[] { 1, 3 }, 3, 1);
1480   
1481  1 checkAlignSequenceAs("AAACCCTTT", "K-PFG", true, true, map,
1482    "AAA---CCCTTT---");
1483    }
1484   
1485    /**
1486    * Tests for transferring features between mapped sequences
1487    */
 
1488  1 toggle @Test(groups = { "Functional" })
1489    public void testTransferFeatures()
1490    {
1491  1 SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt");
1492  1 SequenceI cds = new Sequence("cds/10-15", "TAGGCC");
1493   
1494    // no overlap
1495  1 dna.addSequenceFeature(new SequenceFeature("type1", "desc1", 1, 2, 1f,
1496    null));
1497    // partial overlap - to [1, 1]
1498  1 dna.addSequenceFeature(new SequenceFeature("type2", "desc2", 3, 4, 2f,
1499    null));
1500    // exact overlap - to [1, 3]
1501  1 dna.addSequenceFeature(new SequenceFeature("type3", "desc3", 4, 6, 3f,
1502    null));
1503    // spanning overlap - to [2, 5]
1504  1 dna.addSequenceFeature(new SequenceFeature("type4", "desc4", 5, 11, 4f,
1505    null));
1506    // exactly overlaps whole mapped range [1, 6]
1507  1 dna.addSequenceFeature(new SequenceFeature("type5", "desc5", 4, 12, 5f,
1508    null));
1509    // no overlap (internal)
1510  1 dna.addSequenceFeature(new SequenceFeature("type6", "desc6", 7, 9, 6f,
1511    null));
1512    // no overlap (3' end)
1513  1 dna.addSequenceFeature(new SequenceFeature("type7", "desc7", 13, 15,
1514    7f, null));
1515    // overlap (3' end) - to [6, 6]
1516  1 dna.addSequenceFeature(new SequenceFeature("type8", "desc8", 12, 12,
1517    8f, null));
1518    // extended overlap - to [6, +]
1519  1 dna.addSequenceFeature(new SequenceFeature("type9", "desc9", 12, 13,
1520    9f, null));
1521   
1522  1 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1523    new int[] { 1, 6 }, 1, 1);
1524   
1525    /*
1526    * transferFeatures() will build 'partial overlap' for regions
1527    * that partially overlap 5' or 3' (start or end) of target sequence
1528    */
1529  1 AlignmentUtils.transferFeatures(dna, cds, map, null);
1530  1 List<SequenceFeature> sfs = cds.getSequenceFeatures();
1531  1 assertEquals(6, sfs.size());
1532   
1533  1 SequenceFeature sf = sfs.get(0);
1534  1 assertEquals("type2", sf.getType());
1535  1 assertEquals("desc2", sf.getDescription());
1536  1 assertEquals(2f, sf.getScore());
1537  1 assertEquals(1, sf.getBegin());
1538  1 assertEquals(1, sf.getEnd());
1539   
1540  1 sf = sfs.get(1);
1541  1 assertEquals("type3", sf.getType());
1542  1 assertEquals("desc3", sf.getDescription());
1543  1 assertEquals(3f, sf.getScore());
1544  1 assertEquals(1, sf.getBegin());
1545  1 assertEquals(3, sf.getEnd());
1546   
1547  1 sf = sfs.get(2);
1548  1 assertEquals("type4", sf.getType());
1549  1 assertEquals(2, sf.getBegin());
1550  1 assertEquals(5, sf.getEnd());
1551   
1552  1 sf = sfs.get(3);
1553  1 assertEquals("type5", sf.getType());
1554  1 assertEquals(1, sf.getBegin());
1555  1 assertEquals(6, sf.getEnd());
1556   
1557  1 sf = sfs.get(4);
1558  1 assertEquals("type8", sf.getType());
1559  1 assertEquals(6, sf.getBegin());
1560  1 assertEquals(6, sf.getEnd());
1561   
1562  1 sf = sfs.get(5);
1563  1 assertEquals("type9", sf.getType());
1564  1 assertEquals(6, sf.getBegin());
1565  1 assertEquals(6, sf.getEnd());
1566    }
1567   
1568    /**
1569    * Tests for transferring features between mapped sequences
1570    */
 
1571  1 toggle @Test(groups = { "Functional" })
1572    public void testTransferFeatures_withOmit()
1573    {
1574  1 SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt");
1575  1 SequenceI cds = new Sequence("cds/10-15", "TAGGCC");
1576   
1577  1 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1578    new int[] { 1, 6 }, 1, 1);
1579   
1580    // [5, 11] maps to [2, 5]
1581  1 dna.addSequenceFeature(new SequenceFeature("type4", "desc4", 5, 11, 4f,
1582    null));
1583    // [4, 12] maps to [1, 6]
1584  1 dna.addSequenceFeature(new SequenceFeature("type5", "desc5", 4, 12, 5f,
1585    null));
1586    // [12, 12] maps to [6, 6]
1587  1 dna.addSequenceFeature(new SequenceFeature("type8", "desc8", 12, 12,
1588    8f, null));
1589   
1590    // desc4 and desc8 are the 'omit these' varargs
1591  1 AlignmentUtils.transferFeatures(dna, cds, map, null, "type4", "type8");
1592  1 List<SequenceFeature> sfs = cds.getSequenceFeatures();
1593  1 assertEquals(1, sfs.size());
1594   
1595  1 SequenceFeature sf = sfs.get(0);
1596  1 assertEquals("type5", sf.getType());
1597  1 assertEquals(1, sf.getBegin());
1598  1 assertEquals(6, sf.getEnd());
1599    }
1600   
1601    /**
1602    * Tests for transferring features between mapped sequences
1603    */
 
1604  1 toggle @Test(groups = { "Functional" })
1605    public void testTransferFeatures_withSelect()
1606    {
1607  1 SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt");
1608  1 SequenceI cds = new Sequence("cds/10-15", "TAGGCC");
1609   
1610  1 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1611    new int[] { 1, 6 }, 1, 1);
1612   
1613    // [5, 11] maps to [2, 5]
1614  1 dna.addSequenceFeature(new SequenceFeature("type4", "desc4", 5, 11, 4f,
1615    null));
1616    // [4, 12] maps to [1, 6]
1617  1 dna.addSequenceFeature(new SequenceFeature("type5", "desc5", 4, 12, 5f,
1618    null));
1619    // [12, 12] maps to [6, 6]
1620  1 dna.addSequenceFeature(new SequenceFeature("type8", "desc8", 12, 12,
1621    8f, null));
1622   
1623    // "type5" is the 'select this type' argument
1624  1 AlignmentUtils.transferFeatures(dna, cds, map, "type5");
1625  1 List<SequenceFeature> sfs = cds.getSequenceFeatures();
1626  1 assertEquals(1, sfs.size());
1627   
1628  1 SequenceFeature sf = sfs.get(0);
1629  1 assertEquals("type5", sf.getType());
1630  1 assertEquals(1, sf.getBegin());
1631  1 assertEquals(6, sf.getEnd());
1632    }
1633   
1634    /**
1635    * Test the method that extracts the cds-only part of a dna alignment, for the
1636    * case where the cds should be aligned to match its nucleotide sequence.
1637    */
 
1638  1 toggle @Test(groups = { "Functional" })
1639    public void testMakeCdsAlignment_alternativeTranscripts()
1640    {
1641  1 SequenceI dna1 = new Sequence("dna1", "aaaGGGCC-----CTTTaaaGGG");
1642    // alternative transcript of same dna skips CCC codon
1643  1 SequenceI dna2 = new Sequence("dna2", "aaaGGGCC-----cttTaaaGGG");
1644    // dna3 has no mapping (protein product) so should be ignored here
1645  1 SequenceI dna3 = new Sequence("dna3", "aaaGGGCCCCCGGGcttTaaaGGG");
1646  1 SequenceI pep1 = new Sequence("pep1", "GPFG");
1647  1 SequenceI pep2 = new Sequence("pep2", "GPG");
1648  1 dna1.createDatasetSequence();
1649  1 dna2.createDatasetSequence();
1650  1 dna3.createDatasetSequence();
1651  1 pep1.createDatasetSequence();
1652  1 pep2.createDatasetSequence();
1653   
1654  1 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
1655  1 dna.setDataset(null);
1656   
1657  1 MapList map = new MapList(new int[] { 4, 12, 16, 18 },
1658    new int[] { 1, 4 }, 3, 1);
1659  1 AlignedCodonFrame acf = new AlignedCodonFrame();
1660  1 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
1661  1 dna.addCodonFrame(acf);
1662  1 map = new MapList(new int[] { 4, 8, 12, 12, 16, 18 },
1663    new int[] { 1, 3 }, 3, 1);
1664  1 acf = new AlignedCodonFrame();
1665  1 acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
1666  1 dna.addCodonFrame(acf);
1667   
1668  1 AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
1669    dna1, dna2, dna3 }, dna.getDataset(), null);
1670  1 List<SequenceI> cdsSeqs = cds.getSequences();
1671  1 assertEquals(2, cdsSeqs.size());
1672  1 assertEquals("GGGCCCTTTGGG", cdsSeqs.get(0).getSequenceAsString());
1673  1 assertEquals("GGGCCTGGG", cdsSeqs.get(1).getSequenceAsString());
1674   
1675    /*
1676    * verify shared, extended alignment dataset
1677    */
1678  1 assertSame(dna.getDataset(), cds.getDataset());
1679  1 assertTrue(dna.getDataset().getSequences()
1680    .contains(cdsSeqs.get(0).getDatasetSequence()));
1681  1 assertTrue(dna.getDataset().getSequences()
1682    .contains(cdsSeqs.get(1).getDatasetSequence()));
1683   
1684    /*
1685    * Verify 6 mappings: dna1 to cds1, cds1 to pep1, dna1 to pep1
1686    * and the same for dna2/cds2/pep2
1687    */
1688  1 List<AlignedCodonFrame> mappings = cds.getCodonFrames();
1689  1 assertEquals(6, mappings.size());
1690   
1691    /*
1692    * 2 mappings involve pep1
1693    */
1694  1 List<AlignedCodonFrame> pep1Mappings = MappingUtils
1695    .findMappingsForSequence(pep1, mappings);
1696  1 assertEquals(2, pep1Mappings.size());
1697   
1698    /*
1699    * Get mapping of pep1 to cds1 and verify it
1700    * maps GPFG to 1-3,4-6,7-9,10-12
1701    */
1702  1 List<AlignedCodonFrame> pep1CdsMappings = MappingUtils
1703    .findMappingsForSequence(cds.getSequenceAt(0), pep1Mappings);
1704  1 assertEquals(1, pep1CdsMappings.size());
1705  1 SearchResultsI sr = MappingUtils.buildSearchResults(pep1, 1,
1706    pep1CdsMappings);
1707  1 assertEquals(1, sr.getResults().size());
1708  1 SearchResultMatchI m = sr.getResults().get(0);
1709  1 assertEquals(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
1710  1 assertEquals(1, m.getStart());
1711  1 assertEquals(3, m.getEnd());
1712  1 sr = MappingUtils.buildSearchResults(pep1, 2, pep1CdsMappings);
1713  1 m = sr.getResults().get(0);
1714  1 assertEquals(4, m.getStart());
1715  1 assertEquals(6, m.getEnd());
1716  1 sr = MappingUtils.buildSearchResults(pep1, 3, pep1CdsMappings);
1717  1 m = sr.getResults().get(0);
1718  1 assertEquals(7, m.getStart());
1719  1 assertEquals(9, m.getEnd());
1720  1 sr = MappingUtils.buildSearchResults(pep1, 4, pep1CdsMappings);
1721  1 m = sr.getResults().get(0);
1722  1 assertEquals(10, m.getStart());
1723  1 assertEquals(12, m.getEnd());
1724   
1725    /*
1726    * Get mapping of pep2 to cds2 and verify it
1727    * maps GPG in pep2 to 1-3,4-6,7-9 in second CDS sequence
1728    */
1729  1 List<AlignedCodonFrame> pep2Mappings = MappingUtils
1730    .findMappingsForSequence(pep2, mappings);
1731  1 assertEquals(2, pep2Mappings.size());
1732  1 List<AlignedCodonFrame> pep2CdsMappings = MappingUtils
1733    .findMappingsForSequence(cds.getSequenceAt(1), pep2Mappings);
1734  1 assertEquals(1, pep2CdsMappings.size());
1735  1 sr = MappingUtils.buildSearchResults(pep2, 1, pep2CdsMappings);
1736  1 assertEquals(1, sr.getResults().size());
1737  1 m = sr.getResults().get(0);
1738  1 assertEquals(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
1739  1 assertEquals(1, m.getStart());
1740  1 assertEquals(3, m.getEnd());
1741  1 sr = MappingUtils.buildSearchResults(pep2, 2, pep2CdsMappings);
1742  1 m = sr.getResults().get(0);
1743  1 assertEquals(4, m.getStart());
1744  1 assertEquals(6, m.getEnd());
1745  1 sr = MappingUtils.buildSearchResults(pep2, 3, pep2CdsMappings);
1746  1 m = sr.getResults().get(0);
1747  1 assertEquals(7, m.getStart());
1748  1 assertEquals(9, m.getEnd());
1749    }
1750   
1751    /**
1752    * Test the method that realigns protein to match mapped codon alignment.
1753    */
 
1754  1 toggle @Test(groups = { "Functional" })
1755    public void testAlignProteinAsDna_incompleteStartCodon()
1756    {
1757    // seq1: incomplete start codon (not mapped), then [3, 11]
1758  1 SequenceI dna1 = new Sequence("Seq1", "ccAAA-TTT-GGG-");
1759    // seq2 codons are [4, 5], [8, 11]
1760  1 SequenceI dna2 = new Sequence("Seq2", "ccaAA-ttT-GGG-");
1761    // seq3 incomplete start codon at 'tt'
1762  1 SequenceI dna3 = new Sequence("Seq3", "ccaaa-ttt-GGG-");
1763  1 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
1764  1 dna.setDataset(null);
1765   
1766    // prot1 has 'X' for incomplete start codon (not mapped)
1767  1 SequenceI prot1 = new Sequence("Seq1", "XKFG"); // X for incomplete start
1768  1 SequenceI prot2 = new Sequence("Seq2", "NG");
1769  1 SequenceI prot3 = new Sequence("Seq3", "XG"); // X for incomplete start
1770  1 AlignmentI protein = new Alignment(new SequenceI[] { prot1, prot2,
1771    prot3 });
1772  1 protein.setDataset(null);
1773   
1774    // map dna1 [3, 11] to prot1 [2, 4] KFG
1775  1 MapList map = new MapList(new int[] { 3, 11 }, new int[] { 2, 4 }, 3, 1);
1776  1 AlignedCodonFrame acf = new AlignedCodonFrame();
1777  1 acf.addMap(dna1.getDatasetSequence(), prot1.getDatasetSequence(), map);
1778   
1779    // map dna2 [4, 5] [8, 11] to prot2 [1, 2] NG
1780  1 map = new MapList(new int[] { 4, 5, 8, 11 }, new int[] { 1, 2 }, 3, 1);
1781  1 acf.addMap(dna2.getDatasetSequence(), prot2.getDatasetSequence(), map);
1782   
1783    // map dna3 [9, 11] to prot3 [2, 2] G
1784  1 map = new MapList(new int[] { 9, 11 }, new int[] { 2, 2 }, 3, 1);
1785  1 acf.addMap(dna3.getDatasetSequence(), prot3.getDatasetSequence(), map);
1786   
1787  1 ArrayList<AlignedCodonFrame> acfs = new ArrayList<>();
1788  1 acfs.add(acf);
1789  1 protein.setCodonFrames(acfs);
1790   
1791    /*
1792    * verify X is included in the aligned proteins, and placed just
1793    * before the first mapped residue
1794    * CCT is between CCC and TTT
1795    */
1796  1 AlignmentUtils.alignProteinAsDna(protein, dna);
1797  1 assertEquals("XK-FG", prot1.getSequenceAsString());
1798  1 assertEquals("--N-G", prot2.getSequenceAsString());
1799  1 assertEquals("---XG", prot3.getSequenceAsString());
1800    }
1801   
1802    /**
1803    * Tests for the method that maps the subset of a dna sequence that has CDS
1804    * (or subtype) feature - case where the start codon is incomplete.
1805    */
 
1806  1 toggle @Test(groups = "Functional")
1807    public void testFindCdsPositions_fivePrimeIncomplete()
1808    {
1809  1 SequenceI dnaSeq = new Sequence("dna", "aaagGGCCCaaaTTTttt");
1810  1 dnaSeq.createDatasetSequence();
1811  1 SequenceI ds = dnaSeq.getDatasetSequence();
1812   
1813    // CDS for dna 5-6 (incomplete codon), 7-9
1814  1 SequenceFeature sf = new SequenceFeature("CDS", "", 5, 9, 0f, null);
1815  1 sf.setPhase("2"); // skip 2 bases to start of next codon
1816  1 ds.addSequenceFeature(sf);
1817    // CDS for dna 13-15
1818  1 sf = new SequenceFeature("CDS_predicted", "", 13, 15, 0f, null);
1819  1 ds.addSequenceFeature(sf);
1820   
1821  1 List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);
1822   
1823    /*
1824    * check the mapping starts with the first complete codon
1825    */
1826  1 assertEquals(6, MappingUtils.getLength(ranges));
1827  1 assertEquals(2, ranges.size());
1828  1 assertEquals(7, ranges.get(0)[0]);
1829  1 assertEquals(9, ranges.get(0)[1]);
1830  1 assertEquals(13, ranges.get(1)[0]);
1831  1 assertEquals(15, ranges.get(1)[1]);
1832    }
1833   
1834    /**
1835    * Tests for the method that maps the subset of a dna sequence that has CDS
1836    * (or subtype) feature.
1837    */
 
1838  1 toggle @Test(groups = "Functional")
1839    public void testFindCdsPositions()
1840    {
1841  1 SequenceI dnaSeq = new Sequence("dna", "aaaGGGcccAAATTTttt");
1842  1 dnaSeq.createDatasetSequence();
1843  1 SequenceI ds = dnaSeq.getDatasetSequence();
1844   
1845    // CDS for dna 10-12
1846  1 SequenceFeature sf = new SequenceFeature("CDS_predicted", "", 10, 12,
1847    0f, null);
1848  1 sf.setStrand("+");
1849  1 ds.addSequenceFeature(sf);
1850    // CDS for dna 4-6
1851  1 sf = new SequenceFeature("CDS", "", 4, 6, 0f, null);
1852  1 sf.setStrand("+");
1853  1 ds.addSequenceFeature(sf);
1854    // exon feature should be ignored here
1855  1 sf = new SequenceFeature("exon", "", 7, 9, 0f, null);
1856  1 ds.addSequenceFeature(sf);
1857   
1858  1 List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);
1859    /*
1860    * verify ranges { [4-6], [12-10] }
1861    * note CDS ranges are ordered ascending even if the CDS
1862    * features are not
1863    */
1864  1 assertEquals(6, MappingUtils.getLength(ranges));
1865  1 assertEquals(2, ranges.size());
1866  1 assertEquals(4, ranges.get(0)[0]);
1867  1 assertEquals(6, ranges.get(0)[1]);
1868  1 assertEquals(10, ranges.get(1)[0]);
1869  1 assertEquals(12, ranges.get(1)[1]);
1870    }
1871   
1872    /**
1873    * Test the method that computes a map of codon variants for each protein
1874    * position from "sequence_variant" features on dna
1875    */
 
1876  1 toggle @Test(groups = "Functional")
1877    public void testBuildDnaVariantsMap()
1878    {
1879  1 SequenceI dna = new Sequence("dna", "atgAAATTTGGGCCCtag");
1880  1 MapList map = new MapList(new int[] { 1, 18 }, new int[] { 1, 5 }, 3, 1);
1881   
1882    /*
1883    * first with no variants on dna
1884    */
1885  1 LinkedHashMap<Integer, List<DnaVariant>[]> variantsMap = AlignmentUtils
1886    .buildDnaVariantsMap(dna, map);
1887  1 assertTrue(variantsMap.isEmpty());
1888   
1889    /*
1890    * single allele codon 1, on base 1
1891    */
1892  1 SequenceFeature sf1 = new SequenceFeature("sequence_variant", "", 1, 1,
1893    0f, null);
1894  1 sf1.setValue("alleles", "T");
1895  1 sf1.setValue("ID", "sequence_variant:rs758803211");
1896  1 dna.addSequenceFeature(sf1);
1897   
1898    /*
1899    * two alleles codon 2, on bases 2 and 3 (distinct variants)
1900    */
1901  1 SequenceFeature sf2 = new SequenceFeature("sequence_variant", "", 5, 5,
1902    0f, null);
1903  1 sf2.setValue("alleles", "T");
1904  1 sf2.setValue("ID", "sequence_variant:rs758803212");
1905  1 dna.addSequenceFeature(sf2);
1906  1 SequenceFeature sf3 = new SequenceFeature("sequence_variant", "", 6, 6,
1907    0f, null);
1908  1 sf3.setValue("alleles", "G");
1909  1 sf3.setValue("ID", "sequence_variant:rs758803213");
1910  1 dna.addSequenceFeature(sf3);
1911   
1912    /*
1913    * two alleles codon 3, both on base 2 (one variant)
1914    */
1915  1 SequenceFeature sf4 = new SequenceFeature("sequence_variant", "", 8, 8,
1916    0f, null);
1917  1 sf4.setValue("alleles", "C, G");
1918  1 sf4.setValue("ID", "sequence_variant:rs758803214");
1919  1 dna.addSequenceFeature(sf4);
1920   
1921    // no alleles on codon 4
1922   
1923    /*
1924    * alleles on codon 5 on all 3 bases (distinct variants)
1925    */
1926  1 SequenceFeature sf5 = new SequenceFeature("sequence_variant", "", 13,
1927    13, 0f, null);
1928  1 sf5.setValue("alleles", "C, G"); // (C duplicates given base value)
1929  1 sf5.setValue("ID", "sequence_variant:rs758803215");
1930  1 dna.addSequenceFeature(sf5);
1931  1 SequenceFeature sf6 = new SequenceFeature("sequence_variant", "", 14,
1932    14, 0f, null);
1933  1 sf6.setValue("alleles", "g, a"); // should force to upper-case
1934  1 sf6.setValue("ID", "sequence_variant:rs758803216");
1935  1 dna.addSequenceFeature(sf6);
1936   
1937  1 SequenceFeature sf7 = new SequenceFeature("sequence_variant", "", 15,
1938    15, 0f, null);
1939  1 sf7.setValue("alleles", "A, T");
1940  1 sf7.setValue("ID", "sequence_variant:rs758803217");
1941  1 dna.addSequenceFeature(sf7);
1942   
1943    /*
1944    * build map - expect variants on positions 1, 2, 3, 5
1945    */
1946  1 variantsMap = AlignmentUtils.buildDnaVariantsMap(dna, map);
1947  1 assertEquals(4, variantsMap.size());
1948   
1949    /*
1950    * protein residue 1: variant on codon (ATG) base 1, not on 2 or 3
1951    */
1952  1 List<DnaVariant>[] pep1Variants = variantsMap.get(1);
1953  1 assertEquals(3, pep1Variants.length);
1954  1 assertEquals(1, pep1Variants[0].size());
1955  1 assertEquals("A", pep1Variants[0].get(0).base); // codon[1] base
1956  1 assertSame(sf1, pep1Variants[0].get(0).variant); // codon[1] variant
1957  1 assertEquals(1, pep1Variants[1].size());
1958  1 assertEquals("T", pep1Variants[1].get(0).base); // codon[2] base
1959  1 assertNull(pep1Variants[1].get(0).variant); // no variant here
1960  1 assertEquals(1, pep1Variants[2].size());
1961  1 assertEquals("G", pep1Variants[2].get(0).base); // codon[3] base
1962  1 assertNull(pep1Variants[2].get(0).variant); // no variant here
1963   
1964    /*
1965    * protein residue 2: variants on codon (AAA) bases 2 and 3
1966    */
1967  1 List<DnaVariant>[] pep2Variants = variantsMap.get(2);
1968  1 assertEquals(3, pep2Variants.length);
1969  1 assertEquals(1, pep2Variants[0].size());
1970    // codon[1] base recorded while processing variant on codon[2]
1971  1 assertEquals("A", pep2Variants[0].get(0).base);
1972  1 assertNull(pep2Variants[0].get(0).variant); // no variant here
1973    // codon[2] base and variant:
1974  1 assertEquals(1, pep2Variants[1].size());
1975  1 assertEquals("A", pep2Variants[1].get(0).base);
1976  1 assertSame(sf2, pep2Variants[1].get(0).variant);
1977    // codon[3] base was recorded when processing codon[2] variant
1978    // and then the variant for codon[3] added to it
1979  1 assertEquals(1, pep2Variants[2].size());
1980  1 assertEquals("A", pep2Variants[2].get(0).base);
1981  1 assertSame(sf3, pep2Variants[2].get(0).variant);
1982   
1983    /*
1984    * protein residue 3: variants on codon (TTT) base 2 only
1985    */
1986  1 List<DnaVariant>[] pep3Variants = variantsMap.get(3);
1987  1 assertEquals(3, pep3Variants.length);
1988  1 assertEquals(1, pep3Variants[0].size());
1989  1 assertEquals("T", pep3Variants[0].get(0).base); // codon[1] base
1990  1 assertNull(pep3Variants[0].get(0).variant); // no variant here
1991  1 assertEquals(1, pep3Variants[1].size());
1992  1 assertEquals("T", pep3Variants[1].get(0).base); // codon[2] base
1993  1 assertSame(sf4, pep3Variants[1].get(0).variant); // codon[2] variant
1994  1 assertEquals(1, pep3Variants[2].size());
1995  1 assertEquals("T", pep3Variants[2].get(0).base); // codon[3] base
1996  1 assertNull(pep3Variants[2].get(0).variant); // no variant here
1997   
1998    /*
1999    * three variants on protein position 5
2000    */
2001  1 List<DnaVariant>[] pep5Variants = variantsMap.get(5);
2002  1 assertEquals(3, pep5Variants.length);
2003  1 assertEquals(1, pep5Variants[0].size());
2004  1 assertEquals("C", pep5Variants[0].get(0).base); // codon[1] base
2005  1 assertSame(sf5, pep5Variants[0].get(0).variant); // codon[1] variant
2006  1 assertEquals(1, pep5Variants[1].size());
2007  1 assertEquals("C", pep5Variants[1].get(0).base); // codon[2] base
2008  1 assertSame(sf6, pep5Variants[1].get(0).variant); // codon[2] variant
2009  1 assertEquals(1, pep5Variants[2].size());
2010  1 assertEquals("C", pep5Variants[2].get(0).base); // codon[3] base
2011  1 assertSame(sf7, pep5Variants[2].get(0).variant); // codon[3] variant
2012    }
2013   
2014    /**
2015    * Tests for the method that computes all peptide variants given codon
2016    * variants
2017    */
 
2018  1 toggle @Test(groups = "Functional")
2019    public void testComputePeptideVariants()
2020    {
2021    /*
2022    * scenario: AAATTTCCC codes for KFP
2023    * variants:
2024    * GAA -> E source: Ensembl
2025    * CAA -> Q source: dbSNP
2026    * TAA -> STOP source: dnSNP
2027    * AAG synonymous source: COSMIC
2028    * AAT -> N source: Ensembl
2029    * ...TTC synonymous source: dbSNP
2030    * ......CAC,CGC -> H,R source: COSMIC
2031    * (one variant with two alleles)
2032    */
2033  1 SequenceI peptide = new Sequence("pep/10-12", "KFP");
2034   
2035    /*
2036    * two distinct variants for codon 1 position 1
2037    * second one has clinical significance
2038    */
2039  1 String ensembl = "Ensembl";
2040  1 String dbSnp = "dbSNP";
2041  1 String cosmic = "COSMIC";
2042   
2043  1 SequenceFeature sf1 = new SequenceFeature("sequence_variant", "", 1, 1,
2044    0f, ensembl);
2045  1 sf1.setValue("alleles", "A,G"); // AAA -> GAA -> K/E
2046  1 sf1.setValue("ID", "var1.125A>G");
2047   
2048  1 SequenceFeature sf2 = new SequenceFeature("sequence_variant", "", 1, 1,
2049    0f, dbSnp);
2050  1 sf2.setValue("alleles", "A,C"); // AAA -> CAA -> K/Q
2051  1 sf2.setValue("ID", "var2");
2052  1 sf2.setValue("clinical_significance", "Dodgy");
2053   
2054  1 SequenceFeature sf3 = new SequenceFeature("sequence_variant", "", 1, 1,
2055    0f, dbSnp);
2056  1 sf3.setValue("alleles", "A,T"); // AAA -> TAA -> stop codon
2057  1 sf3.setValue("ID", "var3");
2058  1 sf3.setValue("clinical_significance", "Bad");
2059   
2060  1 SequenceFeature sf4 = new SequenceFeature("sequence_variant", "", 3, 3,
2061    0f, cosmic);
2062  1 sf4.setValue("alleles", "A,G"); // AAA -> AAG synonymous
2063  1 sf4.setValue("ID", "var4");
2064  1 sf4.setValue("clinical_significance", "None");
2065   
2066  1 SequenceFeature sf5 = new SequenceFeature("sequence_variant", "", 3, 3,
2067    0f, ensembl);
2068  1 sf5.setValue("alleles", "A,T"); // AAA -> AAT -> K/N
2069  1 sf5.setValue("ID", "sequence_variant:var5"); // prefix gets stripped off
2070  1 sf5.setValue("clinical_significance", "Benign");
2071   
2072  1 SequenceFeature sf6 = new SequenceFeature("sequence_variant", "", 6, 6,
2073    0f, dbSnp);
2074  1 sf6.setValue("alleles", "T,C"); // TTT -> TTC synonymous
2075  1 sf6.setValue("ID", "var6");
2076   
2077  1 SequenceFeature sf7 = new SequenceFeature("sequence_variant", "", 8, 8,
2078    0f, cosmic);
2079  1 sf7.setValue("alleles", "C,A,G"); // CCC -> CAC,CGC -> P/H/R
2080  1 sf7.setValue("ID", "var7");
2081  1 sf7.setValue("clinical_significance", "Good");
2082   
2083  1 List<DnaVariant> codon1Variants = new ArrayList<>();
2084  1 List<DnaVariant> codon2Variants = new ArrayList<>();
2085  1 List<DnaVariant> codon3Variants = new ArrayList<>();
2086   
2087  1 List<DnaVariant> codonVariants[] = new ArrayList[3];
2088  1 codonVariants[0] = codon1Variants;
2089  1 codonVariants[1] = codon2Variants;
2090  1 codonVariants[2] = codon3Variants;
2091   
2092    /*
2093    * compute variants for protein position 1
2094    */
2095  1 codon1Variants.add(new DnaVariant("A", sf1));
2096  1 codon1Variants.add(new DnaVariant("A", sf2));
2097  1 codon1Variants.add(new DnaVariant("A", sf3));
2098  1 codon2Variants.add(new DnaVariant("A"));
2099    // codon2Variants.add(new DnaVariant("A"));
2100  1 codon3Variants.add(new DnaVariant("A", sf4));
2101  1 codon3Variants.add(new DnaVariant("A", sf5));
2102  1 AlignmentUtils.computePeptideVariants(peptide, 1, codonVariants);
2103   
2104    /*
2105    * compute variants for protein position 2
2106    */
2107  1 codon1Variants.clear();
2108  1 codon2Variants.clear();
2109  1 codon3Variants.clear();
2110  1 codon1Variants.add(new DnaVariant("T"));
2111  1 codon2Variants.add(new DnaVariant("T"));
2112  1 codon3Variants.add(new DnaVariant("T", sf6));
2113  1 AlignmentUtils.computePeptideVariants(peptide, 2, codonVariants);
2114   
2115    /*
2116    * compute variants for protein position 3
2117    */
2118  1 codon1Variants.clear();
2119  1 codon2Variants.clear();
2120  1 codon3Variants.clear();
2121  1 codon1Variants.add(new DnaVariant("C"));
2122  1 codon2Variants.add(new DnaVariant("C", sf7));
2123  1 codon3Variants.add(new DnaVariant("C"));
2124  1 AlignmentUtils.computePeptideVariants(peptide, 3, codonVariants);
2125   
2126    /*
2127    * verify added sequence features for
2128    * var1 K -> E Ensembl
2129    * var2 K -> Q dbSNP
2130    * var3 K -> stop
2131    * var4 synonymous
2132    * var5 K -> N Ensembl
2133    * var6 synonymous
2134    * var7 P -> H COSMIC
2135    * var8 P -> R COSMIC
2136    */
2137  1 List<SequenceFeature> sfs = peptide.getSequenceFeatures();
2138  1 SequenceFeatures.sortFeatures(sfs, true);
2139  1 assertEquals(8, sfs.size());
2140   
2141    /*
2142    * features are sorted by start position ascending, but in no
2143    * particular order where start positions match; asserts here
2144    * simply match the data returned (the order is not important)
2145    */
2146    // AAA -> AAT -> K/N
2147  1 SequenceFeature sf = sfs.get(0);
2148  1 assertEquals(1, sf.getBegin());
2149  1 assertEquals(1, sf.getEnd());
2150  1 assertEquals("nonsynonymous_variant", sf.getType());
2151  1 assertEquals("p.Lys1Asn", sf.getDescription());
2152  1 assertEquals("var5", sf.getValue("ID"));
2153  1 assertEquals("Benign", sf.getValue("clinical_significance"));
2154  1 assertEquals("ID=var5;clinical_significance=Benign",
2155    sf.getAttributes());
2156  1 assertEquals(1, sf.links.size());
2157  1 assertEquals(
2158    "p.Lys1Asn var5|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var5",
2159    sf.links.get(0));
2160  1 assertEquals(ensembl, sf.getFeatureGroup());
2161   
2162    // AAA -> CAA -> K/Q
2163  1 sf = sfs.get(1);
2164  1 assertEquals(1, sf.getBegin());
2165  1 assertEquals(1, sf.getEnd());
2166  1 assertEquals("nonsynonymous_variant", sf.getType());
2167  1 assertEquals("p.Lys1Gln", sf.getDescription());
2168  1 assertEquals("var2", sf.getValue("ID"));
2169  1 assertEquals("Dodgy", sf.getValue("clinical_significance"));
2170  1 assertEquals("ID=var2;clinical_significance=Dodgy", sf.getAttributes());
2171  1 assertEquals(1, sf.links.size());
2172  1 assertEquals(
2173    "p.Lys1Gln var2|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var2",
2174    sf.links.get(0));
2175  1 assertEquals(dbSnp, sf.getFeatureGroup());
2176   
2177    // AAA -> GAA -> K/E
2178  1 sf = sfs.get(2);
2179  1 assertEquals(1, sf.getBegin());
2180  1 assertEquals(1, sf.getEnd());
2181  1 assertEquals("nonsynonymous_variant", sf.getType());
2182  1 assertEquals("p.Lys1Glu", sf.getDescription());
2183  1 assertEquals("var1.125A>G", sf.getValue("ID"));
2184  1 assertNull(sf.getValue("clinical_significance"));
2185  1 assertEquals("ID=var1.125A>G", sf.getAttributes());
2186  1 assertEquals(1, sf.links.size());
2187    // link to variation is urlencoded
2188  1 assertEquals(
2189    "p.Lys1Glu var1.125A>G|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var1.125A%3EG",
2190    sf.links.get(0));
2191  1 assertEquals(ensembl, sf.getFeatureGroup());
2192   
2193    // AAA -> TAA -> stop codon
2194  1 sf = sfs.get(3);
2195  1 assertEquals(1, sf.getBegin());
2196  1 assertEquals(1, sf.getEnd());
2197  1 assertEquals("stop_gained", sf.getType());
2198  1 assertEquals("Aaa/Taa", sf.getDescription());
2199  1 assertEquals("var3", sf.getValue("ID"));
2200  1 assertEquals("Bad", sf.getValue("clinical_significance"));
2201  1 assertEquals("ID=var3;clinical_significance=Bad", sf.getAttributes());
2202  1 assertEquals(1, sf.links.size());
2203  1 assertEquals(
2204    "Aaa/Taa var3|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var3",
2205    sf.links.get(0));
2206  1 assertEquals(dbSnp, sf.getFeatureGroup());
2207   
2208    // AAA -> AAG synonymous
2209  1 sf = sfs.get(4);
2210  1 assertEquals(1, sf.getBegin());
2211  1 assertEquals(1, sf.getEnd());
2212  1 assertEquals("synonymous_variant", sf.getType());
2213  1 assertEquals("aaA/aaG", sf.getDescription());
2214  1 assertEquals("var4", sf.getValue("ID"));
2215  1 assertEquals("None", sf.getValue("clinical_significance"));
2216  1 assertEquals("ID=var4;clinical_significance=None", sf.getAttributes());
2217  1 assertEquals(1, sf.links.size());
2218  1 assertEquals(
2219    "aaA/aaG var4|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var4",
2220    sf.links.get(0));
2221  1 assertEquals(cosmic, sf.getFeatureGroup());
2222   
2223    // TTT -> TTC synonymous
2224  1 sf = sfs.get(5);
2225  1 assertEquals(2, sf.getBegin());
2226  1 assertEquals(2, sf.getEnd());
2227  1 assertEquals("synonymous_variant", sf.getType());
2228  1 assertEquals("ttT/ttC", sf.getDescription());
2229  1 assertEquals("var6", sf.getValue("ID"));
2230  1 assertNull(sf.getValue("clinical_significance"));
2231  1 assertEquals("ID=var6", sf.getAttributes());
2232  1 assertEquals(1, sf.links.size());
2233  1 assertEquals(
2234    "ttT/ttC var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6",
2235    sf.links.get(0));
2236  1 assertEquals(dbSnp, sf.getFeatureGroup());
2237   
2238    // var7 generates two distinct protein variant features (two alleles)
2239    // CCC -> CGC -> P/R
2240  1 sf = sfs.get(6);
2241  1 assertEquals(3, sf.getBegin());
2242  1 assertEquals(3, sf.getEnd());
2243  1 assertEquals("nonsynonymous_variant", sf.getType());
2244  1 assertEquals("p.Pro3Arg", sf.getDescription());
2245  1 assertEquals("var7", sf.getValue("ID"));
2246  1 assertEquals("Good", sf.getValue("clinical_significance"));
2247  1 assertEquals("ID=var7;clinical_significance=Good", sf.getAttributes());
2248  1 assertEquals(1, sf.links.size());
2249  1 assertEquals(
2250    "p.Pro3Arg var7|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var7",
2251    sf.links.get(0));
2252  1 assertEquals(cosmic, sf.getFeatureGroup());
2253   
2254    // CCC -> CAC -> P/H
2255  1 sf = sfs.get(7);
2256  1 assertEquals(3, sf.getBegin());
2257  1 assertEquals(3, sf.getEnd());
2258  1 assertEquals("nonsynonymous_variant", sf.getType());
2259  1 assertEquals("p.Pro3His", sf.getDescription());
2260  1 assertEquals("var7", sf.getValue("ID"));
2261  1 assertEquals("Good", sf.getValue("clinical_significance"));
2262  1 assertEquals("ID=var7;clinical_significance=Good", sf.getAttributes());
2263  1 assertEquals(1, sf.links.size());
2264  1 assertEquals(
2265    "p.Pro3His var7|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var7",
2266    sf.links.get(0));
2267  1 assertEquals(cosmic, sf.getFeatureGroup());
2268    }
2269   
2270    /**
2271    * Tests for the method that maps the subset of a dna sequence that has CDS
2272    * (or subtype) feature, with CDS strand = '-' (reverse)
2273    */
2274    // test turned off as currently findCdsPositions is not strand-dependent
2275    // left in case it comes around again...
 
2276  0 toggle @Test(groups = "Functional", enabled = false)
2277    public void testFindCdsPositions_reverseStrand()
2278    {
2279  0 SequenceI dnaSeq = new Sequence("dna", "aaaGGGcccAAATTTttt");
2280  0 dnaSeq.createDatasetSequence();
2281  0 SequenceI ds = dnaSeq.getDatasetSequence();
2282   
2283    // CDS for dna 4-6
2284  0 SequenceFeature sf = new SequenceFeature("CDS", "", 4, 6, 0f, null);
2285  0 sf.setStrand("-");
2286  0 ds.addSequenceFeature(sf);
2287    // exon feature should be ignored here
2288  0 sf = new SequenceFeature("exon", "", 7, 9, 0f, null);
2289  0 ds.addSequenceFeature(sf);
2290    // CDS for dna 10-12
2291  0 sf = new SequenceFeature("CDS_predicted", "", 10, 12, 0f, null);
2292  0 sf.setStrand("-");
2293  0 ds.addSequenceFeature(sf);
2294   
2295  0 List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);
2296    /*
2297    * verify ranges { [12-10], [6-4] }
2298    */
2299  0 assertEquals(6, MappingUtils.getLength(ranges));
2300  0 assertEquals(2, ranges.size());
2301  0 assertEquals(12, ranges.get(0)[0]);
2302  0 assertEquals(10, ranges.get(0)[1]);
2303  0 assertEquals(6, ranges.get(1)[0]);
2304  0 assertEquals(4, ranges.get(1)[1]);
2305    }
2306   
2307    /**
2308    * Tests for the method that maps the subset of a dna sequence that has CDS
2309    * (or subtype) feature - reverse strand case where the start codon is
2310    * incomplete.
2311    */
 
2312  0 toggle @Test(groups = "Functional", enabled = false)
2313    // test turned off as currently findCdsPositions is not strand-dependent
2314    // left in case it comes around again...
2315    public void testFindCdsPositions_reverseStrandThreePrimeIncomplete()
2316    {
2317  0 SequenceI dnaSeq = new Sequence("dna", "aaagGGCCCaaaTTTttt");
2318  0 dnaSeq.createDatasetSequence();
2319  0 SequenceI ds = dnaSeq.getDatasetSequence();
2320   
2321    // CDS for dna 5-9
2322  0 SequenceFeature sf = new SequenceFeature("CDS", "", 5, 9, 0f, null);
2323  0 sf.setStrand("-");
2324  0 ds.addSequenceFeature(sf);
2325    // CDS for dna 13-15
2326  0 sf = new SequenceFeature("CDS_predicted", "", 13, 15, 0f, null);
2327  0 sf.setStrand("-");
2328  0 sf.setPhase("2"); // skip 2 bases to start of next codon
2329  0 ds.addSequenceFeature(sf);
2330   
2331  0 List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);
2332   
2333    /*
2334    * check the mapping starts with the first complete codon
2335    * expect ranges [13, 13], [9, 5]
2336    */
2337  0 assertEquals(6, MappingUtils.getLength(ranges));
2338  0 assertEquals(2, ranges.size());
2339  0 assertEquals(13, ranges.get(0)[0]);
2340  0 assertEquals(13, ranges.get(0)[1]);
2341  0 assertEquals(9, ranges.get(1)[0]);
2342  0 assertEquals(5, ranges.get(1)[1]);
2343    }
2344   
 
2345  1 toggle @Test(groups = "Functional")
2346    public void testAlignAs_alternateTranscriptsUngapped()
2347    {
2348  1 SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa");
2349  1 SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA");
2350  1 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });
2351  1 ((Alignment) dna).createDatasetAlignment();
2352  1 SequenceI cds1 = new Sequence("cds1", "GGGTTT");
2353  1 SequenceI cds2 = new Sequence("cds2", "CCCAAA");
2354  1 AlignmentI cds = new Alignment(new SequenceI[] { cds1, cds2 });
2355  1 ((Alignment) cds).createDatasetAlignment();
2356   
2357  1 AlignedCodonFrame acf = new AlignedCodonFrame();
2358  1 MapList map = new MapList(new int[] { 4, 9 }, new int[] { 1, 6 }, 1, 1);
2359  1 acf.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(), map);
2360  1 map = new MapList(new int[] { 1, 3, 10, 12 }, new int[] { 1, 6 }, 1, 1);
2361  1 acf.addMap(dna2.getDatasetSequence(), cds2.getDatasetSequence(), map);
2362   
2363    /*
2364    * verify CDS alignment is as:
2365    * cccGGGTTTaaa (cdna)
2366    * CCCgggtttAAA (cdna)
2367    *
2368    * ---GGGTTT--- (cds)
2369    * CCC------AAA (cds)
2370    */
2371  1 dna.addCodonFrame(acf);
2372  1 AlignmentUtils.alignAs(cds, dna);
2373  1 assertEquals("---GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
2374  1 assertEquals("CCC------AAA", cds.getSequenceAt(1).getSequenceAsString());
2375    }
2376   
 
2377  1 toggle @Test(groups = { "Functional" })
2378    public void testAddMappedPositions()
2379    {
2380  1 SequenceI from = new Sequence("dna", "ggAA-ATcc-TT-g");
2381  1 SequenceI seq1 = new Sequence("cds", "AAATTT");
2382  1 from.createDatasetSequence();
2383  1 seq1.createDatasetSequence();
2384  1 Mapping mapping = new Mapping(seq1, new MapList(
2385    new int[] { 3, 6, 9, 10 }, new int[] { 1, 6 }, 1, 1));
2386  1 Map<Integer, Map<SequenceI, Character>> map = new TreeMap<>();
2387  1 AlignmentUtils.addMappedPositions(seq1, from, mapping, map);
2388   
2389    /*
2390    * verify map has seq1 residues in columns 3,4,6,7,11,12
2391    */
2392  1 assertEquals(6, map.size());
2393  1 assertEquals('A', map.get(3).get(seq1).charValue());
2394  1 assertEquals('A', map.get(4).get(seq1).charValue());
2395  1 assertEquals('A', map.get(6).get(seq1).charValue());
2396  1 assertEquals('T', map.get(7).get(seq1).charValue());
2397  1 assertEquals('T', map.get(11).get(seq1).charValue());
2398  1 assertEquals('T', map.get(12).get(seq1).charValue());
2399   
2400    /*
2401    *
2402    */
2403    }
2404   
2405    /**
2406    * Test case where the mapping 'from' range includes a stop codon which is
2407    * absent in the 'to' range
2408    */
 
2409  1 toggle @Test(groups = { "Functional" })
2410    public void testAddMappedPositions_withStopCodon()
2411    {
2412  1 SequenceI from = new Sequence("dna", "ggAA-ATcc-TT-g");
2413  1 SequenceI seq1 = new Sequence("cds", "AAATTT");
2414  1 from.createDatasetSequence();
2415  1 seq1.createDatasetSequence();
2416  1 Mapping mapping = new Mapping(seq1, new MapList(
2417    new int[] { 3, 6, 9, 10 }, new int[] { 1, 6 }, 1, 1));
2418  1 Map<Integer, Map<SequenceI, Character>> map = new TreeMap<>();
2419  1 AlignmentUtils.addMappedPositions(seq1, from, mapping, map);
2420   
2421    /*
2422    * verify map has seq1 residues in columns 3,4,6,7,11,12
2423    */
2424  1 assertEquals(6, map.size());
2425  1 assertEquals('A', map.get(3).get(seq1).charValue());
2426  1 assertEquals('A', map.get(4).get(seq1).charValue());
2427  1 assertEquals('A', map.get(6).get(seq1).charValue());
2428  1 assertEquals('T', map.get(7).get(seq1).charValue());
2429  1 assertEquals('T', map.get(11).get(seq1).charValue());
2430  1 assertEquals('T', map.get(12).get(seq1).charValue());
2431    }
2432   
2433    /**
2434    * Test for the case where the products for which we want CDS are specified.
2435    * This is to represent the case where EMBL has CDS mappings to both Uniprot
2436    * and EMBLCDSPROTEIN. makeCdsAlignment() should only return the mappings for
2437    * the protein sequences specified.
2438    */
 
2439  1 toggle @Test(groups = { "Functional" })
2440    public void testMakeCdsAlignment_filterProducts()
2441    {
2442  1 SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
2443  1 SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC");
2444  1 SequenceI pep1 = new Sequence("Uniprot|pep1", "GF");
2445  1 SequenceI pep2 = new Sequence("Uniprot|pep2", "GFP");
2446  1 SequenceI pep3 = new Sequence("EMBL|pep3", "GF");
2447  1 SequenceI pep4 = new Sequence("EMBL|pep4", "GFP");
2448  1 dna1.createDatasetSequence();
2449  1 dna2.createDatasetSequence();
2450  1 pep1.createDatasetSequence();
2451  1 pep2.createDatasetSequence();
2452  1 pep3.createDatasetSequence();
2453  1 pep4.createDatasetSequence();
2454  1 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });
2455  1 dna.setDataset(null);
2456  1 AlignmentI emblPeptides = new Alignment(new SequenceI[] { pep3, pep4 });
2457  1 emblPeptides.setDataset(null);
2458   
2459  1 AlignedCodonFrame acf = new AlignedCodonFrame();
2460  1 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
2461    new int[] { 1, 2 }, 3, 1);
2462  1 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
2463  1 acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map);
2464  1 dna.addCodonFrame(acf);
2465   
2466  1 acf = new AlignedCodonFrame();
2467  1 map = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, new int[] { 1, 3 },
2468    3, 1);
2469  1 acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
2470  1 acf.addMap(dna2.getDatasetSequence(), pep4.getDatasetSequence(), map);
2471  1 dna.addCodonFrame(acf);
2472   
2473    /*
2474    * execute method under test to find CDS for EMBL peptides only
2475    */
2476  1 AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
2477    dna1, dna2 }, dna.getDataset(), emblPeptides.getSequencesArray());
2478   
2479  1 assertEquals(2, cds.getSequences().size());
2480  1 assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
2481  1 assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString());
2482   
2483    /*
2484    * verify shared, extended alignment dataset
2485    */
2486  1 assertSame(dna.getDataset(), cds.getDataset());
2487  1 assertTrue(dna.getDataset().getSequences()
2488    .contains(cds.getSequenceAt(0).getDatasetSequence()));
2489  1 assertTrue(dna.getDataset().getSequences()
2490    .contains(cds.getSequenceAt(1).getDatasetSequence()));
2491   
2492    /*
2493    * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide
2494    * the mappings are on the shared alignment dataset
2495    */
2496  1 List<AlignedCodonFrame> cdsMappings = cds.getDataset().getCodonFrames();
2497    /*
2498    * 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep)
2499    */
2500  1 assertEquals(6, cdsMappings.size());
2501   
2502    /*
2503    * verify that mapping sets for dna and cds alignments are different
2504    * [not current behaviour - all mappings are on the alignment dataset]
2505    */
2506    // select -> subselect type to test.
2507    // Assert.assertNotSame(dna.getCodonFrames(), cds.getCodonFrames());
2508    // assertEquals(4, dna.getCodonFrames().size());
2509    // assertEquals(4, cds.getCodonFrames().size());
2510   
2511    /*
2512    * Two mappings involve pep3 (dna to pep3, cds to pep3)
2513    * Mapping from pep3 to GGGTTT in first new exon sequence
2514    */
2515  1 List<AlignedCodonFrame> pep3Mappings = MappingUtils
2516    .findMappingsForSequence(pep3, cdsMappings);
2517  1 assertEquals(2, pep3Mappings.size());
2518  1 List<AlignedCodonFrame> mappings = MappingUtils
2519    .findMappingsForSequence(cds.getSequenceAt(0), pep3Mappings);
2520  1 assertEquals(1, mappings.size());
2521   
2522    // map G to GGG
2523  1 SearchResultsI sr = MappingUtils.buildSearchResults(pep3, 1, mappings);
2524  1 assertEquals(1, sr.getResults().size());
2525  1 SearchResultMatchI m = sr.getResults().get(0);
2526  1 assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
2527  1 assertEquals(1, m.getStart());
2528  1 assertEquals(3, m.getEnd());
2529    // map F to TTT
2530  1 sr = MappingUtils.buildSearchResults(pep3, 2, mappings);
2531  1 m = sr.getResults().get(0);
2532  1 assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
2533  1 assertEquals(4, m.getStart());
2534  1 assertEquals(6, m.getEnd());
2535   
2536    /*
2537    * Two mappings involve pep4 (dna to pep4, cds to pep4)
2538    * Verify mapping from pep4 to GGGTTTCCC in second new exon sequence
2539    */
2540  1 List<AlignedCodonFrame> pep4Mappings = MappingUtils
2541    .findMappingsForSequence(pep4, cdsMappings);
2542  1 assertEquals(2, pep4Mappings.size());
2543  1 mappings = MappingUtils.findMappingsForSequence(cds.getSequenceAt(1),
2544    pep4Mappings);
2545  1 assertEquals(1, mappings.size());
2546    // map G to GGG
2547  1 sr = MappingUtils.buildSearchResults(pep4, 1, mappings);
2548  1 assertEquals(1, sr.getResults().size());
2549  1 m = sr.getResults().get(0);
2550  1 assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
2551  1 assertEquals(1, m.getStart());
2552  1 assertEquals(3, m.getEnd());
2553    // map F to TTT
2554  1 sr = MappingUtils.buildSearchResults(pep4, 2, mappings);
2555  1 m = sr.getResults().get(0);
2556  1 assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
2557  1 assertEquals(4, m.getStart());
2558  1 assertEquals(6, m.getEnd());
2559    // map P to CCC
2560  1 sr = MappingUtils.buildSearchResults(pep4, 3, mappings);
2561  1 m = sr.getResults().get(0);
2562  1 assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
2563  1 assertEquals(7, m.getStart());
2564  1 assertEquals(9, m.getEnd());
2565    }
2566   
2567    /**
2568    * Test the method that just copies aligned sequences, provided all sequences
2569    * to be aligned share the aligned sequence's dataset
2570    */
 
2571  1 toggle @Test(groups = "Functional")
2572    public void testAlignAsSameSequences()
2573    {
2574  1 SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa");
2575  1 SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA");
2576  1 AlignmentI al1 = new Alignment(new SequenceI[] { dna1, dna2 });
2577  1 ((Alignment) al1).createDatasetAlignment();
2578   
2579  1 SequenceI dna3 = new Sequence(dna1);
2580  1 SequenceI dna4 = new Sequence(dna2);
2581  1 assertSame(dna3.getDatasetSequence(), dna1.getDatasetSequence());
2582  1 assertSame(dna4.getDatasetSequence(), dna2.getDatasetSequence());
2583  1 String seq1 = "-cc-GG-GT-TT--aaa";
2584  1 dna3.setSequence(seq1);
2585  1 String seq2 = "C--C-Cgg--gtt-tAA-A-";
2586  1 dna4.setSequence(seq2);
2587  1 AlignmentI al2 = new Alignment(new SequenceI[] { dna3, dna4 });
2588  1 ((Alignment) al2).createDatasetAlignment();
2589   
2590  1 assertTrue(AlignmentUtils.alignAsSameSequences(al1, al2));
2591  1 assertEquals(seq1, al1.getSequenceAt(0).getSequenceAsString());
2592  1 assertEquals(seq2, al1.getSequenceAt(1).getSequenceAsString());
2593   
2594    /*
2595    * add another sequence to 'aligned' - should still succeed, since
2596    * unaligned sequences still share a dataset with aligned sequences
2597    */
2598  1 SequenceI dna5 = new Sequence("dna5", "CCCgggtttAAA");
2599  1 dna5.createDatasetSequence();
2600  1 al2.addSequence(dna5);
2601  1 assertTrue(AlignmentUtils.alignAsSameSequences(al1, al2));
2602  1 assertEquals(seq1, al1.getSequenceAt(0).getSequenceAsString());
2603  1 assertEquals(seq2, al1.getSequenceAt(1).getSequenceAsString());
2604   
2605    /*
2606    * add another sequence to 'unaligned' - should fail, since now not
2607    * all unaligned sequences share a dataset with aligned sequences
2608    */
2609  1 SequenceI dna6 = new Sequence("dna6", "CCCgggtttAAA");
2610  1 dna6.createDatasetSequence();
2611  1 al1.addSequence(dna6);
2612    // JAL-2110 JBP Comment: what's the use case for this behaviour ?
2613  1 assertFalse(AlignmentUtils.alignAsSameSequences(al1, al2));
2614    }
2615   
 
2616  1 toggle @Test(groups = "Functional")
2617    public void testAlignAsSameSequencesMultipleSubSeq()
2618    {
2619  1 SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa");
2620  1 SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA");
2621  1 SequenceI as1 = dna1.deriveSequence();
2622  1 SequenceI as2 = dna1.deriveSequence().getSubSequence(3, 7);
2623  1 SequenceI as3 = dna2.deriveSequence();
2624  1 as1.insertCharAt(6, 5, '-');
2625  1 String s_as1 = as1.getSequenceAsString();
2626  1 as2.insertCharAt(6, 5, '-');
2627  1 String s_as2 = as2.getSequenceAsString();
2628  1 as3.insertCharAt(6, 5, '-');
2629  1 String s_as3 = as3.getSequenceAsString();
2630  1 AlignmentI aligned = new Alignment(new SequenceI[] { as1, as2, as3 });
2631   
2632    // why do we need to cast this still ?
2633  1 ((Alignment) aligned).createDatasetAlignment();
2634  1 SequenceI uas1 = dna1.deriveSequence();
2635  1 SequenceI uas2 = dna1.deriveSequence().getSubSequence(3, 7);
2636  1 SequenceI uas3 = dna2.deriveSequence();
2637  1 AlignmentI tobealigned = new Alignment(new SequenceI[] { uas1, uas2,
2638    uas3 });
2639  1 ((Alignment) tobealigned).createDatasetAlignment();
2640   
2641  1 assertTrue(AlignmentUtils.alignAsSameSequences(tobealigned, aligned));
2642  1 assertEquals(s_as1, uas1.getSequenceAsString());
2643  1 assertEquals(s_as2, uas2.getSequenceAsString());
2644  1 assertEquals(s_as3, uas3.getSequenceAsString());
2645    }
2646   
 
2647  1 toggle @Test(groups = { "Functional" })
2648    public void testTransferGeneLoci()
2649    {
2650  1 SequenceI from = new Sequence("transcript",
2651    "aaacccgggTTTAAACCCGGGtttaaacccgggttt");
2652  1 SequenceI to = new Sequence("CDS", "TTTAAACCCGGG");
2653  1 MapList map = new MapList(new int[] { 1, 12 }, new int[] { 10, 21 }, 1,
2654    1);
2655   
2656    /*
2657    * first with nothing to transfer
2658    */
2659  1 AlignmentUtils.transferGeneLoci(from, map, to);
2660  1 assertNull(to.getGeneLoci());
2661   
2662    /*
2663    * next with gene loci set on 'from' sequence
2664    */
2665  1 int[] exons = new int[] { 100, 105, 155, 164, 210, 229 };
2666  1 MapList geneMap = new MapList(new int[] { 1, 36 }, exons, 1, 1);
2667  1 from.setGeneLoci("human", "GRCh38", "7", geneMap);
2668  1 AlignmentUtils.transferGeneLoci(from, map, to);
2669   
2670  1 GeneLociI toLoci = to.getGeneLoci();
2671  1 assertNotNull(toLoci);
2672    // DBRefEntry constructor upper-cases 'source'
2673  1 assertEquals("HUMAN", toLoci.getSpeciesId());
2674  1 assertEquals("GRCh38", toLoci.getAssemblyId());
2675  1 assertEquals("7", toLoci.getChromosomeId());
2676   
2677    /*
2678    * transcript 'exons' are 1-6, 7-16, 17-36
2679    * CDS 1:12 is transcript 10-21
2680    * transcript 'CDS' is 10-16, 17-21
2681    * which is 'gene' 158-164, 210-214
2682    */
2683  1 MapList toMap = toLoci.getMap();
2684  1 assertEquals(1, toMap.getFromRanges().size());
2685  1 assertEquals(2, toMap.getFromRanges().get(0).length);
2686  1 assertEquals(1, toMap.getFromRanges().get(0)[0]);
2687  1 assertEquals(12, toMap.getFromRanges().get(0)[1]);
2688  1 assertEquals(2, toMap.getToRanges().size());
2689  1 assertEquals(2, toMap.getToRanges().get(0).length);
2690  1 assertEquals(158, toMap.getToRanges().get(0)[0]);
2691  1 assertEquals(164, toMap.getToRanges().get(0)[1]);
2692  1 assertEquals(210, toMap.getToRanges().get(1)[0]);
2693  1 assertEquals(214, toMap.getToRanges().get(1)[1]);
2694    // or summarised as (but toString might change in future):
2695  1 assertEquals("[ [1, 12] ] 1:1 to [ [158, 164] [210, 214] ]",
2696    toMap.toString());
2697   
2698    /*
2699    * an existing value is not overridden
2700    */
2701  1 geneMap = new MapList(new int[] { 1, 36 }, new int[] { 36, 1 }, 1, 1);
2702  1 from.setGeneLoci("inhuman", "GRCh37", "6", geneMap);
2703  1 AlignmentUtils.transferGeneLoci(from, map, to);
2704  1 assertEquals("GRCh38", toLoci.getAssemblyId());
2705  1 assertEquals("7", toLoci.getChromosomeId());
2706  1 toMap = toLoci.getMap();
2707  1 assertEquals("[ [1, 12] ] 1:1 to [ [158, 164] [210, 214] ]",
2708    toMap.toString());
2709    }
2710   
2711    /**
2712    * Tests for the method that maps nucleotide to protein based on CDS features
2713    */
 
2714  1 toggle @Test(groups = "Functional")
2715    public void testMapCdsToProtein()
2716    {
2717  1 SequenceI peptide = new Sequence("pep", "KLQ");
2718   
2719    /*
2720    * Case 1: CDS 3 times length of peptide
2721    * NB method only checks lengths match, not translation
2722    */
2723  1 SequenceI dna = new Sequence("dna", "AACGacgtCTCCT");
2724  1 dna.createDatasetSequence();
2725  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
2726  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 13, null));
2727  1 MapList ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
2728  1 assertEquals(3, ml.getFromRatio());
2729  1 assertEquals(1, ml.getToRatio());
2730  1 assertEquals("[[1, 3]]",
2731    Arrays.deepToString(ml.getToRanges().toArray()));
2732  1 assertEquals("[[1, 4], [9, 13]]",
2733    Arrays.deepToString(ml.getFromRanges().toArray()));
2734   
2735    /*
2736    * Case 2: CDS 3 times length of peptide + stop codon
2737    * (note code does not currently check trailing codon is a stop codon)
2738    */
2739  1 dna = new Sequence("dna", "AACGacgtCTCCTCCC");
2740  1 dna.createDatasetSequence();
2741  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
2742  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 16, null));
2743  1 ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
2744  1 assertEquals(3, ml.getFromRatio());
2745  1 assertEquals(1, ml.getToRatio());
2746  1 assertEquals("[[1, 3]]",
2747    Arrays.deepToString(ml.getToRanges().toArray()));
2748  1 assertEquals("[[1, 4], [9, 13]]",
2749    Arrays.deepToString(ml.getFromRanges().toArray()));
2750   
2751    /*
2752    * Case 3: CDS longer than 3 * peptide + stop codon - no mapping is made
2753    */
2754  1 dna = new Sequence("dna", "AACGacgtCTCCTTGATCA");
2755  1 dna.createDatasetSequence();
2756  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
2757  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 19, null));
2758  1 ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
2759  1 assertNull(ml);
2760   
2761    /*
2762    * Case 4: CDS shorter than 3 * peptide - no mapping is made
2763    */
2764  1 dna = new Sequence("dna", "AACGacgtCTCC");
2765  1 dna.createDatasetSequence();
2766  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
2767  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 12, null));
2768  1 ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
2769  1 assertNull(ml);
2770   
2771    /*
2772    * Case 5: CDS 3 times length of peptide + part codon - mapping is truncated
2773    */
2774  1 dna = new Sequence("dna", "AACGacgtCTCCTTG");
2775  1 dna.createDatasetSequence();
2776  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
2777  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 15, null));
2778  1 ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
2779  1 assertEquals(3, ml.getFromRatio());
2780  1 assertEquals(1, ml.getToRatio());
2781  1 assertEquals("[[1, 3]]",
2782    Arrays.deepToString(ml.getToRanges().toArray()));
2783  1 assertEquals("[[1, 4], [9, 13]]",
2784    Arrays.deepToString(ml.getFromRanges().toArray()));
2785   
2786    /*
2787    * Case 6: incomplete start codon corresponding to X in peptide
2788    */
2789  1 dna = new Sequence("dna", "ACGacgtCTCCTTGG");
2790  1 dna.createDatasetSequence();
2791  1 SequenceFeature sf = new SequenceFeature("CDS", "", 1, 3, null);
2792  1 sf.setPhase("2"); // skip 2 positions (AC) to start of next codon (GCT)
2793  1 dna.addSequenceFeature(sf);
2794  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 8, 15, null));
2795  1 peptide = new Sequence("pep", "XLQ");
2796  1 ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
2797  1 assertEquals("[[2, 3]]",
2798    Arrays.deepToString(ml.getToRanges().toArray()));
2799  1 assertEquals("[[3, 3], [8, 12]]",
2800    Arrays.deepToString(ml.getFromRanges().toArray()));
2801    }
2802   
2803    /**
2804    * Tests for the method that locates the CDS sequence that has a mapping to
2805    * the given protein. That is, given a transcript-to-peptide mapping, find the
2806    * cds-to-peptide mapping that relates to both, and return the CDS sequence.
2807    */
 
2808  0 toggle @Test
2809    public void testFindCdsForProtein()
2810    {
2811  0 List<AlignedCodonFrame> mappings = new ArrayList<>();
2812  0 AlignedCodonFrame acf1 = new AlignedCodonFrame();
2813  0 mappings.add(acf1);
2814   
2815  0 SequenceI dna1 = new Sequence("dna1", "cgatATcgGCTATCTATGacg");
2816  0 dna1.createDatasetSequence();
2817   
2818    // NB we currently exclude STOP codon from CDS sequences
2819    // the test would need to change if this changes in future
2820  0 SequenceI cds1 = new Sequence("cds1", "ATGCTATCT");
2821  0 cds1.createDatasetSequence();
2822   
2823  0 SequenceI pep1 = new Sequence("pep1", "MLS");
2824  0 pep1.createDatasetSequence();
2825  0 List<AlignedCodonFrame> seqMappings = new ArrayList<>();
2826  0 MapList mapList = new MapList(
2827    new int[]
2828    { 5, 6, 9, 15 }, new int[] { 1, 3 }, 3, 1);
2829  0 Mapping dnaToPeptide = new Mapping(pep1.getDatasetSequence(), mapList);
2830   
2831    // add dna to peptide mapping
2832  0 seqMappings.add(acf1);
2833  0 acf1.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(),
2834    mapList);
2835   
2836    /*
2837    * first case - no dna-to-CDS mapping exists - search fails
2838    */
2839  0 SequenceI seq = AlignmentUtils.findCdsForProtein(mappings, dna1,
2840    seqMappings, dnaToPeptide);
2841  0 assertNull(seq);
2842   
2843    /*
2844    * second case - CDS-to-peptide mapping exists but no dna-to-CDS
2845    * - search fails
2846    */
2847    // todo this test fails if the mapping is added to acf1, not acf2
2848    // need to tidy up use of lists of mappings in AlignedCodonFrame
2849  0 AlignedCodonFrame acf2 = new AlignedCodonFrame();
2850  0 mappings.add(acf2);
2851  0 MapList cdsToPeptideMapping = new MapList(new int[]
2852    { 1, 9 }, new int[] { 1, 3 }, 3, 1);
2853  0 acf2.addMap(cds1.getDatasetSequence(), pep1.getDatasetSequence(),
2854    cdsToPeptideMapping);
2855  0 assertNull(AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
2856    dnaToPeptide));
2857   
2858    /*
2859    * third case - add dna-to-CDS mapping - CDS is now found!
2860    */
2861  0 MapList dnaToCdsMapping = new MapList(new int[] { 5, 6, 9, 15 },
2862    new int[]
2863    { 1, 9 }, 1, 1);
2864  0 acf1.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(),
2865    dnaToCdsMapping);
2866  0 seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
2867    dnaToPeptide);
2868  0 assertSame(seq, cds1.getDatasetSequence());
2869    }
2870   
2871    /**
2872    * Tests for the method that locates the CDS sequence that has a mapping to
2873    * the given protein. That is, given a transcript-to-peptide mapping, find the
2874    * cds-to-peptide mapping that relates to both, and return the CDS sequence.
2875    * This test is for the case where transcript and CDS are the same length.
2876    */
 
2877  0 toggle @Test
2878    public void testFindCdsForProtein_noUTR()
2879    {
2880  0 List<AlignedCodonFrame> mappings = new ArrayList<>();
2881  0 AlignedCodonFrame acf1 = new AlignedCodonFrame();
2882  0 mappings.add(acf1);
2883   
2884  0 SequenceI dna1 = new Sequence("dna1", "ATGCTATCTTAA");
2885  0 dna1.createDatasetSequence();
2886   
2887    // NB we currently exclude STOP codon from CDS sequences
2888    // the test would need to change if this changes in future
2889  0 SequenceI cds1 = new Sequence("cds1", "ATGCTATCT");
2890  0 cds1.createDatasetSequence();
2891   
2892  0 SequenceI pep1 = new Sequence("pep1", "MLS");
2893  0 pep1.createDatasetSequence();
2894  0 List<AlignedCodonFrame> seqMappings = new ArrayList<>();
2895  0 MapList mapList = new MapList(
2896    new int[]
2897    { 1, 9 }, new int[] { 1, 3 }, 3, 1);
2898  0 Mapping dnaToPeptide = new Mapping(pep1.getDatasetSequence(), mapList);
2899   
2900    // add dna to peptide mapping
2901  0 seqMappings.add(acf1);
2902  0 acf1.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(),
2903    mapList);
2904   
2905    /*
2906    * first case - transcript lacks CDS features - it appears to be
2907    * the CDS sequence and is returned
2908    */
2909  0 SequenceI seq = AlignmentUtils.findCdsForProtein(mappings, dna1,
2910    seqMappings, dnaToPeptide);
2911  0 assertSame(seq, dna1.getDatasetSequence());
2912   
2913    /*
2914    * second case - transcript has CDS feature - this means it is
2915    * not returned as a match for CDS (CDS sequences don't have CDS features)
2916    */
2917  0 dna1.addSequenceFeature(
2918    new SequenceFeature(SequenceOntologyI.CDS, "cds", 1, 12, null));
2919  0 seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
2920    dnaToPeptide);
2921  0 assertNull(seq);
2922   
2923    /*
2924    * third case - CDS-to-peptide mapping exists but no dna-to-CDS
2925    * - search fails
2926    */
2927    // todo this test fails if the mapping is added to acf1, not acf2
2928    // need to tidy up use of lists of mappings in AlignedCodonFrame
2929  0 AlignedCodonFrame acf2 = new AlignedCodonFrame();
2930  0 mappings.add(acf2);
2931  0 MapList cdsToPeptideMapping = new MapList(new int[]
2932    { 1, 9 }, new int[] { 1, 3 }, 3, 1);
2933  0 acf2.addMap(cds1.getDatasetSequence(), pep1.getDatasetSequence(),
2934    cdsToPeptideMapping);
2935  0 assertNull(AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
2936    dnaToPeptide));
2937   
2938    /*
2939    * fourth case - add dna-to-CDS mapping - CDS is now found!
2940    */
2941  0 MapList dnaToCdsMapping = new MapList(new int[] { 1, 9 },
2942    new int[]
2943    { 1, 9 }, 1, 1);
2944  0 acf1.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(),
2945    dnaToCdsMapping);
2946  0 seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
2947    dnaToPeptide);
2948  0 assertSame(seq, cds1.getDatasetSequence());
2949    }
2950    }