Clover icon

Coverage Report

  1. Project Clover database Wed Nov 12 2025 09:00:47 GMT
  2. Package jalview.analysis

File AlignmentUtilsTests.java

 

Code metrics

14
1,287
54
1
2,952
1,919
61
0.05
23.83
54
1.13

Classes

Class Line # Actions
AlignmentUtilsTests 77 1,287 61
0.9712178 97.1%
 

Contributing tests

This file is covered by 51 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.analysis;
22   
23    import static org.testng.AssertJUnit.assertEquals;
24    import static org.testng.AssertJUnit.assertFalse;
25    import static org.testng.AssertJUnit.assertNotNull;
26    import static org.testng.AssertJUnit.assertNull;
27    import static org.testng.AssertJUnit.assertSame;
28    import static org.testng.AssertJUnit.assertTrue;
29   
30    import java.awt.Color;
31    import java.io.IOException;
32    import java.util.ArrayList;
33    import java.util.Arrays;
34    import java.util.HashMap;
35    import java.util.LinkedHashMap;
36    import java.util.List;
37    import java.util.Map;
38    import java.util.SortedMap;
39    import java.util.TreeMap;
40    import java.util.Vector;
41   
42    import org.testng.Assert;
43    import org.testng.annotations.BeforeClass;
44    import org.testng.annotations.DataProvider;
45    import org.testng.annotations.Test;
46   
47    import jalview.datamodel.AlignedCodonFrame;
48    import jalview.datamodel.Alignment;
49    import jalview.datamodel.AlignmentAnnotation;
50    import jalview.datamodel.AlignmentI;
51    import jalview.datamodel.Annotation;
52    import jalview.datamodel.ContactListI;
53    import jalview.datamodel.ContactMatrixI;
54    import jalview.datamodel.DBRefEntry;
55    import jalview.datamodel.GeneLociI;
56    import jalview.datamodel.Mapping;
57    import jalview.datamodel.PDBEntry;
58    import jalview.datamodel.SearchResultMatchI;
59    import jalview.datamodel.SearchResultsI;
60    import jalview.datamodel.SeqDistanceContactMatrix;
61    import jalview.datamodel.Sequence;
62    import jalview.datamodel.SequenceFeature;
63    import jalview.datamodel.SequenceGroup;
64    import jalview.datamodel.SequenceI;
65    import jalview.gui.JvOptionPane;
66    import jalview.io.AppletFormatAdapter;
67    import jalview.io.DataSourceType;
68    import jalview.io.FileFormat;
69    import jalview.io.FileFormatI;
70    import jalview.io.FormatAdapter;
71    import jalview.io.gff.SequenceOntologyI;
72    import jalview.util.Comparison;
73    import jalview.util.Constants;
74    import jalview.util.MapList;
75    import jalview.util.MappingUtils;
76   
 
77    public class AlignmentUtilsTests
78    {
79    private static Sequence ts = new Sequence("short",
80    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm");
81   
 
82  1 toggle @BeforeClass(alwaysRun = true)
83    public void setUpJvOptionPane()
84    {
85  1 JvOptionPane.setInteractiveMode(false);
86  1 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
87   
88  1 AlignmentAnnotation ann1 = new AlignmentAnnotation(
89    "Secondary Structure", "Secondary Structure",
90    new Annotation[] {});
91  1 AlignmentAnnotation ann2 = new AlignmentAnnotation("jnetpred",
92    "jnetpred", new Annotation[] {});
93  1 AlignmentAnnotation ann3 = new AlignmentAnnotation("Temp", "Temp",
94    new Annotation[] {});
95  1 AlignmentAnnotation ann4 = new AlignmentAnnotation("Temp", "Temp",
96    new Annotation[] {});
97   
98  1 AlignmentAnnotation[] anns1 = new AlignmentAnnotation[] { ann1, ann3,
99    ann4 };
100   
101  1 AlignmentAnnotation[] anns2 = new AlignmentAnnotation[] { ann2, ann3,
102    ann4 };
103   
104  1 AlignmentAnnotation[] anns3 = new AlignmentAnnotation[] { ann3, ann4 };
105   
106  1 AlignmentAnnotation[] anns4 = new AlignmentAnnotation[0];
107   
108  1 AlignmentAnnotation[] anns5 = new AlignmentAnnotation[] { ann1, ann2,
109    ann3, ann4 };
110    }
111   
 
112  1 toggle @Test(groups = { "Functional" })
113    public void testExpandContext()
114    {
115  1 AlignmentI al = new Alignment(new Sequence[] {});
116  6 for (int i = 4; i < 14; i += 2)
117    {
118  5 SequenceI s1 = ts.deriveSequence().getSubSequence(i, i + 7);
119  5 al.addSequence(s1);
120    }
121  1 System.out.println(new AppletFormatAdapter()
122    .formatSequences(FileFormat.Clustal, al, true));
123  27 for (int flnk = -1; flnk < 25; flnk++)
124    {
125  26 AlignmentI exp = AlignmentUtils.expandContext(al, flnk);
126  26 System.out.println("\nFlank size: " + flnk);
127  26 System.out.println(new AppletFormatAdapter()
128    .formatSequences(FileFormat.Clustal, exp, true));
129  26 if (flnk == -1)
130    {
131    /*
132    * Full expansion to complete sequences
133    */
134  1 for (SequenceI sq : exp.getSequences())
135    {
136  5 String ung = sq.getSequenceAsString().replaceAll("-+", "");
137  5 final String errorMsg = "Flanking sequence not the same as original dataset sequence.\n"
138    + ung + "\n"
139    + sq.getDatasetSequence().getSequenceAsString();
140  5 assertTrue(errorMsg, ung.equalsIgnoreCase(
141    sq.getDatasetSequence().getSequenceAsString()));
142    }
143    }
144  25 else if (flnk == 24)
145    {
146    /*
147    * Last sequence is fully expanded, others have leading gaps to match
148    */
149  1 assertTrue(exp.getSequenceAt(4).getSequenceAsString()
150    .startsWith("abc"));
151  1 assertTrue(exp.getSequenceAt(3).getSequenceAsString()
152    .startsWith("--abc"));
153  1 assertTrue(exp.getSequenceAt(2).getSequenceAsString()
154    .startsWith("----abc"));
155  1 assertTrue(exp.getSequenceAt(1).getSequenceAsString()
156    .startsWith("------abc"));
157  1 assertTrue(exp.getSequenceAt(0).getSequenceAsString()
158    .startsWith("--------abc"));
159    }
160    }
161    }
162   
163    /**
164    * Test that annotations are correctly adjusted by expandContext
165    */
 
166  1 toggle @Test(groups = { "Functional" })
167    public void testExpandContext_annotation()
168    {
169  1 AlignmentI al = new Alignment(new Sequence[] {});
170  1 SequenceI ds = new Sequence("Seq1", "ABCDEFGHI");
171    // subsequence DEF:
172  1 SequenceI seq1 = ds.deriveSequence().getSubSequence(3, 6);
173  1 al.addSequence(seq1);
174   
175    /*
176    * Annotate DEF with 4/5/6 respectively
177    */
178  1 Annotation[] anns = new Annotation[] { new Annotation(4),
179    new Annotation(5), new Annotation(6) };
180  1 AlignmentAnnotation ann = new AlignmentAnnotation("SS",
181    "secondary structure", anns);
182  1 seq1.addAlignmentAnnotation(ann);
183   
184    /*
185    * The annotations array should match aligned positions
186    */
187  1 assertEquals(3, ann.annotations.length);
188  1 assertEquals(4, ann.annotations[0].value, 0.001);
189  1 assertEquals(5, ann.annotations[1].value, 0.001);
190  1 assertEquals(6, ann.annotations[2].value, 0.001);
191   
192    /*
193    * Check annotation to sequence position mappings before expanding the
194    * sequence; these are set up in Sequence.addAlignmentAnnotation ->
195    * Annotation.setSequenceRef -> createSequenceMappings
196    */
197  1 assertNull(ann.getAnnotationForPosition(1));
198  1 assertNull(ann.getAnnotationForPosition(2));
199  1 assertNull(ann.getAnnotationForPosition(3));
200  1 assertEquals(4, ann.getAnnotationForPosition(4).value, 0.001);
201  1 assertEquals(5, ann.getAnnotationForPosition(5).value, 0.001);
202  1 assertEquals(6, ann.getAnnotationForPosition(6).value, 0.001);
203  1 assertNull(ann.getAnnotationForPosition(7));
204  1 assertNull(ann.getAnnotationForPosition(8));
205  1 assertNull(ann.getAnnotationForPosition(9));
206   
207    /*
208    * Expand the subsequence to the full sequence abcDEFghi
209    */
210  1 AlignmentI expanded = AlignmentUtils.expandContext(al, -1);
211  1 assertEquals("abcDEFghi",
212    expanded.getSequenceAt(0).getSequenceAsString());
213   
214    /*
215    * Confirm the alignment and sequence have the same SS annotation,
216    * referencing the expanded sequence
217    */
218  1 ann = expanded.getSequenceAt(0).getAnnotation()[0];
219  1 assertSame(ann, expanded.getAlignmentAnnotation()[0]);
220  1 assertSame(expanded.getSequenceAt(0), ann.sequenceRef);
221   
222    /*
223    * The annotations array should have null values except for annotated
224    * positions
225    */
226  1 assertNull(ann.annotations[0]);
227  1 assertNull(ann.annotations[1]);
228  1 assertNull(ann.annotations[2]);
229  1 assertEquals(4, ann.annotations[3].value, 0.001);
230  1 assertEquals(5, ann.annotations[4].value, 0.001);
231  1 assertEquals(6, ann.annotations[5].value, 0.001);
232  1 assertNull(ann.annotations[6]);
233  1 assertNull(ann.annotations[7]);
234  1 assertNull(ann.annotations[8]);
235   
236    /*
237    * sequence position mappings should be unchanged
238    */
239  1 assertNull(ann.getAnnotationForPosition(1));
240  1 assertNull(ann.getAnnotationForPosition(2));
241  1 assertNull(ann.getAnnotationForPosition(3));
242  1 assertEquals(4, ann.getAnnotationForPosition(4).value, 0.001);
243  1 assertEquals(5, ann.getAnnotationForPosition(5).value, 0.001);
244  1 assertEquals(6, ann.getAnnotationForPosition(6).value, 0.001);
245  1 assertNull(ann.getAnnotationForPosition(7));
246  1 assertNull(ann.getAnnotationForPosition(8));
247  1 assertNull(ann.getAnnotationForPosition(9));
248    }
249   
250    /**
251    * Test method that returns a map of lists of sequences by sequence name.
252    *
253    * @throws IOException
254    */
 
255  1 toggle @Test(groups = { "Functional" })
256    public void testGetSequencesByName() throws IOException
257    {
258  1 final String data = ">Seq1Name\nKQYL\n" + ">Seq2Name\nRFPW\n"
259    + ">Seq1Name\nABCD\n";
260  1 AlignmentI al = loadAlignment(data, FileFormat.Fasta);
261  1 Map<String, List<SequenceI>> map = AlignmentUtils
262    .getSequencesByName(al);
263  1 assertEquals(2, map.keySet().size());
264  1 assertEquals(2, map.get("Seq1Name").size());
265  1 assertEquals("KQYL", map.get("Seq1Name").get(0).getSequenceAsString());
266  1 assertEquals("ABCD", map.get("Seq1Name").get(1).getSequenceAsString());
267  1 assertEquals(1, map.get("Seq2Name").size());
268  1 assertEquals("RFPW", map.get("Seq2Name").get(0).getSequenceAsString());
269    }
270   
271    /**
272    * Helper method to load an alignment and ensure dataset sequences are set up.
273    *
274    * @param data
275    * @param format
276    * TODO
277    * @return
278    * @throws IOException
279    */
 
280  1 toggle protected AlignmentI loadAlignment(final String data, FileFormatI format)
281    throws IOException
282    {
283  1 AlignmentI a = new FormatAdapter().readFile(data, DataSourceType.PASTE,
284    format);
285  1 a.setDataset(null);
286  1 return a;
287    }
288   
289    /**
290    * Test mapping of protein to cDNA, for the case where we have no sequence
291    * cross-references, so mappings are made first-served 1-1 where sequences
292    * translate.
293    *
294    * @throws IOException
295    */
 
296  1 toggle @Test(groups = { "Functional" })
297    public void testMapProteinAlignmentToCdna_noXrefs() throws IOException
298    {
299  1 List<SequenceI> protseqs = new ArrayList<>();
300  1 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
301  1 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
302  1 protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
303  1 AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
304  1 protein.setDataset(null);
305   
306  1 List<SequenceI> dnaseqs = new ArrayList<>();
307  1 dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR
308  1 dnaseqs.add(new Sequence("EMBL|A22222", "GAGATACAA")); // = EIQ
309  1 dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ
310  1 dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG")); // = EIQ
311  1 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[4]));
312  1 cdna.setDataset(null);
313   
314  1 assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
315   
316    // 3 mappings made, each from 1 to 1 sequence
317  1 assertEquals(3, protein.getCodonFrames().size());
318  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
319  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
320  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
321   
322    // V12345 mapped to A22222
323  1 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
324    .get(0);
325  1 assertEquals(1, acf.getdnaSeqs().length);
326  1 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
327    acf.getdnaSeqs()[0]);
328  1 Mapping[] protMappings = acf.getProtMappings();
329  1 assertEquals(1, protMappings.length);
330  1 MapList mapList = protMappings[0].getMap();
331  1 assertEquals(3, mapList.getFromRatio());
332  1 assertEquals(1, mapList.getToRatio());
333  1 assertTrue(
334    Arrays.equals(new int[]
335    { 1, 9 }, mapList.getFromRanges().get(0)));
336  1 assertEquals(1, mapList.getFromRanges().size());
337  1 assertTrue(
338    Arrays.equals(new int[]
339    { 1, 3 }, mapList.getToRanges().get(0)));
340  1 assertEquals(1, mapList.getToRanges().size());
341   
342    // V12346 mapped to A33333
343  1 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
344  1 assertEquals(1, acf.getdnaSeqs().length);
345  1 assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
346    acf.getdnaSeqs()[0]);
347   
348    // V12347 mapped to A11111
349  1 acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
350  1 assertEquals(1, acf.getdnaSeqs().length);
351  1 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
352    acf.getdnaSeqs()[0]);
353   
354    // no mapping involving the 'extra' A44444
355  1 assertTrue(protein.getCodonFrame(cdna.getSequenceAt(3)).isEmpty());
356    }
357   
358    /**
359    * Test for the alignSequenceAs method that takes two sequences and a mapping.
360    */
 
361  1 toggle @Test(groups = { "Functional" })
362    public void testAlignSequenceAs_withMapping_noIntrons()
363    {
364  1 MapList map = new MapList(new int[] { 1, 6 }, new int[] { 1, 2 }, 3, 1);
365   
366    /*
367    * No existing gaps in dna:
368    */
369  1 checkAlignSequenceAs("GGGAAA", "-A-L-", false, false, map,
370    "---GGG---AAA");
371   
372    /*
373    * Now introduce gaps in dna but ignore them when realigning.
374    */
375  1 checkAlignSequenceAs("-G-G-G-A-A-A-", "-A-L-", false, false, map,
376    "---GGG---AAA");
377   
378    /*
379    * Now include gaps in dna when realigning. First retaining 'mapped' gaps
380    * only, i.e. those within the exon region.
381    */
382  1 checkAlignSequenceAs("-G-G--G-A--A-A-", "-A-L-", true, false, map,
383    "---G-G--G---A--A-A");
384   
385    /*
386    * Include all gaps in dna when realigning (within and without the exon
387    * region). The leading gap, and the gaps between codons, are subsumed by
388    * the protein alignment gap.
389    */
390  1 checkAlignSequenceAs("-G-GG--AA-A---", "-A-L-", true, true, map,
391    "---G-GG---AA-A---");
392   
393    /*
394    * Include only unmapped gaps in dna when realigning (outside the exon
395    * region). The leading gap, and the gaps between codons, are subsumed by
396    * the protein alignment gap.
397    */
398  1 checkAlignSequenceAs("-G-GG--AA-A-", "-A-L-", false, true, map,
399    "---GGG---AAA---");
400    }
401   
402    /**
403    * Test for the alignSequenceAs method that takes two sequences and a mapping.
404    */
 
405  1 toggle @Test(groups = { "Functional" })
406    public void testAlignSequenceAs_withMapping_withIntrons()
407    {
408    /*
409    * Exons at codon 2 (AAA) and 4 (TTT)
410    */
411  1 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
412    new int[]
413    { 1, 2 }, 3, 1);
414   
415    /*
416    * Simple case: no gaps in dna
417    */
418  1 checkAlignSequenceAs("GGGAAACCCTTTGGG", "--A-L-", false, false, map,
419    "GGG---AAACCCTTTGGG");
420   
421    /*
422    * Add gaps to dna - but ignore when realigning.
423    */
424  1 checkAlignSequenceAs("-G-G-G--A--A---AC-CC-T-TT-GG-G-", "--A-L-", false,
425    false, map, "GGG---AAACCCTTTGGG");
426   
427    /*
428    * Add gaps to dna - include within exons only when realigning.
429    */
430  1 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-", true,
431    false, map, "GGG---A--A---ACCCT-TTGGG");
432   
433    /*
434    * Include gaps outside exons only when realigning.
435    */
436  1 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
437    false, true, map, "-G-G-GAAAC-CCTTT-GG-G-");
438   
439    /*
440    * Include gaps following first intron if we are 'preserving mapped gaps'
441    */
442  1 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-", true,
443    true, map, "-G-G-G--A--A---A-C-CC-T-TT-GG-G-");
444   
445    /*
446    * Include all gaps in dna when realigning.
447    */
448  1 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-", true,
449    true, map, "-G-G-G--A--A---A-C-CC-T-TT-GG-G-");
450    }
451   
452    /**
453    * Test for the case where not all of the protein sequence is mapped to cDNA.
454    */
 
455  1 toggle @Test(groups = { "Functional" })
456    public void testAlignSequenceAs_withMapping_withUnmappedProtein()
457    {
458    /*
459    * Exons at codon 2 (AAA) and 4 (TTT) mapped to A and P
460    */
461  1 final MapList map = new MapList(new int[] { 4, 6, 10, 12 },
462    new int[]
463    { 1, 1, 3, 3 }, 3, 1);
464   
465    /*
466    * -L- 'aligns' ccc------
467    */
468  1 checkAlignSequenceAs("gggAAAcccTTTggg", "-A-L-P-", false, false, map,
469    "gggAAAccc------TTTggg");
470    }
471   
472    /**
473    * Helper method that performs and verifies the method under test.
474    *
475    * @param alignee
476    * the sequence to be realigned
477    * @param alignModel
478    * the sequence whose alignment is to be copied
479    * @param preserveMappedGaps
480    * @param preserveUnmappedGaps
481    * @param map
482    * @param expected
483    */
 
484  14 toggle protected void checkAlignSequenceAs(final String alignee,
485    final String alignModel, final boolean preserveMappedGaps,
486    final boolean preserveUnmappedGaps, MapList map,
487    final String expected)
488    {
489  14 SequenceI alignMe = new Sequence("Seq1", alignee);
490  14 alignMe.createDatasetSequence();
491  14 SequenceI alignFrom = new Sequence("Seq2", alignModel);
492  14 alignFrom.createDatasetSequence();
493  14 AlignedCodonFrame acf = new AlignedCodonFrame();
494  14 acf.addMap(alignMe.getDatasetSequence(), alignFrom.getDatasetSequence(),
495    map);
496   
497  14 AlignmentUtils.alignSequenceAs(alignMe, alignFrom, acf, "---", '-',
498    preserveMappedGaps, preserveUnmappedGaps);
499  14 assertEquals(expected, alignMe.getSequenceAsString());
500    }
501   
502    /**
503    * Test for the alignSequenceAs method where we preserve gaps in introns only.
504    */
 
505  1 toggle @Test(groups = { "Functional" })
506    public void testAlignSequenceAs_keepIntronGapsOnly()
507    {
508   
509    /*
510    * Intron GGGAAA followed by exon CCCTTT
511    */
512  1 MapList map = new MapList(new int[] { 7, 12 }, new int[] { 1, 2 }, 3,
513    1);
514   
515  1 checkAlignSequenceAs("GG-G-AA-A-C-CC-T-TT", "AL", false, true, map,
516    "GG-G-AA-ACCCTTT");
517    }
518   
519    /**
520    * Test the method that realigns protein to match mapped codon alignment.
521    */
 
522  1 toggle @Test(groups = { "Functional" })
523    public void testAlignProteinAsDna()
524    {
525    // seq1 codons are [1,2,3] [4,5,6] [7,8,9] [10,11,12]
526  1 SequenceI dna1 = new Sequence("Seq1", "TGCCATTACCAG-");
527    // seq2 codons are [1,3,4] [5,6,7] [8,9,10] [11,12,13]
528  1 SequenceI dna2 = new Sequence("Seq2", "T-GCCATTACCAG");
529    // seq3 codons are [1,2,3] [4,5,7] [8,9,10] [11,12,13]
530  1 SequenceI dna3 = new Sequence("Seq3", "TGCCA-TTACCAG");
531  1 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
532  1 dna.setDataset(null);
533   
534    // protein alignment will be realigned like dna
535  1 SequenceI prot1 = new Sequence("Seq1", "CHYQ");
536  1 SequenceI prot2 = new Sequence("Seq2", "CHYQ");
537  1 SequenceI prot3 = new Sequence("Seq3", "CHYQ");
538  1 SequenceI prot4 = new Sequence("Seq4", "R-QSV"); // unmapped, unchanged
539  1 AlignmentI protein = new Alignment(
540    new SequenceI[]
541    { prot1, prot2, prot3, prot4 });
542  1 protein.setDataset(null);
543   
544  1 MapList map = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, 3,
545    1);
546  1 AlignedCodonFrame acf = new AlignedCodonFrame();
547  1 acf.addMap(dna1.getDatasetSequence(), prot1.getDatasetSequence(), map);
548  1 acf.addMap(dna2.getDatasetSequence(), prot2.getDatasetSequence(), map);
549  1 acf.addMap(dna3.getDatasetSequence(), prot3.getDatasetSequence(), map);
550  1 ArrayList<AlignedCodonFrame> acfs = new ArrayList<>();
551  1 acfs.add(acf);
552  1 protein.setCodonFrames(acfs);
553   
554    /*
555    * Translated codon order is [1,2,3] [1,3,4] [4,5,6] [4,5,7] [5,6,7] [7,8,9]
556    * [8,9,10] [10,11,12] [11,12,13]
557    */
558  1 AlignmentUtils.alignProteinAsDna(protein, dna);
559  1 assertEquals("C-H--Y-Q-", prot1.getSequenceAsString());
560  1 assertEquals("-C--H-Y-Q", prot2.getSequenceAsString());
561  1 assertEquals("C--H--Y-Q", prot3.getSequenceAsString());
562  1 assertEquals("R-QSV", prot4.getSequenceAsString());
563    }
564   
565    /**
566    * Test the method that tests whether a CDNA sequence translates to a protein
567    * sequence
568    */
 
569  1 toggle @Test(groups = { "Functional" })
570    public void testTranslatesAs()
571    {
572    // null arguments check
573  1 assertFalse(AlignmentUtils.translatesAs(null, 0, null));
574  1 assertFalse(AlignmentUtils.translatesAs(new char[] { 't' }, 0, null));
575  1 assertFalse(AlignmentUtils.translatesAs(null, 0, new char[] { 'a' }));
576   
577    // straight translation
578  1 assertTrue(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0,
579    "FPKG".toCharArray()));
580    // with extra start codon (not in protein)
581  1 assertTrue(AlignmentUtils.translatesAs("atgtttcccaaaggg".toCharArray(),
582    3, "FPKG".toCharArray()));
583    // with stop codon1 (not in protein)
584  1 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtaa".toCharArray(),
585    0, "FPKG".toCharArray()));
586    // with stop codon1 (in protein as *)
587  1 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtaa".toCharArray(),
588    0, "FPKG*".toCharArray()));
589    // with stop codon2 (not in protein)
590  1 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtag".toCharArray(),
591    0, "FPKG".toCharArray()));
592    // with stop codon3 (not in protein)
593  1 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtga".toCharArray(),
594    0, "FPKG".toCharArray()));
595    // with start and stop codon1
596  1 assertTrue(AlignmentUtils.translatesAs(
597    "atgtttcccaaagggtaa".toCharArray(), 3, "FPKG".toCharArray()));
598    // with start and stop codon1 (in protein as *)
599  1 assertTrue(AlignmentUtils.translatesAs(
600    "atgtttcccaaagggtaa".toCharArray(), 3, "FPKG*".toCharArray()));
601    // with start and stop codon2
602  1 assertTrue(AlignmentUtils.translatesAs(
603    "atgtttcccaaagggtag".toCharArray(), 3, "FPKG".toCharArray()));
604    // with start and stop codon3
605  1 assertTrue(AlignmentUtils.translatesAs(
606    "atgtttcccaaagggtga".toCharArray(), 3, "FPKG".toCharArray()));
607   
608    // with embedded stop codons
609  1 assertTrue(AlignmentUtils.translatesAs(
610    "atgtttTAGcccaaaTAAgggtga".toCharArray(), 3,
611    "F*PK*G".toCharArray()));
612   
613    // wrong protein
614  1 assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0,
615    "FPMG".toCharArray()));
616   
617    // truncated dna
618  1 assertFalse(AlignmentUtils.translatesAs("tttcccaaagg".toCharArray(), 0,
619    "FPKG".toCharArray()));
620   
621    // truncated protein
622  1 assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0,
623    "FPK".toCharArray()));
624   
625    // overlong dna (doesn't end in stop codon)
626  1 assertFalse(AlignmentUtils.translatesAs("tttcccaaagggttt".toCharArray(),
627    0, "FPKG".toCharArray()));
628   
629    // dna + stop codon + more
630  1 assertFalse(AlignmentUtils.translatesAs(
631    "tttcccaaagggttaga".toCharArray(), 0, "FPKG".toCharArray()));
632   
633    // overlong protein
634  1 assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0,
635    "FPKGQ".toCharArray()));
636    }
637   
638    /**
639    * Test mapping of protein to cDNA, for cases where the cDNA has start and/or
640    * stop codons in addition to the protein coding sequence.
641    *
642    * @throws IOException
643    */
 
644  1 toggle @Test(groups = { "Functional" })
645    public void testMapProteinAlignmentToCdna_withStartAndStopCodons()
646    throws IOException
647    {
648  1 List<SequenceI> protseqs = new ArrayList<>();
649  1 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
650  1 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
651  1 protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
652  1 AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
653  1 protein.setDataset(null);
654   
655  1 List<SequenceI> dnaseqs = new ArrayList<>();
656    // start + SAR:
657  1 dnaseqs.add(new Sequence("EMBL|A11111", "ATGTCAGCACGC"));
658    // = EIQ + stop
659  1 dnaseqs.add(new Sequence("EMBL|A22222", "GAGATACAATAA"));
660    // = start +EIQ + stop
661  1 dnaseqs.add(new Sequence("EMBL|A33333", "ATGGAAATCCAGTAG"));
662  1 dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG"));
663  1 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[4]));
664  1 cdna.setDataset(null);
665   
666  1 assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
667   
668    // 3 mappings made, each from 1 to 1 sequence
669  1 assertEquals(3, protein.getCodonFrames().size());
670  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
671  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
672  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
673   
674    // V12345 mapped from A22222
675  1 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
676    .get(0);
677  1 assertEquals(1, acf.getdnaSeqs().length);
678  1 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
679    acf.getdnaSeqs()[0]);
680  1 Mapping[] protMappings = acf.getProtMappings();
681  1 assertEquals(1, protMappings.length);
682  1 MapList mapList = protMappings[0].getMap();
683  1 assertEquals(3, mapList.getFromRatio());
684  1 assertEquals(1, mapList.getToRatio());
685  1 assertTrue(
686    Arrays.equals(new int[]
687    { 1, 9 }, mapList.getFromRanges().get(0)));
688  1 assertEquals(1, mapList.getFromRanges().size());
689  1 assertTrue(
690    Arrays.equals(new int[]
691    { 1, 3 }, mapList.getToRanges().get(0)));
692  1 assertEquals(1, mapList.getToRanges().size());
693   
694    // V12346 mapped from A33333 starting position 4
695  1 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
696  1 assertEquals(1, acf.getdnaSeqs().length);
697  1 assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
698    acf.getdnaSeqs()[0]);
699  1 protMappings = acf.getProtMappings();
700  1 assertEquals(1, protMappings.length);
701  1 mapList = protMappings[0].getMap();
702  1 assertEquals(3, mapList.getFromRatio());
703  1 assertEquals(1, mapList.getToRatio());
704  1 assertTrue(
705    Arrays.equals(new int[]
706    { 4, 12 }, mapList.getFromRanges().get(0)));
707  1 assertEquals(1, mapList.getFromRanges().size());
708  1 assertTrue(
709    Arrays.equals(new int[]
710    { 1, 3 }, mapList.getToRanges().get(0)));
711  1 assertEquals(1, mapList.getToRanges().size());
712   
713    // V12347 mapped to A11111 starting position 4
714  1 acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
715  1 assertEquals(1, acf.getdnaSeqs().length);
716  1 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
717    acf.getdnaSeqs()[0]);
718  1 protMappings = acf.getProtMappings();
719  1 assertEquals(1, protMappings.length);
720  1 mapList = protMappings[0].getMap();
721  1 assertEquals(3, mapList.getFromRatio());
722  1 assertEquals(1, mapList.getToRatio());
723  1 assertTrue(
724    Arrays.equals(new int[]
725    { 4, 12 }, mapList.getFromRanges().get(0)));
726  1 assertEquals(1, mapList.getFromRanges().size());
727  1 assertTrue(
728    Arrays.equals(new int[]
729    { 1, 3 }, mapList.getToRanges().get(0)));
730  1 assertEquals(1, mapList.getToRanges().size());
731   
732    // no mapping involving the 'extra' A44444
733  1 assertTrue(protein.getCodonFrame(cdna.getSequenceAt(3)).isEmpty());
734    }
735   
736    /**
737    * Test mapping of protein to cDNA, for the case where we have some sequence
738    * cross-references. Verify that 1-to-many mappings are made where
739    * cross-references exist and sequences are mappable.
740    *
741    * @throws IOException
742    */
 
743  1 toggle @Test(groups = { "Functional" })
744    public void testMapProteinAlignmentToCdna_withXrefs() throws IOException
745    {
746  1 List<SequenceI> protseqs = new ArrayList<>();
747  1 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
748  1 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
749  1 protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
750  1 AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
751  1 protein.setDataset(null);
752   
753  1 List<SequenceI> dnaseqs = new ArrayList<>();
754  1 dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR
755  1 dnaseqs.add(new Sequence("EMBL|A22222", "ATGGAGATACAA")); // = start + EIQ
756  1 dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ
757  1 dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG")); // = EIQ
758  1 dnaseqs.add(new Sequence("EMBL|A55555", "GAGATTCAG")); // = EIQ
759  1 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[5]));
760  1 cdna.setDataset(null);
761   
762    // Xref A22222 to V12345 (should get mapped)
763  1 dnaseqs.get(1).addDBRef(new DBRefEntry("UNIPROT", "1", "V12345"));
764    // Xref V12345 to A44444 (should get mapped)
765  1 protseqs.get(0).addDBRef(new DBRefEntry("EMBL", "1", "A44444"));
766    // Xref A33333 to V12347 (sequence mismatch - should not get mapped)
767  1 dnaseqs.get(2).addDBRef(new DBRefEntry("UNIPROT", "1", "V12347"));
768    // as V12345 is mapped to A22222 and A44444, this leaves V12346 unmapped.
769    // it should get paired up with the unmapped A33333
770    // A11111 should be mapped to V12347
771    // A55555 is spare and has no xref so is not mapped
772   
773  1 assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
774   
775    // 4 protein mappings made for 3 proteins, 2 to V12345, 1 each to V12346/7
776  1 assertEquals(3, protein.getCodonFrames().size());
777  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
778  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
779  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
780   
781    // one mapping for each of the first 4 cDNA sequences
782  1 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(0)).size());
783  1 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(1)).size());
784  1 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(2)).size());
785  1 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(3)).size());
786   
787    // V12345 mapped to A22222 and A44444
788  1 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
789    .get(0);
790  1 assertEquals(2, acf.getdnaSeqs().length);
791  1 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
792    acf.getdnaSeqs()[0]);
793  1 assertEquals(cdna.getSequenceAt(3).getDatasetSequence(),
794    acf.getdnaSeqs()[1]);
795   
796    // V12346 mapped to A33333
797  1 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
798  1 assertEquals(1, acf.getdnaSeqs().length);
799  1 assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
800    acf.getdnaSeqs()[0]);
801   
802    // V12347 mapped to A11111
803  1 acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
804  1 assertEquals(1, acf.getdnaSeqs().length);
805  1 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
806    acf.getdnaSeqs()[0]);
807   
808    // no mapping involving the 'extra' A55555
809  1 assertTrue(protein.getCodonFrame(cdna.getSequenceAt(4)).isEmpty());
810    }
811   
812    /**
813    * Test mapping of protein to cDNA, for the case where we have some sequence
814    * cross-references. Verify that once we have made an xref mapping we don't
815    * also map un-xrefd sequeces.
816    *
817    * @throws IOException
818    */
 
819  1 toggle @Test(groups = { "Functional" })
820    public void testMapProteinAlignmentToCdna_prioritiseXrefs()
821    throws IOException
822    {
823  1 List<SequenceI> protseqs = new ArrayList<>();
824  1 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
825  1 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
826  1 AlignmentI protein = new Alignment(
827    protseqs.toArray(new SequenceI[protseqs.size()]));
828  1 protein.setDataset(null);
829   
830  1 List<SequenceI> dnaseqs = new ArrayList<>();
831  1 dnaseqs.add(new Sequence("EMBL|A11111", "GAAATCCAG")); // = EIQ
832  1 dnaseqs.add(new Sequence("EMBL|A22222", "GAAATTCAG")); // = EIQ
833  1 AlignmentI cdna = new Alignment(
834    dnaseqs.toArray(new SequenceI[dnaseqs.size()]));
835  1 cdna.setDataset(null);
836   
837    // Xref A22222 to V12345 (should get mapped)
838    // A11111 should then be mapped to the unmapped V12346
839  1 dnaseqs.get(1).addDBRef(new DBRefEntry("UNIPROT", "1", "V12345"));
840   
841  1 assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
842   
843    // 2 protein mappings made
844  1 assertEquals(2, protein.getCodonFrames().size());
845  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
846  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
847   
848    // one mapping for each of the cDNA sequences
849  1 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(0)).size());
850  1 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(1)).size());
851   
852    // V12345 mapped to A22222
853  1 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
854    .get(0);
855  1 assertEquals(1, acf.getdnaSeqs().length);
856  1 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
857    acf.getdnaSeqs()[0]);
858   
859    // V12346 mapped to A11111
860  1 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
861  1 assertEquals(1, acf.getdnaSeqs().length);
862  1 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
863    acf.getdnaSeqs()[0]);
864    }
865   
866    /**
867    * Test the method that shows or hides sequence annotations by type(s) and
868    * selection group.
869    */
 
870  1 toggle @Test(groups = { "Functional" })
871    public void testShowOrHideSequenceAnnotations()
872    {
873  1 SequenceI seq1 = new Sequence("Seq1", "AAA");
874  1 SequenceI seq2 = new Sequence("Seq2", "BBB");
875  1 SequenceI seq3 = new Sequence("Seq3", "CCC");
876  1 Annotation[] anns = new Annotation[] { new Annotation(2f) };
877  1 AlignmentAnnotation ann1 = new AlignmentAnnotation("Structure", "ann1",
878    anns);
879  1 ann1.setSequenceRef(seq1);
880  1 AlignmentAnnotation ann2 = new AlignmentAnnotation("Structure", "ann2",
881    anns);
882  1 ann2.setSequenceRef(seq2);
883  1 AlignmentAnnotation ann3 = new AlignmentAnnotation("Structure", "ann3",
884    anns);
885  1 AlignmentAnnotation ann4 = new AlignmentAnnotation("Temp", "ann4",
886    anns);
887  1 ann4.setSequenceRef(seq1);
888  1 AlignmentAnnotation ann5 = new AlignmentAnnotation("Temp", "ann5",
889    anns);
890  1 ann5.setSequenceRef(seq2);
891  1 AlignmentAnnotation ann6 = new AlignmentAnnotation("Temp", "ann6",
892    anns);
893  1 AlignmentI al = new Alignment(new SequenceI[] { seq1, seq2, seq3 });
894  1 al.addAnnotation(ann1); // Structure for Seq1
895  1 al.addAnnotation(ann2); // Structure for Seq2
896  1 al.addAnnotation(ann3); // Structure for no sequence
897  1 al.addAnnotation(ann4); // Temp for seq1
898  1 al.addAnnotation(ann5); // Temp for seq2
899  1 al.addAnnotation(ann6); // Temp for no sequence
900  1 List<String> types = new ArrayList<>();
901  1 List<SequenceI> scope = new ArrayList<>();
902   
903    /*
904    * Set all sequence related Structure to hidden (ann1, ann2)
905    */
906  1 types.add("Structure");
907  1 AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, false,
908    false);
909  1 assertFalse(ann1.visible);
910  1 assertFalse(ann2.visible);
911  1 assertTrue(ann3.visible); // not sequence-related, not affected
912  1 assertTrue(ann4.visible); // not Structure, not affected
913  1 assertTrue(ann5.visible); // "
914  1 assertTrue(ann6.visible); // not sequence-related, not affected
915   
916    /*
917    * Set Temp in {seq1, seq3} to hidden
918    */
919  1 types.clear();
920  1 types.add("Temp");
921  1 scope.add(seq1);
922  1 scope.add(seq3);
923  1 AlignmentUtils.showOrHideSequenceAnnotations(al, types, scope, false,
924    false);
925  1 assertFalse(ann1.visible); // unchanged
926  1 assertFalse(ann2.visible); // unchanged
927  1 assertTrue(ann3.visible); // not sequence-related, not affected
928  1 assertFalse(ann4.visible); // Temp for seq1 hidden
929  1 assertTrue(ann5.visible); // not in scope, not affected
930  1 assertTrue(ann6.visible); // not sequence-related, not affected
931   
932    /*
933    * Set Temp in all sequences to hidden
934    */
935  1 types.clear();
936  1 types.add("Temp");
937  1 scope.add(seq1);
938  1 scope.add(seq3);
939  1 AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, false,
940    false);
941  1 assertFalse(ann1.visible); // unchanged
942  1 assertFalse(ann2.visible); // unchanged
943  1 assertTrue(ann3.visible); // not sequence-related, not affected
944  1 assertFalse(ann4.visible); // Temp for seq1 hidden
945  1 assertFalse(ann5.visible); // Temp for seq2 hidden
946  1 assertTrue(ann6.visible); // not sequence-related, not affected
947   
948    /*
949    * Set all types in {seq1, seq3} to visible
950    */
951  1 types.clear();
952  1 scope.clear();
953  1 scope.add(seq1);
954  1 scope.add(seq3);
955  1 AlignmentUtils.showOrHideSequenceAnnotations(al, types, scope, true,
956    true);
957  1 assertTrue(ann1.visible); // Structure for seq1 set visible
958  1 assertFalse(ann2.visible); // not in scope, unchanged
959  1 assertTrue(ann3.visible); // not sequence-related, not affected
960  1 assertTrue(ann4.visible); // Temp for seq1 set visible
961  1 assertFalse(ann5.visible); // not in scope, unchanged
962  1 assertTrue(ann6.visible); // not sequence-related, not affected
963   
964    /*
965    * Set all types in all scope to hidden
966    */
967  1 AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, true,
968    false);
969  1 assertFalse(ann1.visible);
970  1 assertFalse(ann2.visible);
971  1 assertTrue(ann3.visible); // not sequence-related, not affected
972  1 assertFalse(ann4.visible);
973  1 assertFalse(ann5.visible);
974  1 assertTrue(ann6.visible); // not sequence-related, not affected
975    }
976   
977    /**
978    * Tests for the method that checks if one sequence cross-references another
979    */
 
980  1 toggle @Test(groups = { "Functional" })
981    public void testHasCrossRef()
982    {
983  1 assertFalse(AlignmentUtils.hasCrossRef(null, null));
984  1 SequenceI seq1 = new Sequence("EMBL|A12345", "ABCDEF");
985  1 assertFalse(AlignmentUtils.hasCrossRef(seq1, null));
986  1 assertFalse(AlignmentUtils.hasCrossRef(null, seq1));
987  1 SequenceI seq2 = new Sequence("UNIPROT|V20192", "ABCDEF");
988  1 assertFalse(AlignmentUtils.hasCrossRef(seq1, seq2));
989   
990    // different ref
991  1 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "v20193"));
992  1 assertFalse(AlignmentUtils.hasCrossRef(seq1, seq2));
993   
994    // case-insensitive; version number is ignored
995  1 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "v20192"));
996  1 assertTrue(AlignmentUtils.hasCrossRef(seq1, seq2));
997   
998    // right case!
999  1 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));
1000  1 assertTrue(AlignmentUtils.hasCrossRef(seq1, seq2));
1001    // test is one-way only
1002  1 assertFalse(AlignmentUtils.hasCrossRef(seq2, seq1));
1003    }
1004   
1005    /**
1006    * Tests for the method that checks if either sequence cross-references the
1007    * other
1008    */
 
1009  1 toggle @Test(groups = { "Functional" })
1010    public void testHaveCrossRef()
1011    {
1012  1 assertFalse(AlignmentUtils.hasCrossRef(null, null));
1013  1 SequenceI seq1 = new Sequence("EMBL|A12345", "ABCDEF");
1014  1 assertFalse(AlignmentUtils.haveCrossRef(seq1, null));
1015  1 assertFalse(AlignmentUtils.haveCrossRef(null, seq1));
1016  1 SequenceI seq2 = new Sequence("UNIPROT|V20192", "ABCDEF");
1017  1 assertFalse(AlignmentUtils.haveCrossRef(seq1, seq2));
1018   
1019  1 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));
1020  1 assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));
1021    // next is true for haveCrossRef, false for hasCrossRef
1022  1 assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));
1023   
1024    // now the other way round
1025  1 seq1.setDBRefs(null);
1026  1 seq2.addDBRef(new DBRefEntry("EMBL", "1", "A12345"));
1027  1 assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));
1028  1 assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));
1029   
1030    // now both ways
1031  1 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));
1032  1 assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));
1033  1 assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));
1034    }
1035   
1036    /**
1037    * Test the method that extracts the cds-only part of a dna alignment.
1038    */
 
1039  1 toggle @Test(groups = { "Functional" })
1040    public void testMakeCdsAlignment()
1041    {
1042    /*
1043    * scenario:
1044    * dna1 --> [4, 6] [10,12] --> pep1
1045    * dna2 --> [1, 3] [7, 9] [13,15] --> pep2
1046    */
1047  1 SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
1048  1 SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC");
1049  1 SequenceI pep1 = new Sequence("pep1", "GF");
1050  1 SequenceI pep2 = new Sequence("pep2", "GFP");
1051  1 pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "pep1"));
1052  1 pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "pep2"));
1053  1 dna1.createDatasetSequence();
1054  1 dna2.createDatasetSequence();
1055  1 pep1.createDatasetSequence();
1056  1 pep2.createDatasetSequence();
1057  1 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });
1058  1 dna.setDataset(null);
1059   
1060    /*
1061    * put a variant feature on dna2 base 8
1062    * - should transfer to cds2 base 5
1063    */
1064  1 dna2.addSequenceFeature(
1065    new SequenceFeature("variant", "hgmd", 8, 8, 0f, null));
1066   
1067    /*
1068    * need a sourceDbRef if we are to construct dbrefs to the CDS
1069    * sequence from the dna contig sequences
1070    */
1071  1 DBRefEntry dbref = new DBRefEntry("ENSEMBL", "0", "dna1");
1072  1 dna1.getDatasetSequence().addDBRef(dbref);
1073  1 org.testng.Assert.assertEquals(dbref, dna1.getPrimaryDBRefs().get(0));
1074  1 dbref = new DBRefEntry("ENSEMBL", "0", "dna2");
1075  1 dna2.getDatasetSequence().addDBRef(dbref);
1076  1 org.testng.Assert.assertEquals(dbref, dna2.getPrimaryDBRefs().get(0));
1077   
1078    /*
1079    * CDS sequences are 'discovered' from dna-to-protein mappings on the alignment
1080    * dataset (e.g. added from dbrefs by CrossRef.findXrefSequences)
1081    */
1082  1 MapList mapfordna1 = new MapList(new int[] { 4, 6, 10, 12 },
1083    new int[]
1084    { 1, 2 }, 3, 1);
1085  1 AlignedCodonFrame acf = new AlignedCodonFrame();
1086  1 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(),
1087    mapfordna1);
1088  1 dna.addCodonFrame(acf);
1089  1 MapList mapfordna2 = new MapList(new int[] { 1, 3, 7, 9, 13, 15 },
1090    new int[]
1091    { 1, 3 }, 3, 1);
1092  1 acf = new AlignedCodonFrame();
1093  1 acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(),
1094    mapfordna2);
1095  1 dna.addCodonFrame(acf);
1096   
1097    /*
1098    * In this case, mappings originally came from matching Uniprot accessions
1099    * - so need an xref on dna involving those regions.
1100    * These are normally constructed from CDS annotation
1101    */
1102  1 DBRefEntry dna1xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep1",
1103    new Mapping(mapfordna1));
1104  1 dna1.addDBRef(dna1xref);
1105  1 assertEquals(2, dna1.getDBRefs().size()); // to self and to pep1
1106  1 DBRefEntry dna2xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep2",
1107    new Mapping(mapfordna2));
1108  1 dna2.addDBRef(dna2xref);
1109  1 assertEquals(2, dna2.getDBRefs().size()); // to self and to pep2
1110   
1111    /*
1112    * execute method under test:
1113    */
1114  1 AlignmentI cds = AlignmentUtils
1115    .makeCdsAlignment(new SequenceI[]
1116    { dna1, dna2 }, dna.getDataset(), null);
1117   
1118    /*
1119    * verify cds sequences
1120    */
1121  1 assertEquals(2, cds.getSequences().size());
1122  1 assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
1123  1 assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString());
1124   
1125    /*
1126    * verify shared, extended alignment dataset
1127    */
1128  1 assertSame(dna.getDataset(), cds.getDataset());
1129  1 SequenceI cds1Dss = cds.getSequenceAt(0).getDatasetSequence();
1130  1 SequenceI cds2Dss = cds.getSequenceAt(1).getDatasetSequence();
1131  1 assertTrue(dna.getDataset().getSequences().contains(cds1Dss));
1132  1 assertTrue(dna.getDataset().getSequences().contains(cds2Dss));
1133   
1134    /*
1135    * verify CDS has a dbref with mapping to peptide
1136    */
1137  1 assertNotNull(cds1Dss.getDBRefs());
1138  1 assertEquals(2, cds1Dss.getDBRefs().size());
1139  1 dbref = cds1Dss.getDBRefs().get(0);
1140  1 assertEquals(dna1xref.getSource(), dbref.getSource());
1141    // version is via ensembl's primary ref
1142  1 assertEquals(dna1xref.getVersion(), dbref.getVersion());
1143  1 assertEquals(dna1xref.getAccessionId(), dbref.getAccessionId());
1144  1 assertNotNull(dbref.getMap());
1145  1 assertSame(pep1.getDatasetSequence(), dbref.getMap().getTo());
1146  1 MapList cdsMapping = new MapList(new int[] { 1, 6 }, new int[] { 1, 2 },
1147    3, 1);
1148  1 assertEquals(cdsMapping, dbref.getMap().getMap());
1149   
1150    /*
1151    * verify peptide has added a dbref with reverse mapping to CDS
1152    */
1153  1 assertNotNull(pep1.getDBRefs());
1154    // FIXME pep1.getDBRefs() is 1 - is that the correct behaviour ?
1155  1 assertEquals(2, pep1.getDBRefs().size());
1156  1 dbref = pep1.getDBRefs().get(1);
1157  1 assertEquals("ENSEMBL", dbref.getSource());
1158  1 assertEquals("0", dbref.getVersion());
1159  1 assertEquals("CDS|dna1", dbref.getAccessionId());
1160  1 assertNotNull(dbref.getMap());
1161  1 assertSame(cds1Dss, dbref.getMap().getTo());
1162  1 assertEquals(cdsMapping.getInverse(), dbref.getMap().getMap());
1163   
1164    /*
1165    * verify cDNA has added a dbref with mapping to CDS
1166    */
1167  1 assertEquals(3, dna1.getDBRefs().size());
1168  1 DBRefEntry dbRefEntry = dna1.getDBRefs().get(2);
1169  1 assertSame(cds1Dss, dbRefEntry.getMap().getTo());
1170  1 MapList dnaToCdsMapping = new MapList(new int[] { 4, 6, 10, 12 },
1171    new int[]
1172    { 1, 6 }, 1, 1);
1173  1 assertEquals(dnaToCdsMapping, dbRefEntry.getMap().getMap());
1174  1 assertEquals(3, dna2.getDBRefs().size());
1175  1 dbRefEntry = dna2.getDBRefs().get(2);
1176  1 assertSame(cds2Dss, dbRefEntry.getMap().getTo());
1177  1 dnaToCdsMapping = new MapList(new int[] { 1, 3, 7, 9, 13, 15 },
1178    new int[]
1179    { 1, 9 }, 1, 1);
1180  1 assertEquals(dnaToCdsMapping, dbRefEntry.getMap().getMap());
1181   
1182    /*
1183    * verify CDS has added a dbref with mapping to cDNA
1184    */
1185  1 assertEquals(2, cds1Dss.getDBRefs().size());
1186  1 dbRefEntry = cds1Dss.getDBRefs().get(1);
1187  1 assertSame(dna1.getDatasetSequence(), dbRefEntry.getMap().getTo());
1188  1 MapList cdsToDnaMapping = new MapList(new int[] { 1, 6 },
1189    new int[]
1190    { 4, 6, 10, 12 }, 1, 1);
1191  1 assertEquals(cdsToDnaMapping, dbRefEntry.getMap().getMap());
1192  1 assertEquals(2, cds2Dss.getDBRefs().size());
1193  1 dbRefEntry = cds2Dss.getDBRefs().get(1);
1194  1 assertSame(dna2.getDatasetSequence(), dbRefEntry.getMap().getTo());
1195  1 cdsToDnaMapping = new MapList(new int[] { 1, 9 },
1196    new int[]
1197    { 1, 3, 7, 9, 13, 15 }, 1, 1);
1198  1 assertEquals(cdsToDnaMapping, dbRefEntry.getMap().getMap());
1199   
1200    /*
1201    * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide
1202    * the mappings are on the shared alignment dataset
1203    * 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep)
1204    */
1205  1 List<AlignedCodonFrame> cdsMappings = cds.getDataset().getCodonFrames();
1206  1 assertEquals(6, cdsMappings.size());
1207   
1208    /*
1209    * verify that mapping sets for dna and cds alignments are different
1210    * [not current behaviour - all mappings are on the alignment dataset]
1211    */
1212    // select -> subselect type to test.
1213    // Assert.assertNotSame(dna.getCodonFrames(), cds.getCodonFrames());
1214    // assertEquals(4, dna.getCodonFrames().size());
1215    // assertEquals(4, cds.getCodonFrames().size());
1216   
1217    /*
1218    * Two mappings involve pep1 (dna to pep1, cds to pep1)
1219    * Mapping from pep1 to GGGTTT in first new exon sequence
1220    */
1221  1 List<AlignedCodonFrame> pep1Mappings = MappingUtils
1222    .findMappingsForSequence(pep1, cdsMappings);
1223  1 assertEquals(2, pep1Mappings.size());
1224  1 List<AlignedCodonFrame> mappings = MappingUtils
1225    .findMappingsForSequence(cds.getSequenceAt(0), pep1Mappings);
1226  1 assertEquals(1, mappings.size());
1227   
1228    // map G to GGG
1229  1 SearchResultsI sr = MappingUtils.buildSearchResults(pep1, 1, mappings);
1230  1 assertEquals(1, sr.getResults().size());
1231  1 SearchResultMatchI m = sr.getResults().get(0);
1232  1 assertSame(cds1Dss, m.getSequence());
1233  1 assertEquals(1, m.getStart());
1234  1 assertEquals(3, m.getEnd());
1235    // map F to TTT
1236  1 sr = MappingUtils.buildSearchResults(pep1, 2, mappings);
1237  1 m = sr.getResults().get(0);
1238  1 assertSame(cds1Dss, m.getSequence());
1239  1 assertEquals(4, m.getStart());
1240  1 assertEquals(6, m.getEnd());
1241   
1242    /*
1243    * Two mappings involve pep2 (dna to pep2, cds to pep2)
1244    * Verify mapping from pep2 to GGGTTTCCC in second new exon sequence
1245    */
1246  1 List<AlignedCodonFrame> pep2Mappings = MappingUtils
1247    .findMappingsForSequence(pep2, cdsMappings);
1248  1 assertEquals(2, pep2Mappings.size());
1249  1 mappings = MappingUtils.findMappingsForSequence(cds.getSequenceAt(1),
1250    pep2Mappings);
1251  1 assertEquals(1, mappings.size());
1252    // map G to GGG
1253  1 sr = MappingUtils.buildSearchResults(pep2, 1, mappings);
1254  1 assertEquals(1, sr.getResults().size());
1255  1 m = sr.getResults().get(0);
1256  1 assertSame(cds2Dss, m.getSequence());
1257  1 assertEquals(1, m.getStart());
1258  1 assertEquals(3, m.getEnd());
1259    // map F to TTT
1260  1 sr = MappingUtils.buildSearchResults(pep2, 2, mappings);
1261  1 m = sr.getResults().get(0);
1262  1 assertSame(cds2Dss, m.getSequence());
1263  1 assertEquals(4, m.getStart());
1264  1 assertEquals(6, m.getEnd());
1265    // map P to CCC
1266  1 sr = MappingUtils.buildSearchResults(pep2, 3, mappings);
1267  1 m = sr.getResults().get(0);
1268  1 assertSame(cds2Dss, m.getSequence());
1269  1 assertEquals(7, m.getStart());
1270  1 assertEquals(9, m.getEnd());
1271   
1272    /*
1273    * check cds2 acquired a variant feature in position 5
1274    */
1275  1 List<SequenceFeature> sfs = cds2Dss.getSequenceFeatures();
1276  1 assertNotNull(sfs);
1277  1 assertEquals(1, sfs.size());
1278  1 assertEquals("variant", sfs.get(0).type);
1279  1 assertEquals(5, sfs.get(0).begin);
1280  1 assertEquals(5, sfs.get(0).end);
1281    }
1282   
1283    /**
1284    * Test the method that makes a cds-only alignment from a DNA sequence and its
1285    * product mappings, for the case where there are multiple exon mappings to
1286    * different protein products.
1287    */
 
1288  1 toggle @Test(groups = { "Functional" })
1289    public void testMakeCdsAlignment_multipleProteins()
1290    {
1291  1 SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
1292  1 SequenceI pep1 = new Sequence("pep1", "GF"); // GGGTTT
1293  1 SequenceI pep2 = new Sequence("pep2", "KP"); // aaaccc
1294  1 SequenceI pep3 = new Sequence("pep3", "KF"); // aaaTTT
1295  1 dna1.createDatasetSequence();
1296  1 pep1.createDatasetSequence();
1297  1 pep2.createDatasetSequence();
1298  1 pep3.createDatasetSequence();
1299  1 pep1.getDatasetSequence()
1300    .addDBRef(new DBRefEntry("EMBLCDS", "2", "A12345"));
1301  1 pep2.getDatasetSequence()
1302    .addDBRef(new DBRefEntry("EMBLCDS", "3", "A12346"));
1303  1 pep3.getDatasetSequence()
1304    .addDBRef(new DBRefEntry("EMBLCDS", "4", "A12347"));
1305   
1306    /*
1307    * Create the CDS alignment
1308    */
1309  1 AlignmentI dna = new Alignment(new SequenceI[] { dna1 });
1310  1 dna.setDataset(null);
1311   
1312    /*
1313    * Make the mappings from dna to protein
1314    */
1315    // map ...GGG...TTT to GF
1316  1 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1317    new int[]
1318    { 1, 2 }, 3, 1);
1319  1 AlignedCodonFrame acf = new AlignedCodonFrame();
1320  1 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
1321  1 dna.addCodonFrame(acf);
1322   
1323    // map aaa...ccc to KP
1324  1 map = new MapList(new int[] { 1, 3, 7, 9 }, new int[] { 1, 2 }, 3, 1);
1325  1 acf = new AlignedCodonFrame();
1326  1 acf.addMap(dna1.getDatasetSequence(), pep2.getDatasetSequence(), map);
1327  1 dna.addCodonFrame(acf);
1328   
1329    // map aaa......TTT to KF
1330  1 map = new MapList(new int[] { 1, 3, 10, 12 }, new int[] { 1, 2 }, 3, 1);
1331  1 acf = new AlignedCodonFrame();
1332  1 acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map);
1333  1 dna.addCodonFrame(acf);
1334   
1335    /*
1336    * execute method under test
1337    */
1338  1 AlignmentI cdsal = AlignmentUtils
1339    .makeCdsAlignment(new SequenceI[]
1340    { dna1 }, dna.getDataset(), null);
1341   
1342    /*
1343    * Verify we have 3 cds sequences, mapped to pep1/2/3 respectively
1344    */
1345  1 List<SequenceI> cds = cdsal.getSequences();
1346  1 assertEquals(3, cds.size());
1347   
1348    /*
1349    * verify shared, extended alignment dataset
1350    */
1351  1 assertSame(cdsal.getDataset(), dna.getDataset());
1352  1 assertTrue(dna.getDataset().getSequences()
1353    .contains(cds.get(0).getDatasetSequence()));
1354  1 assertTrue(dna.getDataset().getSequences()
1355    .contains(cds.get(1).getDatasetSequence()));
1356  1 assertTrue(dna.getDataset().getSequences()
1357    .contains(cds.get(2).getDatasetSequence()));
1358   
1359    /*
1360    * verify aligned cds sequences and their xrefs
1361    */
1362  1 SequenceI cdsSeq = cds.get(0);
1363  1 assertEquals("GGGTTT", cdsSeq.getSequenceAsString());
1364    // assertEquals("dna1|A12345", cdsSeq.getName());
1365  1 assertEquals("CDS|dna1", cdsSeq.getName());
1366    // assertEquals(1, cdsSeq.getDBRefs().length);
1367    // DBRefEntry cdsRef = cdsSeq.getDBRefs()[0];
1368    // assertEquals("EMBLCDS", cdsRef.getSource());
1369    // assertEquals("2", cdsRef.getVersion());
1370    // assertEquals("A12345", cdsRef.getAccessionId());
1371   
1372  1 cdsSeq = cds.get(1);
1373  1 assertEquals("aaaccc", cdsSeq.getSequenceAsString());
1374    // assertEquals("dna1|A12346", cdsSeq.getName());
1375  1 assertEquals("CDS|dna1", cdsSeq.getName());
1376    // assertEquals(1, cdsSeq.getDBRefs().length);
1377    // cdsRef = cdsSeq.getDBRefs()[0];
1378    // assertEquals("EMBLCDS", cdsRef.getSource());
1379    // assertEquals("3", cdsRef.getVersion());
1380    // assertEquals("A12346", cdsRef.getAccessionId());
1381   
1382  1 cdsSeq = cds.get(2);
1383  1 assertEquals("aaaTTT", cdsSeq.getSequenceAsString());
1384    // assertEquals("dna1|A12347", cdsSeq.getName());
1385  1 assertEquals("CDS|dna1", cdsSeq.getName());
1386    // assertEquals(1, cdsSeq.getDBRefs().length);
1387    // cdsRef = cdsSeq.getDBRefs()[0];
1388    // assertEquals("EMBLCDS", cdsRef.getSource());
1389    // assertEquals("4", cdsRef.getVersion());
1390    // assertEquals("A12347", cdsRef.getAccessionId());
1391   
1392    /*
1393    * Verify there are mappings from each cds sequence to its protein product
1394    * and also to its dna source
1395    */
1396  1 List<AlignedCodonFrame> newMappings = cdsal.getCodonFrames();
1397   
1398    /*
1399    * 6 mappings involve dna1 (to pep1/2/3, cds1/2/3)
1400    */
1401  1 List<AlignedCodonFrame> dnaMappings = MappingUtils
1402    .findMappingsForSequence(dna1, newMappings);
1403  1 assertEquals(6, dnaMappings.size());
1404   
1405    /*
1406    * dna1 to pep1
1407    */
1408  1 List<AlignedCodonFrame> mappings = MappingUtils
1409    .findMappingsForSequence(pep1, dnaMappings);
1410  1 assertEquals(1, mappings.size());
1411  1 assertEquals(1, mappings.get(0).getMappings().size());
1412  1 assertSame(pep1.getDatasetSequence(),
1413    mappings.get(0).getMappings().get(0).getMapping().getTo());
1414   
1415    /*
1416    * dna1 to cds1
1417    */
1418  1 List<AlignedCodonFrame> dnaToCds1Mappings = MappingUtils
1419    .findMappingsForSequence(cds.get(0), dnaMappings);
1420  1 Mapping mapping = dnaToCds1Mappings.get(0).getMappings().get(0)
1421    .getMapping();
1422  1 assertSame(cds.get(0).getDatasetSequence(), mapping.getTo());
1423  1 assertEquals("G(1) in CDS should map to G(4) in DNA", 4,
1424    mapping.getMap().getToPosition(1));
1425   
1426    /*
1427    * dna1 to pep2
1428    */
1429  1 mappings = MappingUtils.findMappingsForSequence(pep2, dnaMappings);
1430  1 assertEquals(1, mappings.size());
1431  1 assertEquals(1, mappings.get(0).getMappings().size());
1432  1 assertSame(pep2.getDatasetSequence(),
1433    mappings.get(0).getMappings().get(0).getMapping().getTo());
1434   
1435    /*
1436    * dna1 to cds2
1437    */
1438  1 List<AlignedCodonFrame> dnaToCds2Mappings = MappingUtils
1439    .findMappingsForSequence(cds.get(1), dnaMappings);
1440  1 mapping = dnaToCds2Mappings.get(0).getMappings().get(0).getMapping();
1441  1 assertSame(cds.get(1).getDatasetSequence(), mapping.getTo());
1442  1 assertEquals("c(4) in CDS should map to c(7) in DNA", 7,
1443    mapping.getMap().getToPosition(4));
1444   
1445    /*
1446    * dna1 to pep3
1447    */
1448  1 mappings = MappingUtils.findMappingsForSequence(pep3, dnaMappings);
1449  1 assertEquals(1, mappings.size());
1450  1 assertEquals(1, mappings.get(0).getMappings().size());
1451  1 assertSame(pep3.getDatasetSequence(),
1452    mappings.get(0).getMappings().get(0).getMapping().getTo());
1453   
1454    /*
1455    * dna1 to cds3
1456    */
1457  1 List<AlignedCodonFrame> dnaToCds3Mappings = MappingUtils
1458    .findMappingsForSequence(cds.get(2), dnaMappings);
1459  1 mapping = dnaToCds3Mappings.get(0).getMappings().get(0).getMapping();
1460  1 assertSame(cds.get(2).getDatasetSequence(), mapping.getTo());
1461  1 assertEquals("T(4) in CDS should map to T(10) in DNA", 10,
1462    mapping.getMap().getToPosition(4));
1463    }
1464   
 
1465  1 toggle @Test(groups = { "Functional" })
1466    public void testIsMappable()
1467    {
1468  1 SequenceI dna1 = new Sequence("dna1", "cgCAGtgGT");
1469  1 SequenceI aa1 = new Sequence("aa1", "RSG");
1470  1 AlignmentI al1 = new Alignment(new SequenceI[] { dna1 });
1471  1 AlignmentI al2 = new Alignment(new SequenceI[] { aa1 });
1472   
1473  1 assertFalse(AlignmentUtils.isMappable(null, null));
1474  1 assertFalse(AlignmentUtils.isMappable(al1, null));
1475  1 assertFalse(AlignmentUtils.isMappable(null, al1));
1476  1 assertFalse(AlignmentUtils.isMappable(al1, al1));
1477  1 assertFalse(AlignmentUtils.isMappable(al2, al2));
1478   
1479  1 assertTrue(AlignmentUtils.isMappable(al1, al2));
1480  1 assertTrue(AlignmentUtils.isMappable(al2, al1));
1481    }
1482   
1483    /**
1484    * Test creating a mapping when the sequences involved do not start at residue
1485    * 1
1486    *
1487    * @throws IOException
1488    */
 
1489  1 toggle @Test(groups = { "Functional" })
1490    public void testMapCdnaToProtein_forSubsequence() throws IOException
1491    {
1492  1 SequenceI prot = new Sequence("UNIPROT|V12345", "E-I--Q", 10, 12);
1493  1 prot.createDatasetSequence();
1494   
1495  1 SequenceI dna = new Sequence("EMBL|A33333", "GAA--AT-C-CAG", 40, 48);
1496  1 dna.createDatasetSequence();
1497   
1498  1 MapList map = AlignmentUtils.mapCdnaToProtein(prot, dna);
1499  1 assertEquals(10, map.getToLowest());
1500  1 assertEquals(12, map.getToHighest());
1501  1 assertEquals(40, map.getFromLowest());
1502  1 assertEquals(48, map.getFromHighest());
1503    }
1504   
1505    /**
1506    * Test for the alignSequenceAs method where we have protein mapped to protein
1507    */
 
1508  1 toggle @Test(groups = { "Functional" })
1509    public void testAlignSequenceAs_mappedProteinProtein()
1510    {
1511   
1512  1 SequenceI alignMe = new Sequence("Match", "MGAASEV");
1513  1 alignMe.createDatasetSequence();
1514  1 SequenceI alignFrom = new Sequence("Query", "LQTGYMGAASEVMFSPTRR");
1515  1 alignFrom.createDatasetSequence();
1516   
1517  1 AlignedCodonFrame acf = new AlignedCodonFrame();
1518    // this is like a domain or motif match of part of a peptide sequence
1519  1 MapList map = new MapList(new int[] { 6, 12 }, new int[] { 1, 7 }, 1,
1520    1);
1521  1 acf.addMap(alignFrom.getDatasetSequence(), alignMe.getDatasetSequence(),
1522    map);
1523   
1524  1 AlignmentUtils.alignSequenceAs(alignMe, alignFrom, acf, "-", '-', true,
1525    true);
1526  1 assertEquals("-----MGAASEV-------", alignMe.getSequenceAsString());
1527    }
1528   
1529    /**
1530    * Test for the alignSequenceAs method where there are trailing unmapped
1531    * residues in the model sequence
1532    */
 
1533  1 toggle @Test(groups = { "Functional" })
1534    public void testAlignSequenceAs_withTrailingPeptide()
1535    {
1536    // map first 3 codons to KPF; G is a trailing unmapped residue
1537  1 MapList map = new MapList(new int[] { 1, 9 }, new int[] { 1, 3 }, 3, 1);
1538   
1539  1 checkAlignSequenceAs("AAACCCTTT", "K-PFG", true, true, map,
1540    "AAA---CCCTTT---");
1541    }
1542   
1543    /**
1544    * Tests for transferring features between mapped sequences
1545    */
 
1546  1 toggle @Test(groups = { "Functional" })
1547    public void testTransferFeatures()
1548    {
1549  1 SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt");
1550  1 SequenceI cds = new Sequence("cds/10-15", "TAGGCC");
1551   
1552    // no overlap
1553  1 dna.addSequenceFeature(
1554    new SequenceFeature("type1", "desc1", 1, 2, 1f, null));
1555    // partial overlap - to [1, 1]
1556  1 dna.addSequenceFeature(
1557    new SequenceFeature("type2", "desc2", 3, 4, 2f, null));
1558    // exact overlap - to [1, 3]
1559  1 dna.addSequenceFeature(
1560    new SequenceFeature("type3", "desc3", 4, 6, 3f, null));
1561    // spanning overlap - to [2, 5]
1562  1 dna.addSequenceFeature(
1563    new SequenceFeature("type4", "desc4", 5, 11, 4f, null));
1564    // exactly overlaps whole mapped range [1, 6]
1565  1 dna.addSequenceFeature(
1566    new SequenceFeature("type5", "desc5", 4, 12, 5f, null));
1567    // no overlap (internal)
1568  1 dna.addSequenceFeature(
1569    new SequenceFeature("type6", "desc6", 7, 9, 6f, null));
1570    // no overlap (3' end)
1571  1 dna.addSequenceFeature(
1572    new SequenceFeature("type7", "desc7", 13, 15, 7f, null));
1573    // overlap (3' end) - to [6, 6]
1574  1 dna.addSequenceFeature(
1575    new SequenceFeature("type8", "desc8", 12, 12, 8f, null));
1576    // extended overlap - to [6, +]
1577  1 dna.addSequenceFeature(
1578    new SequenceFeature("type9", "desc9", 12, 13, 9f, null));
1579   
1580  1 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1581    new int[]
1582    { 1, 6 }, 1, 1);
1583   
1584    /*
1585    * transferFeatures() will build 'partial overlap' for regions
1586    * that partially overlap 5' or 3' (start or end) of target sequence
1587    */
1588  1 AlignmentUtils.transferFeatures(dna, cds, map, null);
1589  1 List<SequenceFeature> sfs = cds.getSequenceFeatures();
1590  1 assertEquals(6, sfs.size());
1591   
1592  1 SequenceFeature sf = sfs.get(0);
1593  1 assertEquals("type2", sf.getType());
1594  1 assertEquals("desc2", sf.getDescription());
1595  1 assertEquals(2f, sf.getScore());
1596  1 assertEquals(1, sf.getBegin());
1597  1 assertEquals(1, sf.getEnd());
1598   
1599  1 sf = sfs.get(1);
1600  1 assertEquals("type3", sf.getType());
1601  1 assertEquals("desc3", sf.getDescription());
1602  1 assertEquals(3f, sf.getScore());
1603  1 assertEquals(1, sf.getBegin());
1604  1 assertEquals(3, sf.getEnd());
1605   
1606  1 sf = sfs.get(2);
1607  1 assertEquals("type4", sf.getType());
1608  1 assertEquals(2, sf.getBegin());
1609  1 assertEquals(5, sf.getEnd());
1610   
1611  1 sf = sfs.get(3);
1612  1 assertEquals("type5", sf.getType());
1613  1 assertEquals(1, sf.getBegin());
1614  1 assertEquals(6, sf.getEnd());
1615   
1616  1 sf = sfs.get(4);
1617  1 assertEquals("type8", sf.getType());
1618  1 assertEquals(6, sf.getBegin());
1619  1 assertEquals(6, sf.getEnd());
1620   
1621  1 sf = sfs.get(5);
1622  1 assertEquals("type9", sf.getType());
1623  1 assertEquals(6, sf.getBegin());
1624  1 assertEquals(6, sf.getEnd());
1625    }
1626   
1627    /**
1628    * Tests that transferFeatures skips features of the types listed to omit
1629    */
 
1630  1 toggle @Test(groups = { "Functional" })
1631    public void testTransferFeatures_withOmit()
1632    {
1633  1 SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt");
1634  1 SequenceI cds = new Sequence("cds/10-15", "TAGGCC");
1635   
1636  1 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1637    new int[]
1638    { 1, 6 }, 1, 1);
1639   
1640    // [5, 11] maps to [2, 5]
1641  1 dna.addSequenceFeature(
1642    new SequenceFeature("type4", "desc4", 5, 11, 4f, null));
1643    // [4, 12] maps to [1, 6]
1644  1 dna.addSequenceFeature(
1645    new SequenceFeature("type5", "desc5", 4, 12, 5f, null));
1646    // [12, 12] maps to [6, 6]
1647  1 dna.addSequenceFeature(
1648    new SequenceFeature("type8", "desc8", 12, 12, 8f, null));
1649   
1650    // type4 and type8 are the 'omit these' varargs
1651  1 AlignmentUtils.transferFeatures(dna, cds, map, null, "type4", "type8");
1652  1 List<SequenceFeature> sfs = cds.getSequenceFeatures();
1653  1 assertEquals(1, sfs.size());
1654   
1655  1 SequenceFeature sf = sfs.get(0);
1656  1 assertEquals("type5", sf.getType());
1657  1 assertEquals(1, sf.getBegin());
1658  1 assertEquals(6, sf.getEnd());
1659    }
1660   
1661    /**
1662    * Tests that transferFeatures copies only features of the selected type
1663    */
 
1664  1 toggle @Test(groups = { "Functional" })
1665    public void testTransferFeatures_withSelect()
1666    {
1667  1 SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt");
1668  1 SequenceI cds = new Sequence("cds/10-15", "TAGGCC");
1669   
1670  1 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1671    new int[]
1672    { 1, 6 }, 1, 1);
1673   
1674    // [5, 11] maps to [2, 5]
1675  1 dna.addSequenceFeature(
1676    new SequenceFeature("type4", "desc4", 5, 11, 4f, null));
1677    // [4, 12] maps to [1, 6]
1678  1 dna.addSequenceFeature(
1679    new SequenceFeature("type5", "desc5", 4, 12, 5f, null));
1680    // [12, 12] maps to [6, 6]
1681  1 dna.addSequenceFeature(
1682    new SequenceFeature("type8", "desc8", 12, 12, 8f, null));
1683   
1684    // "type5" is the 'select this type' argument
1685  1 AlignmentUtils.transferFeatures(dna, cds, map, "type5");
1686  1 List<SequenceFeature> sfs = cds.getSequenceFeatures();
1687  1 assertEquals(1, sfs.size());
1688   
1689  1 SequenceFeature sf = sfs.get(0);
1690  1 assertEquals("type5", sf.getType());
1691  1 assertEquals(1, sf.getBegin());
1692  1 assertEquals(6, sf.getEnd());
1693    }
1694   
1695    /**
1696    * Test the method that extracts the cds-only part of a dna alignment, for the
1697    * case of alternative transcripts, each mapped to its own peptide product.
1698    */
 
1699  1 toggle @Test(groups = { "Functional" })
1700    public void testMakeCdsAlignment_alternativeTranscripts()
1701    {
1702  1 SequenceI dna1 = new Sequence("dna1", "aaaGGGCC-----CTTTaaaGGG");
1703    // alternative transcript of same dna skips 'ctt' (bases 9-11), giving codon CCT
1704  1 SequenceI dna2 = new Sequence("dna2", "aaaGGGCC-----cttTaaaGGG");
1705    // dna3 has no mapping (protein product) so should be ignored here
1706  1 SequenceI dna3 = new Sequence("dna3", "aaaGGGCCCCCGGGcttTaaaGGG");
1707  1 SequenceI pep1 = new Sequence("pep1", "GPFG");
1708  1 SequenceI pep2 = new Sequence("pep2", "GPG");
1709  1 dna1.createDatasetSequence();
1710  1 dna2.createDatasetSequence();
1711  1 dna3.createDatasetSequence();
1712  1 pep1.createDatasetSequence();
1713  1 pep2.createDatasetSequence();
1714   
1715  1 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
1716  1 dna.setDataset(null);
1717   
1718  1 MapList map = new MapList(new int[] { 4, 12, 16, 18 },
1719    new int[]
1720    { 1, 4 }, 3, 1);
1721  1 AlignedCodonFrame acf = new AlignedCodonFrame();
1722  1 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
1723  1 dna.addCodonFrame(acf);
1724  1 map = new MapList(new int[] { 4, 8, 12, 12, 16, 18 },
1725    new int[]
1726    { 1, 3 }, 3, 1);
1727  1 acf = new AlignedCodonFrame();
1728  1 acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
1729  1 dna.addCodonFrame(acf);
1730   
1731  1 AlignmentI cds = AlignmentUtils
1732    .makeCdsAlignment(new SequenceI[]
1733    { dna1, dna2, dna3 }, dna.getDataset(), null);
1734  1 List<SequenceI> cdsSeqs = cds.getSequences();
1735  1 assertEquals(2, cdsSeqs.size());
1736  1 assertEquals("GGGCCCTTTGGG", cdsSeqs.get(0).getSequenceAsString());
1737  1 assertEquals("GGGCCTGGG", cdsSeqs.get(1).getSequenceAsString());
1738   
1739    /*
1740    * verify shared, extended alignment dataset
1741    */
1742  1 assertSame(dna.getDataset(), cds.getDataset());
1743  1 assertTrue(dna.getDataset().getSequences()
1744    .contains(cdsSeqs.get(0).getDatasetSequence()));
1745  1 assertTrue(dna.getDataset().getSequences()
1746    .contains(cdsSeqs.get(1).getDatasetSequence()));
1747   
1748    /*
1749    * Verify 6 mappings: dna1 to cds1, cds1 to pep1, dna1 to pep1
1750    * and the same for dna2/cds2/pep2
1751    */
1752  1 List<AlignedCodonFrame> mappings = cds.getCodonFrames();
1753  1 assertEquals(6, mappings.size());
1754   
1755    /*
1756    * 2 mappings involve pep1
1757    */
1758  1 List<AlignedCodonFrame> pep1Mappings = MappingUtils
1759    .findMappingsForSequence(pep1, mappings);
1760  1 assertEquals(2, pep1Mappings.size());
1761   
1762    /*
1763    * Get mapping of pep1 to cds1 and verify it
1764    * maps GPFG to 1-3,4-6,7-9,10-12
1765    */
1766  1 List<AlignedCodonFrame> pep1CdsMappings = MappingUtils
1767    .findMappingsForSequence(cds.getSequenceAt(0), pep1Mappings);
1768  1 assertEquals(1, pep1CdsMappings.size());
1769  1 SearchResultsI sr = MappingUtils.buildSearchResults(pep1, 1,
1770    pep1CdsMappings);
1771  1 assertEquals(1, sr.getResults().size());
1772  1 SearchResultMatchI m = sr.getResults().get(0);
1773  1 assertEquals(cds.getSequenceAt(0).getDatasetSequence(),
1774    m.getSequence());
1775  1 assertEquals(1, m.getStart());
1776  1 assertEquals(3, m.getEnd());
1777  1 sr = MappingUtils.buildSearchResults(pep1, 2, pep1CdsMappings);
1778  1 m = sr.getResults().get(0);
1779  1 assertEquals(4, m.getStart());
1780  1 assertEquals(6, m.getEnd());
1781  1 sr = MappingUtils.buildSearchResults(pep1, 3, pep1CdsMappings);
1782  1 m = sr.getResults().get(0);
1783  1 assertEquals(7, m.getStart());
1784  1 assertEquals(9, m.getEnd());
1785  1 sr = MappingUtils.buildSearchResults(pep1, 4, pep1CdsMappings);
1786  1 m = sr.getResults().get(0);
1787  1 assertEquals(10, m.getStart());
1788  1 assertEquals(12, m.getEnd());
1789   
1790    /*
1791    * Get mapping of pep2 to cds2 and verify it
1792    * maps GPG in pep2 to 1-3,4-6,7-9 in second CDS sequence
1793    */
1794  1 List<AlignedCodonFrame> pep2Mappings = MappingUtils
1795    .findMappingsForSequence(pep2, mappings);
1796  1 assertEquals(2, pep2Mappings.size());
1797  1 List<AlignedCodonFrame> pep2CdsMappings = MappingUtils
1798    .findMappingsForSequence(cds.getSequenceAt(1), pep2Mappings);
1799  1 assertEquals(1, pep2CdsMappings.size());
1800  1 sr = MappingUtils.buildSearchResults(pep2, 1, pep2CdsMappings);
1801  1 assertEquals(1, sr.getResults().size());
1802  1 m = sr.getResults().get(0);
1803  1 assertEquals(cds.getSequenceAt(1).getDatasetSequence(),
1804    m.getSequence());
1805  1 assertEquals(1, m.getStart());
1806  1 assertEquals(3, m.getEnd());
1807  1 sr = MappingUtils.buildSearchResults(pep2, 2, pep2CdsMappings);
1808  1 m = sr.getResults().get(0);
1809  1 assertEquals(4, m.getStart());
1810  1 assertEquals(6, m.getEnd());
1811  1 sr = MappingUtils.buildSearchResults(pep2, 3, pep2CdsMappings);
1812  1 m = sr.getResults().get(0);
1813  1 assertEquals(7, m.getStart());
1814  1 assertEquals(9, m.getEnd());
1815    }
1816   
1817    /**
1818    * Test the method that realigns protein to match mapped codon alignment.
1819    */
 
1820  1 toggle @Test(groups = { "Functional" })
1821    public void testAlignProteinAsDna_incompleteStartCodon()
1822    {
1823    // seq1: incomplete start codon (not mapped), then [3, 11]
1824  1 SequenceI dna1 = new Sequence("Seq1", "ccAAA-TTT-GGG-");
1825    // seq2 codons are [4, 5], [8, 11]
1826  1 SequenceI dna2 = new Sequence("Seq2", "ccaAA-ttT-GGG-");
1827    // seq3 incomplete start codon at 'tt'
1828  1 SequenceI dna3 = new Sequence("Seq3", "ccaaa-ttt-GGG-");
1829  1 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
1830  1 dna.setDataset(null);
1831   
1832    // prot1 has 'X' for incomplete start codon (not mapped)
1833  1 SequenceI prot1 = new Sequence("Seq1", "XKFG"); // X for incomplete start
1834  1 SequenceI prot2 = new Sequence("Seq2", "NG");
1835  1 SequenceI prot3 = new Sequence("Seq3", "XG"); // X for incomplete start
1836  1 AlignmentI protein = new Alignment(
1837    new SequenceI[]
1838    { prot1, prot2, prot3 });
1839  1 protein.setDataset(null);
1840   
1841    // map dna1 [3, 11] to prot1 [2, 4] KFG
1842  1 MapList map = new MapList(new int[] { 3, 11 }, new int[] { 2, 4 }, 3,
1843    1);
1844  1 AlignedCodonFrame acf = new AlignedCodonFrame();
1845  1 acf.addMap(dna1.getDatasetSequence(), prot1.getDatasetSequence(), map);
1846   
1847    // map dna2 [4, 5] [8, 11] to prot2 [1, 2] NG
1848  1 map = new MapList(new int[] { 4, 5, 8, 11 }, new int[] { 1, 2 }, 3, 1);
1849  1 acf.addMap(dna2.getDatasetSequence(), prot2.getDatasetSequence(), map);
1850   
1851    // map dna3 [9, 11] to prot3 [2, 2] G
1852  1 map = new MapList(new int[] { 9, 11 }, new int[] { 2, 2 }, 3, 1);
1853  1 acf.addMap(dna3.getDatasetSequence(), prot3.getDatasetSequence(), map);
1854   
1855  1 ArrayList<AlignedCodonFrame> acfs = new ArrayList<>();
1856  1 acfs.add(acf);
1857  1 protein.setCodonFrames(acfs);
1858   
1859    /*
1860    * verify X is included in the aligned proteins, and placed just
1861    * before the first mapped residue
1862    * N (codon AAT) is placed between K (AAA) and F (TTT)
1863    */
1864  1 AlignmentUtils.alignProteinAsDna(protein, dna);
1865  1 assertEquals("XK-FG", prot1.getSequenceAsString());
1866  1 assertEquals("--N-G", prot2.getSequenceAsString());
1867  1 assertEquals("---XG", prot3.getSequenceAsString());
1868    }
1869   
1870    /**
1871    * Tests for the method that maps the subset of a dna sequence that has CDS
1872    * (or subtype) feature - case where the start codon is incomplete.
1873    */
 
1874  1 toggle @Test(groups = "Functional")
1875    public void testFindCdsPositions_fivePrimeIncomplete()
1876    {
1877  1 SequenceI dnaSeq = new Sequence("dna", "aaagGGCCCaaaTTTttt");
1878  1 dnaSeq.createDatasetSequence();
1879  1 SequenceI ds = dnaSeq.getDatasetSequence();
1880   
1881    // CDS for dna 5-6 (incomplete codon), 7-9
1882  1 SequenceFeature sf = new SequenceFeature("CDS", "", 5, 9, 0f, null);
1883  1 sf.setPhase("2"); // skip 2 bases to start of next codon
1884  1 ds.addSequenceFeature(sf);
1885    // CDS for dna 13-15
1886  1 sf = new SequenceFeature("CDS_predicted", "", 13, 15, 0f, null);
1887  1 ds.addSequenceFeature(sf);
1888   
1889  1 List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);
1890   
1891    /*
1892    * check the mapping starts with the first complete codon
1893    */
1894  1 assertEquals(6, MappingUtils.getLength(ranges));
1895  1 assertEquals(2, ranges.size());
1896  1 assertEquals(7, ranges.get(0)[0]);
1897  1 assertEquals(9, ranges.get(0)[1]);
1898  1 assertEquals(13, ranges.get(1)[0]);
1899  1 assertEquals(15, ranges.get(1)[1]);
1900    }
1901   
1902    /**
1903    * Tests for the method that maps the subset of a dna sequence that has CDS
1904    * (or subtype) feature.
1905    */
 
1906  1 toggle @Test(groups = "Functional")
1907    public void testFindCdsPositions()
1908    {
1909  1 SequenceI dnaSeq = new Sequence("dna", "aaaGGGcccAAATTTttt");
1910  1 dnaSeq.createDatasetSequence();
1911  1 SequenceI ds = dnaSeq.getDatasetSequence();
1912   
1913    // CDS for dna 10-12
1914  1 SequenceFeature sf = new SequenceFeature("CDS_predicted", "", 10, 12,
1915    0f, null);
1916  1 sf.setStrand("+");
1917  1 ds.addSequenceFeature(sf);
1918    // CDS for dna 4-6
1919  1 sf = new SequenceFeature("CDS", "", 4, 6, 0f, null);
1920  1 sf.setStrand("+");
1921  1 ds.addSequenceFeature(sf);
1922    // exon feature should be ignored here
1923  1 sf = new SequenceFeature("exon", "", 7, 9, 0f, null);
1924  1 ds.addSequenceFeature(sf);
1925   
1926  1 List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);
1927    /*
1928    * verify ranges { [4-6], [10-12] }
1929    * note CDS ranges are ordered ascending even if the CDS
1930    * features are not
1931    */
1932  1 assertEquals(6, MappingUtils.getLength(ranges));
1933  1 assertEquals(2, ranges.size());
1934  1 assertEquals(4, ranges.get(0)[0]);
1935  1 assertEquals(6, ranges.get(0)[1]);
1936  1 assertEquals(10, ranges.get(1)[0]);
1937  1 assertEquals(12, ranges.get(1)[1]);
1938    }
1939   
1940    /**
1941    * Tests for the method that maps the subset of a dna sequence that has CDS
1942    * (or subtype) feature, with CDS strand = '-' (reverse)
1943    */
1944    // test turned off as currently findCdsPositions is not strand-dependent
1945    // left in case it comes around again...
 
1946  0 toggle @Test(groups = "Functional", enabled = false)
1947    public void testFindCdsPositions_reverseStrand()
1948    {
1949  0 SequenceI dnaSeq = new Sequence("dna", "aaaGGGcccAAATTTttt");
1950  0 dnaSeq.createDatasetSequence();
1951  0 SequenceI ds = dnaSeq.getDatasetSequence();
1952   
1953    // CDS for dna 4-6
1954  0 SequenceFeature sf = new SequenceFeature("CDS", "", 4, 6, 0f, null);
1955  0 sf.setStrand("-");
1956  0 ds.addSequenceFeature(sf);
1957    // exon feature should be ignored here
1958  0 sf = new SequenceFeature("exon", "", 7, 9, 0f, null);
1959  0 ds.addSequenceFeature(sf);
1960    // CDS for dna 10-12
1961  0 sf = new SequenceFeature("CDS_predicted", "", 10, 12, 0f, null);
1962  0 sf.setStrand("-");
1963  0 ds.addSequenceFeature(sf);
1964   
1965  0 List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);
1966    /*
1967    * verify ranges { [12-10], [6-4] }
1968    */
1969  0 assertEquals(6, MappingUtils.getLength(ranges));
1970  0 assertEquals(2, ranges.size());
1971  0 assertEquals(12, ranges.get(0)[0]);
1972  0 assertEquals(10, ranges.get(0)[1]);
1973  0 assertEquals(6, ranges.get(1)[0]);
1974  0 assertEquals(4, ranges.get(1)[1]);
1975    }
1976   
1977    /**
1978    * Tests for the method that maps the subset of a dna sequence that has CDS
1979    * (or subtype) feature - reverse strand case where the start codon is
1980    * incomplete.
1981    */
 
1982  0 toggle @Test(groups = "Functional", enabled = false)
1983    // test turned off as currently findCdsPositions is not strand-dependent
1984    // left in case it comes around again...
1985    public void testFindCdsPositions_reverseStrandThreePrimeIncomplete()
1986    {
1987  0 SequenceI dnaSeq = new Sequence("dna", "aaagGGCCCaaaTTTttt");
1988  0 dnaSeq.createDatasetSequence();
1989  0 SequenceI ds = dnaSeq.getDatasetSequence();
1990   
1991    // CDS for dna 5-9
1992  0 SequenceFeature sf = new SequenceFeature("CDS", "", 5, 9, 0f, null);
1993  0 sf.setStrand("-");
1994  0 ds.addSequenceFeature(sf);
1995    // CDS for dna 13-15
1996  0 sf = new SequenceFeature("CDS_predicted", "", 13, 15, 0f, null);
1997  0 sf.setStrand("-");
1998  0 sf.setPhase("2"); // skip 2 bases to start of next codon
1999  0 ds.addSequenceFeature(sf);
2000   
2001  0 List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);
2002   
2003    /*
2004    * check the mapping starts with the first complete codon
2005    * expect ranges [13, 13], [9, 5]
2006    */
2007  0 assertEquals(6, MappingUtils.getLength(ranges));
2008  0 assertEquals(2, ranges.size());
2009  0 assertEquals(13, ranges.get(0)[0]);
2010  0 assertEquals(13, ranges.get(0)[1]);
2011  0 assertEquals(9, ranges.get(1)[0]);
2012  0 assertEquals(5, ranges.get(1)[1]);
2013    }
2014   
 
2015  1 toggle @Test(groups = "Functional")
2016    public void testAlignAs_alternateTranscriptsUngapped()
2017    {
2018  1 SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa");
2019  1 SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA");
2020  1 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });
2021  1 ((Alignment) dna).createDatasetAlignment();
2022  1 SequenceI cds1 = new Sequence("cds1", "GGGTTT");
2023  1 SequenceI cds2 = new Sequence("cds2", "CCCAAA");
2024  1 AlignmentI cds = new Alignment(new SequenceI[] { cds1, cds2 });
2025  1 ((Alignment) cds).createDatasetAlignment();
2026   
2027  1 AlignedCodonFrame acf = new AlignedCodonFrame();
2028  1 MapList map = new MapList(new int[] { 4, 9 }, new int[] { 1, 6 }, 1, 1);
2029  1 acf.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(), map);
2030  1 map = new MapList(new int[] { 1, 3, 10, 12 }, new int[] { 1, 6 }, 1, 1);
2031  1 acf.addMap(dna2.getDatasetSequence(), cds2.getDatasetSequence(), map);
2032   
2033    /*
2034    * verify CDS alignment is as:
2035    * cccGGGTTTaaa (cdna)
2036    * CCCgggtttAAA (cdna)
2037    *
2038    * ---GGGTTT (cds; no trailing gaps are added)
2039    * CCC------AAA (cds)
2040    */
2041  1 dna.addCodonFrame(acf);
2042  1 AlignmentUtils.alignAs(cds, dna);
2043  1 assertEquals("---GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
2044  1 assertEquals("CCC------AAA",
2045    cds.getSequenceAt(1).getSequenceAsString());
2046    }
2047   
 
2048  1 toggle @Test(groups = { "Functional" })
2049    public void testAddMappedPositions()
2050    {
2051  1 SequenceI from = new Sequence("dna", "ggAA-ATcc-TT-g");
2052  1 SequenceI seq1 = new Sequence("cds", "AAATTT");
2053  1 from.createDatasetSequence();
2054  1 seq1.createDatasetSequence();
2055  1 Mapping mapping = new Mapping(seq1,
2056    new MapList(new int[]
2057    { 3, 6, 9, 10 }, new int[] { 1, 6 }, 1, 1));
2058  1 Map<Integer, Map<SequenceI, Character>> map = new TreeMap<>();
2059  1 AlignmentUtils.addMappedPositions(seq1, from, mapping, map);
2060   
2061    /*
2062    * verify map has seq1 residues in columns 3,4,6,7,11,12
2063    */
2064  1 assertEquals(6, map.size());
2065  1 assertEquals('A', map.get(3).get(seq1).charValue());
2066  1 assertEquals('A', map.get(4).get(seq1).charValue());
2067  1 assertEquals('A', map.get(6).get(seq1).charValue());
2068  1 assertEquals('T', map.get(7).get(seq1).charValue());
2069  1 assertEquals('T', map.get(11).get(seq1).charValue());
2070  1 assertEquals('T', map.get(12).get(seq1).charValue());
2071   
2072    /*
2073    *
2074    */
2075    }
2076   
2077    /**
2078    * Test case where the mapping 'from' range includes a stop codon which is
2079    * absent in the 'to' range. NOTE(review): fixture below maps 6 to 6 bases, no stop codon
2080    */
 
2081  1 toggle @Test(groups = { "Functional" })
2082    public void testAddMappedPositions_withStopCodon()
2083    {
2084  1 SequenceI from = new Sequence("dna", "ggAA-ATcc-TT-g");
2085  1 SequenceI seq1 = new Sequence("cds", "AAATTT");
2086  1 from.createDatasetSequence();
2087  1 seq1.createDatasetSequence();
2088  1 Mapping mapping = new Mapping(seq1,
2089    new MapList(new int[]
2090    { 3, 6, 9, 10 }, new int[] { 1, 6 }, 1, 1));
2091  1 Map<Integer, Map<SequenceI, Character>> map = new TreeMap<>();
2092  1 AlignmentUtils.addMappedPositions(seq1, from, mapping, map);
2093   
2094    /*
2095    * verify map has seq1 residues in columns 3,4,6,7,11,12
2096    */
2097  1 assertEquals(6, map.size());
2098  1 assertEquals('A', map.get(3).get(seq1).charValue());
2099  1 assertEquals('A', map.get(4).get(seq1).charValue());
2100  1 assertEquals('A', map.get(6).get(seq1).charValue());
2101  1 assertEquals('T', map.get(7).get(seq1).charValue());
2102  1 assertEquals('T', map.get(11).get(seq1).charValue());
2103  1 assertEquals('T', map.get(12).get(seq1).charValue());
2104    }
2105   
2106    /**
2107    * Test for the case where the products for which we want CDS are specified.
2108    * This is to represent the case where EMBL has CDS mappings to both Uniprot
2109    * and EMBLCDSPROTEIN. makeCdsAlignment() should only return the mappings for
2110    * the protein sequences specified.
2111    */
 
2112  1 toggle @Test(groups = { "Functional" })
2113    public void testMakeCdsAlignment_filterProducts()
2114    {
2115  1 SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
2116  1 SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC");
2117  1 SequenceI pep1 = new Sequence("Uniprot|pep1", "GF");
2118  1 SequenceI pep2 = new Sequence("Uniprot|pep2", "GFP");
2119  1 SequenceI pep3 = new Sequence("EMBL|pep3", "GF");
2120  1 SequenceI pep4 = new Sequence("EMBL|pep4", "GFP");
2121  1 dna1.createDatasetSequence();
2122  1 dna2.createDatasetSequence();
2123  1 pep1.createDatasetSequence();
2124  1 pep2.createDatasetSequence();
2125  1 pep3.createDatasetSequence();
2126  1 pep4.createDatasetSequence();
2127  1 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });
2128  1 dna.setDataset(null);
2129  1 AlignmentI emblPeptides = new Alignment(new SequenceI[] { pep3, pep4 });
2130  1 emblPeptides.setDataset(null);
2131   
2132  1 AlignedCodonFrame acf = new AlignedCodonFrame();
2133  1 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
2134    new int[]
2135    { 1, 2 }, 3, 1);
2136  1 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
2137  1 acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map);
2138  1 dna.addCodonFrame(acf);
2139   
2140  1 acf = new AlignedCodonFrame();
2141  1 map = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, new int[] { 1, 3 },
2142    3, 1);
2143  1 acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
2144  1 acf.addMap(dna2.getDatasetSequence(), pep4.getDatasetSequence(), map);
2145  1 dna.addCodonFrame(acf);
2146   
2147    /*
2148    * execute method under test to find CDS for EMBL peptides only
2149    */
2150  1 AlignmentI cds = AlignmentUtils
2151    .makeCdsAlignment(new SequenceI[]
2152    { dna1, dna2 }, dna.getDataset(),
2153    emblPeptides.getSequencesArray());
2154   
2155  1 assertEquals(2, cds.getSequences().size());
2156  1 assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
2157  1 assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString());
2158   
2159    /*
2160    * verify shared, extended alignment dataset
2161    */
2162  1 assertSame(dna.getDataset(), cds.getDataset());
2163  1 assertTrue(dna.getDataset().getSequences()
2164    .contains(cds.getSequenceAt(0).getDatasetSequence()));
2165  1 assertTrue(dna.getDataset().getSequences()
2166    .contains(cds.getSequenceAt(1).getDatasetSequence()));
2167   
2168    /*
2169    * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide
2170    * the mappings are on the shared alignment dataset
2171    */
2172  1 List<AlignedCodonFrame> cdsMappings = cds.getDataset().getCodonFrames();
2173    /*
2174    * 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep)
2175    */
2176  1 assertEquals(6, cdsMappings.size());
2177   
2178    /*
2179    * verify that mapping sets for dna and cds alignments are different
2180    * [not current behaviour - all mappings are on the alignment dataset]
2181    */
2182    // select -> subselect type to test.
2183    // Assert.assertNotSame(dna.getCodonFrames(), cds.getCodonFrames());
2184    // assertEquals(4, dna.getCodonFrames().size());
2185    // assertEquals(4, cds.getCodonFrames().size());
2186   
2187    /*
2188    * Two mappings involve pep3 (dna to pep3, cds to pep3)
2189    * Mapping from pep3 to GGGTTT in first new exon sequence
2190    */
2191  1 List<AlignedCodonFrame> pep3Mappings = MappingUtils
2192    .findMappingsForSequence(pep3, cdsMappings);
2193  1 assertEquals(2, pep3Mappings.size());
2194  1 List<AlignedCodonFrame> mappings = MappingUtils
2195    .findMappingsForSequence(cds.getSequenceAt(0), pep3Mappings);
2196  1 assertEquals(1, mappings.size());
2197   
2198    // map G to GGG
2199  1 SearchResultsI sr = MappingUtils.buildSearchResults(pep3, 1, mappings);
2200  1 assertEquals(1, sr.getResults().size());
2201  1 SearchResultMatchI m = sr.getResults().get(0);
2202  1 assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
2203  1 assertEquals(1, m.getStart());
2204  1 assertEquals(3, m.getEnd());
2205    // map F to TTT
2206  1 sr = MappingUtils.buildSearchResults(pep3, 2, mappings);
2207  1 m = sr.getResults().get(0);
2208  1 assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
2209  1 assertEquals(4, m.getStart());
2210  1 assertEquals(6, m.getEnd());
2211   
2212    /*
2213    * Two mappings involve pep4 (dna to pep4, cds to pep4)
2214    * Verify mapping from pep4 to GGGTTTCCC in second new exon sequence
2215    */
2216  1 List<AlignedCodonFrame> pep4Mappings = MappingUtils
2217    .findMappingsForSequence(pep4, cdsMappings);
2218  1 assertEquals(2, pep4Mappings.size());
2219  1 mappings = MappingUtils.findMappingsForSequence(cds.getSequenceAt(1),
2220    pep4Mappings);
2221  1 assertEquals(1, mappings.size());
2222    // map G to GGG
2223  1 sr = MappingUtils.buildSearchResults(pep4, 1, mappings);
2224  1 assertEquals(1, sr.getResults().size());
2225  1 m = sr.getResults().get(0);
2226  1 assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
2227  1 assertEquals(1, m.getStart());
2228  1 assertEquals(3, m.getEnd());
2229    // map F to TTT
2230  1 sr = MappingUtils.buildSearchResults(pep4, 2, mappings);
2231  1 m = sr.getResults().get(0);
2232  1 assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
2233  1 assertEquals(4, m.getStart());
2234  1 assertEquals(6, m.getEnd());
2235    // map P to CCC
2236  1 sr = MappingUtils.buildSearchResults(pep4, 3, mappings);
2237  1 m = sr.getResults().get(0);
2238  1 assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
2239  1 assertEquals(7, m.getStart());
2240  1 assertEquals(9, m.getEnd());
2241    }
2242   
2243    /**
2244    * Test the method that just copies aligned sequences, provided all sequences
2245    * to be aligned share the aligned sequence's dataset
2246    */
 
2247  1 toggle @Test(groups = "Functional")
2248    public void testAlignAsSameSequences()
2249    {
2250  1 SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa");
2251  1 SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA");
2252  1 AlignmentI al1 = new Alignment(new SequenceI[] { dna1, dna2 });
2253  1 ((Alignment) al1).createDatasetAlignment();
2254   
2255  1 SequenceI dna3 = new Sequence(dna1);
2256  1 SequenceI dna4 = new Sequence(dna2);
2257  1 assertSame(dna3.getDatasetSequence(), dna1.getDatasetSequence());
2258  1 assertSame(dna4.getDatasetSequence(), dna2.getDatasetSequence());
2259  1 String seq1 = "-cc-GG-GT-TT--aaa";
2260  1 dna3.setSequence(seq1);
2261  1 String seq2 = "C--C-Cgg--gtt-tAA-A-";
2262  1 dna4.setSequence(seq2);
2263  1 AlignmentI al2 = new Alignment(new SequenceI[] { dna3, dna4 });
2264  1 ((Alignment) al2).createDatasetAlignment();
2265   
2266    /*
2267    * alignment removes gapped columns (two internal, two trailing)
2268    */
2269  1 assertTrue(AlignmentUtils.alignAsSameSequences(al1, al2));
2270  1 String aligned1 = "-cc-GG-GTTT-aaa";
2271  1 assertEquals(aligned1, al1.getSequenceAt(0).getSequenceAsString());
2272  1 String aligned2 = "C--C-Cgg-gtttAAA";
2273  1 assertEquals(aligned2, al1.getSequenceAt(1).getSequenceAsString());
2274   
2275    /*
2276    * add another sequence to 'aligned' - should still succeed, since
2277    * unaligned sequences still share a dataset with aligned sequences
2278    */
2279  1 SequenceI dna5 = new Sequence("dna5", "CCCgggtttAAA");
2280  1 dna5.createDatasetSequence();
2281  1 al2.addSequence(dna5);
2282  1 assertTrue(AlignmentUtils.alignAsSameSequences(al1, al2));
2283  1 assertEquals(aligned1, al1.getSequenceAt(0).getSequenceAsString());
2284  1 assertEquals(aligned2, al1.getSequenceAt(1).getSequenceAsString());
2285   
2286    /*
2287    * add another sequence to 'unaligned' - should fail, since now not
2288    * all unaligned sequences share a dataset with aligned sequences
2289    */
2290  1 SequenceI dna6 = new Sequence("dna6", "CCCgggtttAAA");
2291  1 dna6.createDatasetSequence();
2292  1 al1.addSequence(dna6);
2293    // JAL-2110 JBP Comment: what's the use case for this behaviour ?
2294  1 assertFalse(AlignmentUtils.alignAsSameSequences(al1, al2));
2295    }
2296   
 
2297  1 toggle @Test(groups = "Functional")
2298    public void testAlignAsSameSequencesMultipleSubSeq()
2299    {
2300  1 SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa");
2301  1 SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA");
2302  1 SequenceI as1 = dna1.deriveSequence(); // cccGGGTTTaaa/1-12
2303  1 SequenceI as2 = dna1.deriveSequence().getSubSequence(3, 7); // GGGT/4-7
2304  1 SequenceI as3 = dna2.deriveSequence(); // CCCgggtttAAA/1-12
2305  1 as1.insertCharAt(6, 5, '-');
2306  1 assertEquals("cccGGG-----TTTaaa", as1.getSequenceAsString());
2307  1 as2.insertCharAt(6, 5, '-');
2308  1 assertEquals("GGGT-----", as2.getSequenceAsString());
2309  1 as3.insertCharAt(3, 5, '-');
2310  1 assertEquals("CCC-----gggtttAAA", as3.getSequenceAsString());
2311  1 AlignmentI aligned = new Alignment(new SequenceI[] { as1, as2, as3 });
2312   
2313    // why do we need to cast this still ?
2314  1 ((Alignment) aligned).createDatasetAlignment();
2315  1 SequenceI uas1 = dna1.deriveSequence();
2316  1 SequenceI uas2 = dna1.deriveSequence().getSubSequence(3, 7);
2317  1 SequenceI uas3 = dna2.deriveSequence();
2318  1 AlignmentI tobealigned = new Alignment(
2319    new SequenceI[]
2320    { uas1, uas2, uas3 });
2321  1 ((Alignment) tobealigned).createDatasetAlignment();
2322   
2323    /*
2324    * alignAs lines up dataset sequences and removes empty columns (two)
2325    */
2326  1 assertTrue(AlignmentUtils.alignAsSameSequences(tobealigned, aligned));
2327  1 assertEquals("cccGGG---TTTaaa", uas1.getSequenceAsString());
2328  1 assertEquals("GGGT", uas2.getSequenceAsString());
2329  1 assertEquals("CCC---gggtttAAA", uas3.getSequenceAsString());
2330    }
2331   
 
2332  1 toggle @Test(groups = { "Functional" })
2333    public void testTransferGeneLoci()
2334    {
2335  1 SequenceI from = new Sequence("transcript",
2336    "aaacccgggTTTAAACCCGGGtttaaacccgggttt");
2337  1 SequenceI to = new Sequence("CDS", "TTTAAACCCGGG");
2338  1 MapList map = new MapList(new int[] { 1, 12 }, new int[] { 10, 21 }, 1,
2339    1);
2340   
2341    /*
2342    * first with nothing to transfer
2343    */
2344  1 AlignmentUtils.transferGeneLoci(from, map, to);
2345  1 assertNull(to.getGeneLoci());
2346   
2347    /*
2348    * next with gene loci set on 'from' sequence
2349    */
2350  1 int[] exons = new int[] { 100, 105, 155, 164, 210, 229 };
2351  1 MapList geneMap = new MapList(new int[] { 1, 36 }, exons, 1, 1);
2352  1 from.setGeneLoci("human", "GRCh38", "7", geneMap);
2353  1 AlignmentUtils.transferGeneLoci(from, map, to);
2354   
2355  1 GeneLociI toLoci = to.getGeneLoci();
2356  1 assertNotNull(toLoci);
2357    // DBRefEntry constructor upper-cases 'source'
2358  1 assertEquals("HUMAN", toLoci.getSpeciesId());
2359  1 assertEquals("GRCh38", toLoci.getAssemblyId());
2360  1 assertEquals("7", toLoci.getChromosomeId());
2361   
2362    /*
2363    * transcript 'exons' are 1-6, 7-16, 17-36
2364    * CDS 1:12 is transcript 10-21
2365    * transcript 'CDS' is 10-16, 17-21
2366    * which is 'gene' 158-164, 210-214
2367    */
2368  1 MapList toMap = toLoci.getMapping();
2369  1 assertEquals(1, toMap.getFromRanges().size());
2370  1 assertEquals(2, toMap.getFromRanges().get(0).length);
2371  1 assertEquals(1, toMap.getFromRanges().get(0)[0]);
2372  1 assertEquals(12, toMap.getFromRanges().get(0)[1]);
2373  1 assertEquals(2, toMap.getToRanges().size());
2374  1 assertEquals(2, toMap.getToRanges().get(0).length);
2375  1 assertEquals(158, toMap.getToRanges().get(0)[0]);
2376  1 assertEquals(164, toMap.getToRanges().get(0)[1]);
2377  1 assertEquals(210, toMap.getToRanges().get(1)[0]);
2378  1 assertEquals(214, toMap.getToRanges().get(1)[1]);
2379    // or summarised as (but toString might change in future):
2380  1 assertEquals("[ [1, 12] ] 1:1 to [ [158, 164] [210, 214] ]",
2381    toMap.toString());
2382   
2383    /*
2384    * an existing value is not overridden
2385    */
2386  1 geneMap = new MapList(new int[] { 1, 36 }, new int[] { 36, 1 }, 1, 1);
2387  1 from.setGeneLoci("inhuman", "GRCh37", "6", geneMap);
2388  1 AlignmentUtils.transferGeneLoci(from, map, to);
2389  1 assertEquals("GRCh38", toLoci.getAssemblyId());
2390  1 assertEquals("7", toLoci.getChromosomeId());
2391  1 toMap = toLoci.getMapping();
2392  1 assertEquals("[ [1, 12] ] 1:1 to [ [158, 164] [210, 214] ]",
2393    toMap.toString());
2394    }
2395   
2396    /**
2397    * Tests for the method that maps nucleotide to protein based on CDS features
2398    */
 
2399  1 toggle @Test(groups = "Functional")
2400    public void testMapCdsToProtein()
2401    {
2402  1 SequenceI peptide = new Sequence("pep", "KLQ");
2403   
2404    /*
2405    * Case 1: CDS 3 times length of peptide
2406    * NB method only checks lengths match, not translation
2407    */
2408  1 SequenceI dna = new Sequence("dna", "AACGacgtCTCCT");
2409  1 dna.createDatasetSequence();
2410  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
2411  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 13, null));
2412  1 MapList ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
2413  1 assertEquals(3, ml.getFromRatio());
2414  1 assertEquals(1, ml.getToRatio());
2415  1 assertEquals("[[1, 3]]",
2416    Arrays.deepToString(ml.getToRanges().toArray()));
2417  1 assertEquals("[[1, 4], [9, 13]]",
2418    Arrays.deepToString(ml.getFromRanges().toArray()));
2419   
2420    /*
2421    * Case 2: CDS 3 times length of peptide + stop codon
2422    * (note code does not currently check trailing codon is a stop codon)
2423    */
2424  1 dna = new Sequence("dna", "AACGacgtCTCCTCCC");
2425  1 dna.createDatasetSequence();
2426  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
2427  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 16, null));
2428  1 ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
2429  1 assertEquals(3, ml.getFromRatio());
2430  1 assertEquals(1, ml.getToRatio());
2431  1 assertEquals("[[1, 3]]",
2432    Arrays.deepToString(ml.getToRanges().toArray()));
2433  1 assertEquals("[[1, 4], [9, 13]]",
2434    Arrays.deepToString(ml.getFromRanges().toArray()));
2435   
2436    /*
2437    * Case 3: CDS longer than 3 * peptide + stop codon - no mapping is made
2438    */
2439  1 dna = new Sequence("dna", "AACGacgtCTCCTTGATCA");
2440  1 dna.createDatasetSequence();
2441  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
2442  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 19, null));
2443  1 ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
2444  1 assertNull(ml);
2445   
2446    /*
2447    * Case 4: CDS shorter than 3 * peptide - no mapping is made
2448    */
2449  1 dna = new Sequence("dna", "AACGacgtCTCC");
2450  1 dna.createDatasetSequence();
2451  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
2452  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 12, null));
2453  1 ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
2454  1 assertNull(ml);
2455   
2456    /*
2457    * Case 5: CDS 3 times length of peptide + part codon - mapping is truncated
2458    */
2459  1 dna = new Sequence("dna", "AACGacgtCTCCTTG");
2460  1 dna.createDatasetSequence();
2461  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
2462  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 15, null));
2463  1 ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
2464  1 assertEquals(3, ml.getFromRatio());
2465  1 assertEquals(1, ml.getToRatio());
2466  1 assertEquals("[[1, 3]]",
2467    Arrays.deepToString(ml.getToRanges().toArray()));
2468  1 assertEquals("[[1, 4], [9, 13]]",
2469    Arrays.deepToString(ml.getFromRanges().toArray()));
2470   
2471    /*
2472    * Case 6: incomplete start codon corresponding to X in peptide
2473    */
2474  1 dna = new Sequence("dna", "ACGacgtCTCCTTGG");
2475  1 dna.createDatasetSequence();
2476  1 SequenceFeature sf = new SequenceFeature("CDS", "", 1, 3, null);
2477  1 sf.setPhase("2"); // skip 2 positions (AC) to start of next codon (GCT)
2478  1 dna.addSequenceFeature(sf);
2479  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 8, 15, null));
2480  1 peptide = new Sequence("pep", "XLQ");
2481  1 ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
2482  1 assertEquals("[[2, 3]]",
2483    Arrays.deepToString(ml.getToRanges().toArray()));
2484  1 assertEquals("[[3, 3], [8, 12]]",
2485    Arrays.deepToString(ml.getFromRanges().toArray()));
2486    }
2487   
2488    /**
2489    * Tests for the method that locates the CDS sequence that has a mapping to
2490    * the given protein. That is, given a transcript-to-peptide mapping, find the
2491    * cds-to-peptide mapping that relates to both, and return the CDS sequence.
2492    */
 
2493  1 toggle @Test(groups = "Functional")
2494    public void testFindCdsForProtein()
2495    {
2496  1 List<AlignedCodonFrame> mappings = new ArrayList<>();
2497  1 AlignedCodonFrame acf1 = new AlignedCodonFrame();
2498  1 mappings.add(acf1);
2499   
2500  1 SequenceI dna1 = new Sequence("dna1", "cgatATcgGCTATCTATGacg");
2501  1 dna1.createDatasetSequence();
2502   
2503    // NB we currently exclude STOP codon from CDS sequences
2504    // the test would need to change if this changes in future
2505  1 SequenceI cds1 = new Sequence("cds1", "ATGCTATCT");
2506  1 cds1.createDatasetSequence();
2507   
2508  1 SequenceI pep1 = new Sequence("pep1", "MLS");
2509  1 pep1.createDatasetSequence();
2510  1 List<AlignedCodonFrame> seqMappings = new ArrayList<>();
2511  1 MapList mapList = new MapList(new int[] { 5, 6, 9, 15 },
2512    new int[]
2513    { 1, 3 }, 3, 1);
2514  1 Mapping dnaToPeptide = new Mapping(pep1.getDatasetSequence(), mapList);
2515   
2516    // add dna to peptide mapping
2517  1 seqMappings.add(acf1);
2518  1 acf1.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(),
2519    mapList);
2520   
2521    /*
2522    * first case - no dna-to-CDS mapping exists - search fails
2523    */
2524  1 SequenceI seq = AlignmentUtils.findCdsForProtein(mappings, dna1,
2525    seqMappings, dnaToPeptide);
2526  1 assertNull(seq);
2527   
2528    /*
2529    * second case - CDS-to-peptide mapping exists but no dna-to-CDS
2530    * - search fails
2531    */
2532    // todo this test fails if the mapping is added to acf1, not acf2
2533    // need to tidy up use of lists of mappings in AlignedCodonFrame
2534  1 AlignedCodonFrame acf2 = new AlignedCodonFrame();
2535  1 mappings.add(acf2);
2536  1 MapList cdsToPeptideMapping = new MapList(new int[] { 1, 9 },
2537    new int[]
2538    { 1, 3 }, 3, 1);
2539  1 acf2.addMap(cds1.getDatasetSequence(), pep1.getDatasetSequence(),
2540    cdsToPeptideMapping);
2541  1 assertNull(AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
2542    dnaToPeptide));
2543   
2544    /*
2545    * third case - add dna-to-CDS mapping - CDS is now found!
2546    */
2547  1 MapList dnaToCdsMapping = new MapList(new int[] { 5, 6, 9, 15 },
2548    new int[]
2549    { 1, 9 }, 1, 1);
2550  1 acf1.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(),
2551    dnaToCdsMapping);
2552  1 seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
2553    dnaToPeptide);
2554  1 assertSame(seq, cds1.getDatasetSequence());
2555    }
2556   
2557    /**
2558    * Tests for the method that locates the CDS sequence that has a mapping to
2559    * the given protein. That is, given a transcript-to-peptide mapping, find the
2560    * cds-to-peptide mapping that relates to both, and return the CDS sequence.
2561    * This test is for the case where transcript and CDS are the same length.
2562    */
 
2563  1 toggle @Test(groups = "Functional")
2564    public void testFindCdsForProtein_noUTR()
2565    {
2566  1 List<AlignedCodonFrame> mappings = new ArrayList<>();
2567  1 AlignedCodonFrame acf1 = new AlignedCodonFrame();
2568  1 mappings.add(acf1);
2569   
2570  1 SequenceI dna1 = new Sequence("dna1", "ATGCTATCTTAA");
2571  1 dna1.createDatasetSequence();
2572   
2573    // NB we currently exclude STOP codon from CDS sequences
2574    // the test would need to change if this changes in future
2575  1 SequenceI cds1 = new Sequence("cds1", "ATGCTATCT");
2576  1 cds1.createDatasetSequence();
2577   
2578  1 SequenceI pep1 = new Sequence("pep1", "MLS");
2579  1 pep1.createDatasetSequence();
2580  1 List<AlignedCodonFrame> seqMappings = new ArrayList<>();
2581  1 MapList mapList = new MapList(new int[] { 1, 9 }, new int[] { 1, 3 }, 3,
2582    1);
2583  1 Mapping dnaToPeptide = new Mapping(pep1.getDatasetSequence(), mapList);
2584   
2585    // add dna to peptide mapping
2586  1 seqMappings.add(acf1);
2587  1 acf1.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(),
2588    mapList);
2589   
2590    /*
2591    * first case - transcript lacks CDS features - it appears to be
2592    * the CDS sequence and is returned
2593    */
2594  1 SequenceI seq = AlignmentUtils.findCdsForProtein(mappings, dna1,
2595    seqMappings, dnaToPeptide);
2596  1 assertSame(seq, dna1.getDatasetSequence());
2597   
2598    /*
2599    * second case - transcript has CDS feature - this means it is
2600    * not returned as a match for CDS (CDS sequences don't have CDS features)
2601    */
2602  1 dna1.addSequenceFeature(
2603    new SequenceFeature(SequenceOntologyI.CDS, "cds", 1, 12, null));
2604  1 seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
2605    dnaToPeptide);
2606  1 assertNull(seq);
2607   
2608    /*
2609    * third case - CDS-to-peptide mapping exists but no dna-to-CDS
2610    * - search fails
2611    */
2612    // todo this test fails if the mapping is added to acf1, not acf2
2613    // need to tidy up use of lists of mappings in AlignedCodonFrame
2614  1 AlignedCodonFrame acf2 = new AlignedCodonFrame();
2615  1 mappings.add(acf2);
2616  1 MapList cdsToPeptideMapping = new MapList(new int[] { 1, 9 },
2617    new int[]
2618    { 1, 3 }, 3, 1);
2619  1 acf2.addMap(cds1.getDatasetSequence(), pep1.getDatasetSequence(),
2620    cdsToPeptideMapping);
2621  1 assertNull(AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
2622    dnaToPeptide));
2623   
2624    /*
2625    * fourth case - add dna-to-CDS mapping - CDS is now found!
2626    */
2627  1 MapList dnaToCdsMapping = new MapList(new int[] { 1, 9 },
2628    new int[]
2629    { 1, 9 }, 1, 1);
2630  1 acf1.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(),
2631    dnaToCdsMapping);
2632  1 seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
2633    dnaToPeptide);
2634  1 assertSame(seq, cds1.getDatasetSequence());
2635    }
2636   
 
2637  1 toggle @Test(groups = "Functional")
2638    public void testAddReferenceAnnotations()
2639    {
2640  1 SequenceI longseq = new Sequence("longA", "ASDASDASDASDAASDASDASDASDA");
2641  1 Annotation[] aa = new Annotation[longseq.getLength()];
2642   
2643  27 for (int p = 0; p < aa.length; p++)
2644    {
2645  26 aa[p] = new Annotation("P", "pos " + (p + 1), (char) 0,
2646    (float) p + 1);
2647    }
2648  1 AlignmentAnnotation refAnnot = new AlignmentAnnotation("LongSeqAnnot",
2649    "Annotations", aa);
2650  1 refAnnot.setCalcId("Test");
2651  1 longseq.addAlignmentAnnotation(refAnnot);
2652  1 verifyExpectedSequenceAnnotation(refAnnot);
2653   
2654  1 Alignment ourAl = new Alignment(
2655    new SequenceI[]
2656    { longseq.getSubSequence(5, 10),
2657    longseq.getSubSequence(7, 12) });
2658  1 ourAl.createDatasetAlignment();
2659   
2660    // transfer annotation
2661  1 SortedMap<String, String> tipEntries = new TreeMap<>();
2662  1 Map<SequenceI, List<AlignmentAnnotation>> candidates = new LinkedHashMap<>();
2663   
2664  1 AlignmentUtils.findAddableReferenceAnnotations(ourAl.getSequences(),
2665    tipEntries, candidates, ourAl);
2666  1 AlignmentUtils.addReferenceAnnotations(candidates, ourAl, null);
2667   
2668  1 assertNotNull(ourAl.getAlignmentAnnotation());
2669  1 assertEquals(ourAl.getAlignmentAnnotation().length, 2);
2670   
2671  1 for (AlignmentAnnotation alan : ourAl.getAlignmentAnnotation())
2672    {
2673  2 verifyExpectedSequenceAnnotation(alan);
2674    }
2675    // Everything above works for 2.11.3 and 2.11.2.x.
2676    // now simulate copy/paste to new alignment
2677  1 SequenceI[] newSeqAl = new SequenceI[2];
2678    // copy sequences but no annotation
2679  1 newSeqAl[0] = new Sequence(ourAl.getSequenceAt(0),
2680    ourAl.getSequenceAt(0).getAnnotation());
2681  1 newSeqAl[1] = new Sequence(ourAl.getSequenceAt(1),
2682    ourAl.getSequenceAt(1).getAnnotation());
2683   
2684  1 Alignment newAl = new Alignment(newSeqAl);
2685    // delete annotation
2686  1 for (SequenceI sq : newAl.getSequences())
2687    {
2688  2 sq.setAlignmentAnnotation(new AlignmentAnnotation[0]);
2689    }
2690    // JAL-4182 scenario test
2691  1 SequenceGroup sg = new SequenceGroup(Arrays.asList(newSeqAl));
2692  1 sg.setStartRes(0);
2693  1 sg.setEndRes(newAl.getWidth());
2694  1 AlignmentUtils.addReferenceAnnotationTo(newAl, newSeqAl[0],
2695    newSeqAl[0].getDatasetSequence().getAnnotation()[0], sg);
2696  1 AlignmentUtils.addReferenceAnnotationTo(newAl, newSeqAl[1],
2697    newSeqAl[1].getDatasetSequence().getAnnotation()[0], sg);
2698  1 for (AlignmentAnnotation alan : newAl.getAlignmentAnnotation())
2699    {
2700  2 verifyExpectedSequenceAnnotation(alan);
2701    }
2702    }
2703   
2704    /**
2705    * helper - tests annotation is mapped to position it was originally created
2706    * for
2707    *
2708    * @param alan
2709    */
 
2710  5 toggle private void verifyExpectedSequenceAnnotation(AlignmentAnnotation alan)
2711    {
2712  51 for (int c = 0; c < alan.annotations.length; c++)
2713    {
2714  46 Annotation a = alan.annotations[c];
2715  46 if (a != null)
2716    {
2717  46 assertEquals("Misaligned annotation at " + c,
2718    (float) alan.sequenceRef.findPosition(c), a.value);
2719    }
2720    else
2721    {
2722  0 assertTrue("Unexpected Null at position " + c,
2723    c >= alan.sequenceRef.getLength()
2724    || Comparison.isGap(alan.sequenceRef.getCharAt(c)));
2725    }
2726    }
2727    }
2728   
 
2729  1 toggle @Test(groups = "Functional")
2730    public void testAddReferenceContactMap()
2731    {
2732  1 SequenceI sq = new Sequence("a", "SSSQ");
2733  1 ContactMatrixI cm = new SeqDistanceContactMatrix(4);
2734  1 AlignmentAnnotation cm_aan = sq.addContactList(cm);
2735  1 cm_aan.description = cm_aan.description + " cm1";
2736  1 SequenceI dssq = sq.createDatasetSequence();
2737   
2738    // remove annotation on our non-dataset sequence
2739  1 sq.removeAlignmentAnnotation(sq.getAnnotation()[0]);
2740    // test transfer
2741  1 Alignment al = new Alignment(new SequenceI[] { sq });
2742  1 SortedMap<String, String> tipEntries = new TreeMap<>();
2743  1 Map<SequenceI, List<AlignmentAnnotation>> candidates = new LinkedHashMap<>();
2744   
2745  1 AlignmentUtils.findAddableReferenceAnnotations(al.getSequences(),
2746    tipEntries, candidates, al);
2747  1 AlignmentUtils.addReferenceAnnotations(candidates, al, null);
2748  1 assertTrue("No contact map annotation transferred",
2749    al.getAlignmentAnnotation() != null
2750    && al.getAlignmentAnnotation().length == 1);
2751  1 AlignmentAnnotation alan = al.findAnnotations(sq, null, cm_aan.label)
2752    .iterator().next();
2753  1 ContactMatrixI t_cm = al.getContactMatrixFor(alan);
2754  1 assertNotNull("No contact map for the transferred annotation row.",
2755    t_cm);
2756  1 assertTrue(t_cm instanceof SeqDistanceContactMatrix);
2757  1 assertTrue(((SeqDistanceContactMatrix) t_cm).hasReferenceSeq());
2758   
2759  1 ContactListI cl = al.getContactListFor(alan, 1);
2760  1 assertNotNull(
2761    "No contact matrix recovered after reference annotation transfer",
2762    cl);
2763    // semantics of sequence associated contact list is slightly tricky - column
2764    // 3 in alignment should have data
2765  1 cl = al.getContactListFor(alan, 3);
2766  1 assertNotNull(
2767    "Contact matrix should have data for last position in sequence",
2768    cl);
2769   
2770  1 ContactMatrixI cm2 = new SeqDistanceContactMatrix(4);
2771  1 dssq.addContactList(cm2);
2772  1 tipEntries = new TreeMap<>();
2773  1 candidates = new LinkedHashMap<>();
2774   
2775  1 AlignmentUtils.findAddableReferenceAnnotations(al.getSequences(),
2776    tipEntries, candidates, al);
2777  1 AlignmentUtils.addReferenceAnnotations(candidates, al, null);
2778  1 assertTrue("Expected two contact map annotation transferred",
2779    al.getAlignmentAnnotation() != null
2780    && al.getAlignmentAnnotation().length == 2);
2781   
2782    }
2783   
 
2784  5 toggle @Test(
2785    groups = "Functional",
2786    dataProvider = "SecondaryStructureAnnotations")
2787    public void testSecondaryStructurePresentAndSources(
2788    AlignmentAnnotation[] annotations, boolean expectedSSPresent,
2789    ArrayList<String> expectedSSSources)
2790    {
2791  5 Assert.assertEquals(expectedSSPresent,
2792    AlignmentUtils.isSecondaryStructurePresent(annotations));
2793    }
2794   
 
2795  1 toggle @DataProvider(name = "SecondaryStructureAnnotations")
2796    public static Object[][] provideSecondaryStructureAnnotations()
2797    {
2798  1 AlignmentAnnotation ann1 = new AlignmentAnnotation(
2799    "Secondary Structure", "Secondary Structure",
2800    new Annotation[] {});
2801  1 AlignmentAnnotation ann2 = new AlignmentAnnotation("jnetpred",
2802    "jnetpred", new Annotation[] {});
2803  1 AlignmentAnnotation ann3 = new AlignmentAnnotation("Temp", "Temp",
2804    new Annotation[] {});
2805  1 AlignmentAnnotation ann4 = new AlignmentAnnotation("Temp", "Temp",
2806    new Annotation[] {});
2807   
2808  1 List<String> ssSources1 = new ArrayList<>(
2809    Arrays.asList("3D Structures"));
2810  1 List<String> ssSources2 = new ArrayList<>(Arrays.asList("JPred"));
2811  1 List<String> ssSources3 = new ArrayList<>(
2812    Arrays.asList("3D Structures", "JPred"));
2813  1 List<String> ssSources4 = new ArrayList<>();
2814   
2815  1 return new Object[][] {
2816    { new AlignmentAnnotation[]
2817    { ann1, ann3, ann4 }, true, ssSources1 },
2818    { new AlignmentAnnotation[]
2819    { ann2, ann3, ann4 }, true, ssSources2 },
2820    { new AlignmentAnnotation[]
2821    { ann3, ann4 }, false, ssSources4 },
2822    { new AlignmentAnnotation[] {}, false, ssSources4 },
2823    { new AlignmentAnnotation[]
2824    { ann1, ann2, ann3, ann4 }, true, ssSources3 } };
2825    }
2826   
 
2827  4 toggle @Test(dataProvider = "SecondaryStructureAnnotationColours", groups = { "Functional" })
2828    public void testSecondaryStructureAnnotationColour(char symbol,
2829    Color expectedColor)
2830    {
2831  4 Color actualColor = AlignmentUtils
2832    .getSecondaryStructureAnnotationColour(symbol);
2833  4 Assert.assertEquals(actualColor, expectedColor);
2834    }
2835   
 
2836  1 toggle @DataProvider(name = "SecondaryStructureAnnotationColours")
2837    public static Object[][] provideSecondaryStructureAnnotationColours()
2838    {
2839  1 return new Object[][] { { 'C', Color.gray }, { 'E', Color.green },
2840    { 'H', Color.red },
2841    { '-', Color.white } };
2842    }
2843   
 
2844  4 toggle @Test(dataProvider = "SSAnnotationPresence", groups = { "Functional" })
2845    public void testIsSSAnnotationPresent(
2846    Map<SequenceI, List<AlignmentAnnotation>> annotations,
2847    boolean expectedPresence)
2848    {
2849  4 boolean actualPresence = AlignmentUtils
2850    .isSSAnnotationPresent(annotations);
2851  4 Assert.assertEquals(actualPresence, expectedPresence);
2852    }
2853   
 
2854  1 toggle @DataProvider(name = "SSAnnotationPresence")
2855    public static Object[][] provideSSAnnotationPresence()
2856    {
2857  1 Map<SequenceI, List<AlignmentAnnotation>> annotations1 = new HashMap<>();
2858  1 SequenceI seq1 = new Sequence("Seq1", "ASD---ASD---ASD", 37, 45);
2859  1 List<AlignmentAnnotation> annotationsList1 = new ArrayList<>();
2860  1 annotationsList1.add(new AlignmentAnnotation("Secondary Structure",
2861    "Secondary Structure", new Annotation[] {}));
2862  1 annotations1.put(seq1, annotationsList1); // Annotation present secondary
2863    // structure for seq1
2864   
2865  1 Map<SequenceI, List<AlignmentAnnotation>> annotations2 = new HashMap<>();
2866  1 SequenceI seq2 = new Sequence("Seq2", "ASD---ASD------", 37, 42);
2867  1 List<AlignmentAnnotation> annotationsList2 = new ArrayList<>();
2868  1 annotationsList2.add(new AlignmentAnnotation("Other Annotation",
2869    "Other Annotation", new Annotation[] {}));
2870  1 annotations2.put(seq2, annotationsList2); // Annotation not related to any
2871    // of secondary structure for seq2
2872   
2873  1 Map<SequenceI, List<AlignmentAnnotation>> annotations3 = new HashMap<>();
2874    // Empty annotation map
2875   
2876  1 Map<SequenceI, List<AlignmentAnnotation>> annotations4 = new HashMap<>();
2877  1 SequenceI seq4 = new Sequence("Seq4", "ASD---ASD---AS-", 37, 44);
2878  1 List<AlignmentAnnotation> annotationsList4 = new ArrayList<>();
2879  1 annotationsList4.add(new AlignmentAnnotation("jnetpred", "jnetpred",
2880    new Annotation[] {}));
2881  1 annotations4.put(seq4, annotationsList4); // Annotation present from JPred
2882    // for seq4
2883   
2884  1 return new Object[][] { { annotations1, true }, // Annotations present
2885    // secondary structure
2886    // present
2887    { annotations2, false }, // No annotations related to any of the
2888    // secondary structure present
2889    { annotations3, false }, // Empty annotation map
2890    { annotations4, true }, // Annotations present from JPred secondary
2891    // structure present
2892    };
2893    }
2894   
2895   
 
2896  1 toggle @Test(groups = "Functional")
2897    public void testGetAlignmentAnnotationForSource()
2898    {
2899   
2900  1 SequenceI seq = new Sequence("testSeq", "ACDEFGHIKLMNPQRSTVWY");
2901   
2902  1 AlignmentAnnotation annot1 =
2903    new AlignmentAnnotation("Secondary Structure",
2904    "Secondary Structure for 4zhpA", new Annotation[] {}); //PDB
2905  1 annot1.setProperty(Constants.SS_PROVIDER_PROPERTY, "PDB");
2906  1 AlignmentAnnotation annot2 =
2907    new AlignmentAnnotation("Secondary Structure",
2908    "Secondary Structure for 5zhpA", new Annotation[] {}); //PDB
2909  1 annot2.setProperty(Constants.SS_PROVIDER_PROPERTY, "PDB");
2910  1 AlignmentAnnotation annot3 = new AlignmentAnnotation("Secondary Structure",
2911    "Secondary Structure for p09911_54-147__3a7wzn.1.p3502557454997462030P",
2912    new Annotation[] {}); //Swiss model
2913  1 annot3.setProperty(Constants.SS_PROVIDER_PROPERTY, "SWISS-MODEL");
2914  1 AlignmentAnnotation annot4 = new AlignmentAnnotation("Secondary Structure",
2915    "Secondary Structure for af-q43517-f1A", new Annotation[] {}); //Alphafold
2916  1 annot4.setProperty(Constants.SS_PROVIDER_PROPERTY, "AlphaFold DB");
2917  1 AlignmentAnnotation annot5 = new AlignmentAnnotation("Secondary Structure",
2918    "Secondary Structure for af-q43517-f1A", new Annotation[] {}); //Alphafold
2919  1 annot5.setProperty(Constants.SS_PROVIDER_PROPERTY, "AlphaFold DB");
2920   
2921  1 seq.addAlignmentAnnotation(annot1);
2922  1 seq.addAlignmentAnnotation(annot2);
2923  1 seq.addAlignmentAnnotation(annot3);
2924  1 seq.addAlignmentAnnotation(annot4);
2925  1 seq.addAlignmentAnnotation(annot5);
2926   
2927  1 List<AlignmentAnnotation> all = AlignmentUtils.getAlignmentAnnotationForSource(
2928    seq, Constants.SS_ALL_PROVIDERS);
2929  1 assertTrue("Expected non-null result for SS_ALL_PROVIDERS",
2930    all != null);
2931  1 Assert.assertEquals(all.size(), 5, "Expected all annotations");
2932   
2933  1 List<AlignmentAnnotation> pdb = AlignmentUtils.getAlignmentAnnotationForSource(
2934    seq, "PDB");
2935  1 assertTrue("Expected non-null result for PDB",
2936    pdb != null);
2937  1 Assert.assertEquals(pdb.size(), 2, "Expected 2 annotations");
2938   
2939  1 List<AlignmentAnnotation> swiss = AlignmentUtils.getAlignmentAnnotationForSource(
2940    seq, "SWISS-MODEL");
2941  1 assertTrue("Expected non-null result for SWISS-MODEL",
2942    swiss != null);
2943  1 Assert.assertEquals(swiss.size(), 1, "Expected 1 annotation");
2944   
2945  1 List<AlignmentAnnotation> alphafold = AlignmentUtils.getAlignmentAnnotationForSource(
2946    seq, "AlphaFold DB");
2947  1 assertTrue("Expected non-null result for AlphaFold DB",
2948    alphafold != null);
2949  1 Assert.assertEquals(alphafold.size(), 2, "Expected 2 annotations");
2950    }
2951   
2952    }