Clover icon

Coverage Report

  1. Project Clover database Wed Jan 7 2026 02:49:01 GMT
  2. Package jalview.analysis

File AlignmentUtilsTests.java

 

Code metrics

22
1,352
57
1
3,069
2,005
68
0.05
23.72
57
1.19

Classes

Class Line # Actions
AlignmentUtilsTests 79 1,352 68
0.9685535 96.9%
 

Contributing tests

This file is covered by 58 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.analysis;
22   
23    import static org.testng.AssertJUnit.assertEquals;
24    import static org.testng.AssertJUnit.assertFalse;
25    import static org.testng.AssertJUnit.assertNotNull;
26    import static org.testng.AssertJUnit.assertNull;
27    import static org.testng.AssertJUnit.assertSame;
28    import static org.testng.AssertJUnit.assertTrue;
29   
30    import java.awt.Color;
31    import java.io.IOException;
32    import java.util.ArrayList;
33    import java.util.Arrays;
34    import java.util.HashMap;
35    import java.util.LinkedHashMap;
36    import java.util.List;
37    import java.util.Map;
38    import java.util.SortedMap;
39    import java.util.TreeMap;
40    import java.util.Vector;
41   
42    import org.testng.Assert;
43    import org.testng.annotations.BeforeClass;
44    import org.testng.annotations.DataProvider;
45    import org.testng.annotations.Test;
46   
47    import jalview.datamodel.AlignedCodonFrame;
48    import jalview.datamodel.Alignment;
49    import jalview.datamodel.AlignmentAnnotation;
50    import jalview.datamodel.AlignmentI;
51    import jalview.datamodel.Annotation;
52    import jalview.datamodel.ContactListI;
53    import jalview.datamodel.ContactMatrixI;
54    import jalview.datamodel.DBRefEntry;
55    import jalview.datamodel.GeneLociI;
56    import jalview.datamodel.Mapping;
57    import jalview.datamodel.PDBEntry;
58    import jalview.datamodel.SearchResultMatchI;
59    import jalview.datamodel.SearchResultsI;
60    import jalview.datamodel.SeqDistanceContactMatrix;
61    import jalview.datamodel.Sequence;
62    import jalview.datamodel.SequenceFeature;
63    import jalview.datamodel.SequenceGroup;
64    import jalview.datamodel.SequenceI;
65    import jalview.gui.JvOptionPane;
66    import jalview.io.AppletFormatAdapter;
67    import jalview.io.DataSourceType;
68    import jalview.io.FastaFile;
69    import jalview.io.FileFormat;
70    import jalview.io.FileFormatI;
71    import jalview.io.FormatAdapter;
72    import jalview.io.gff.SequenceOntologyI;
73    import jalview.util.Comparison;
74    import jalview.util.Constants;
75    import jalview.util.MapList;
76    import jalview.util.MappingUtils;
77    import jalview.util.ShiftList;
78   
 
79    public class AlignmentUtilsTests
80    {
81    private static Sequence ts = new Sequence("short",
82    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm");
83   
 
84  1 toggle @BeforeClass(alwaysRun = true)
85    public void setUpJvOptionPane()
86    {
87  1 JvOptionPane.setInteractiveMode(false);
88  1 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
89   
90  1 AlignmentAnnotation ann1 = new AlignmentAnnotation(
91    "Secondary Structure", "Secondary Structure",
92    new Annotation[] {});
93  1 AlignmentAnnotation ann2 = new AlignmentAnnotation("jnetpred",
94    "jnetpred", new Annotation[] {});
95  1 AlignmentAnnotation ann3 = new AlignmentAnnotation("Temp", "Temp",
96    new Annotation[] {});
97  1 AlignmentAnnotation ann4 = new AlignmentAnnotation("Temp", "Temp",
98    new Annotation[] {});
99   
100  1 AlignmentAnnotation[] anns1 = new AlignmentAnnotation[] { ann1, ann3,
101    ann4 };
102   
103  1 AlignmentAnnotation[] anns2 = new AlignmentAnnotation[] { ann2, ann3,
104    ann4 };
105   
106  1 AlignmentAnnotation[] anns3 = new AlignmentAnnotation[] { ann3, ann4 };
107   
108  1 AlignmentAnnotation[] anns4 = new AlignmentAnnotation[0];
109   
110  1 AlignmentAnnotation[] anns5 = new AlignmentAnnotation[] { ann1, ann2,
111    ann3, ann4 };
112    }
113   
 
114  1 toggle @Test(groups = { "Functional" })
115    public void testExpandContext()
116    {
117  1 AlignmentI al = new Alignment(new Sequence[] {});
118  6 for (int i = 4; i < 14; i += 2)
119    {
120  5 SequenceI s1 = ts.deriveSequence().getSubSequence(i, i + 7);
121  5 al.addSequence(s1);
122    }
123  1 System.out.println(new AppletFormatAdapter()
124    .formatSequences(FileFormat.Clustal, al, true));
125  27 for (int flnk = -1; flnk < 25; flnk++)
126    {
127  26 AlignmentI exp = AlignmentUtils.expandContext(al, flnk);
128  26 System.out.println("\nFlank size: " + flnk);
129  26 System.out.println(new AppletFormatAdapter()
130    .formatSequences(FileFormat.Clustal, exp, true));
131  26 if (flnk == -1)
132    {
133    /*
134    * Full expansion to complete sequences
135    */
136  1 for (SequenceI sq : exp.getSequences())
137    {
138  5 String ung = sq.getSequenceAsString().replaceAll("-+", "");
139  5 final String errorMsg = "Flanking sequence not the same as original dataset sequence.\n"
140    + ung + "\n"
141    + sq.getDatasetSequence().getSequenceAsString();
142  5 assertTrue(errorMsg, ung.equalsIgnoreCase(
143    sq.getDatasetSequence().getSequenceAsString()));
144    }
145    }
146  25 else if (flnk == 24)
147    {
148    /*
149    * Last sequence is fully expanded, others have leading gaps to match
150    */
151  1 assertTrue(exp.getSequenceAt(4).getSequenceAsString()
152    .startsWith("abc"));
153  1 assertTrue(exp.getSequenceAt(3).getSequenceAsString()
154    .startsWith("--abc"));
155  1 assertTrue(exp.getSequenceAt(2).getSequenceAsString()
156    .startsWith("----abc"));
157  1 assertTrue(exp.getSequenceAt(1).getSequenceAsString()
158    .startsWith("------abc"));
159  1 assertTrue(exp.getSequenceAt(0).getSequenceAsString()
160    .startsWith("--------abc"));
161    }
162    }
163    }
164   
165    /**
166    * Test that annotations are correctly adjusted by expandContext
167    */
 
168  1 toggle @Test(groups = { "Functional" })
169    public void testExpandContext_annotation()
170    {
171  1 AlignmentI al = new Alignment(new Sequence[] {});
172  1 SequenceI ds = new Sequence("Seq1", "ABCDEFGHI");
173    // subsequence DEF:
174  1 SequenceI seq1 = ds.deriveSequence().getSubSequence(3, 6);
175  1 al.addSequence(seq1);
176   
177    /*
178    * Annotate DEF with 4/5/6 respectively
179    */
180  1 Annotation[] anns = new Annotation[] { new Annotation(4),
181    new Annotation(5), new Annotation(6) };
182  1 AlignmentAnnotation ann = new AlignmentAnnotation("SS",
183    "secondary structure", anns);
184  1 seq1.addAlignmentAnnotation(ann);
185   
186    /*
187    * The annotations array should match aligned positions
188    */
189  1 assertEquals(3, ann.annotations.length);
190  1 assertEquals(4, ann.annotations[0].value, 0.001);
191  1 assertEquals(5, ann.annotations[1].value, 0.001);
192  1 assertEquals(6, ann.annotations[2].value, 0.001);
193   
194    /*
195    * Check annotation to sequence position mappings before expanding the
196    * sequence; these are set up in Sequence.addAlignmentAnnotation ->
197    * Annotation.setSequenceRef -> createSequenceMappings
198    */
199  1 assertNull(ann.getAnnotationForPosition(1));
200  1 assertNull(ann.getAnnotationForPosition(2));
201  1 assertNull(ann.getAnnotationForPosition(3));
202  1 assertEquals(4, ann.getAnnotationForPosition(4).value, 0.001);
203  1 assertEquals(5, ann.getAnnotationForPosition(5).value, 0.001);
204  1 assertEquals(6, ann.getAnnotationForPosition(6).value, 0.001);
205  1 assertNull(ann.getAnnotationForPosition(7));
206  1 assertNull(ann.getAnnotationForPosition(8));
207  1 assertNull(ann.getAnnotationForPosition(9));
208   
209    /*
210    * Expand the subsequence to the full sequence abcDEFghi
211    */
212  1 AlignmentI expanded = AlignmentUtils.expandContext(al, -1);
213  1 assertEquals("abcDEFghi",
214    expanded.getSequenceAt(0).getSequenceAsString());
215   
216    /*
217    * Confirm the alignment and sequence have the same SS annotation,
218    * referencing the expanded sequence
219    */
220  1 ann = expanded.getSequenceAt(0).getAnnotation()[0];
221  1 assertSame(ann, expanded.getAlignmentAnnotation()[0]);
222  1 assertSame(expanded.getSequenceAt(0), ann.sequenceRef);
223   
224    /*
225    * The annotations array should have null values except for annotated
226    * positions
227    */
228  1 assertNull(ann.annotations[0]);
229  1 assertNull(ann.annotations[1]);
230  1 assertNull(ann.annotations[2]);
231  1 assertEquals(4, ann.annotations[3].value, 0.001);
232  1 assertEquals(5, ann.annotations[4].value, 0.001);
233  1 assertEquals(6, ann.annotations[5].value, 0.001);
234  1 assertNull(ann.annotations[6]);
235  1 assertNull(ann.annotations[7]);
236  1 assertNull(ann.annotations[8]);
237   
238    /*
239    * sequence position mappings should be unchanged
240    */
241  1 assertNull(ann.getAnnotationForPosition(1));
242  1 assertNull(ann.getAnnotationForPosition(2));
243  1 assertNull(ann.getAnnotationForPosition(3));
244  1 assertEquals(4, ann.getAnnotationForPosition(4).value, 0.001);
245  1 assertEquals(5, ann.getAnnotationForPosition(5).value, 0.001);
246  1 assertEquals(6, ann.getAnnotationForPosition(6).value, 0.001);
247  1 assertNull(ann.getAnnotationForPosition(7));
248  1 assertNull(ann.getAnnotationForPosition(8));
249  1 assertNull(ann.getAnnotationForPosition(9));
250    }
251   
252    /**
253    * Test the method that returns a map of lists of sequences, keyed by sequence name.
254    *
255    * @throws IOException
256    */
 
257  1 toggle @Test(groups = { "Functional" })
258    public void testGetSequencesByName() throws IOException
259    {
260  1 final String data = ">Seq1Name\nKQYL\n" + ">Seq2Name\nRFPW\n"
261    + ">Seq1Name\nABCD\n";
262  1 AlignmentI al = loadAlignment(data, FileFormat.Fasta);
263  1 Map<String, List<SequenceI>> map = AlignmentUtils
264    .getSequencesByName(al);
265  1 assertEquals(2, map.keySet().size());
266  1 assertEquals(2, map.get("Seq1Name").size());
267  1 assertEquals("KQYL", map.get("Seq1Name").get(0).getSequenceAsString());
268  1 assertEquals("ABCD", map.get("Seq1Name").get(1).getSequenceAsString());
269  1 assertEquals(1, map.get("Seq2Name").size());
270  1 assertEquals("RFPW", map.get("Seq2Name").get(0).getSequenceAsString());
271    }
272   
273    /**
274    * Helper method to load an alignment and ensure dataset sequences are set up.
275    *
276    * @param data
277    * @param format
278    * TODO
279    * @return
280    * @throws IOException
281    */
 
282  1 toggle protected AlignmentI loadAlignment(final String data, FileFormatI format)
283    throws IOException
284    {
285  1 AlignmentI a = new FormatAdapter().readFile(data, DataSourceType.PASTE,
286    format);
287  1 a.setDataset(null);
288  1 return a;
289    }
290   
291    /**
292    * Test mapping of protein to cDNA, for the case where we have no sequence
293    * cross-references, so mappings are made first-served 1-1 where sequences
294    * translate.
295    *
296    * @throws IOException
297    */
 
298  1 toggle @Test(groups = { "Functional" })
299    public void testMapProteinAlignmentToCdna_noXrefs() throws IOException
300    {
301  1 List<SequenceI> protseqs = new ArrayList<>();
302  1 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
303  1 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
304  1 protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
305  1 AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
306  1 protein.setDataset(null);
307   
308  1 List<SequenceI> dnaseqs = new ArrayList<>();
309  1 dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR
310  1 dnaseqs.add(new Sequence("EMBL|A22222", "GAGATACAA")); // = EIQ
311  1 dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ
312  1 dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG")); // = EIQ
313  1 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[4]));
314  1 cdna.setDataset(null);
315   
316  1 assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
317   
318    // 3 mappings made, each from 1 to 1 sequence
319  1 assertEquals(3, protein.getCodonFrames().size());
320  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
321  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
322  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
323   
324    // V12345 mapped to A22222
325  1 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
326    .get(0);
327  1 assertEquals(1, acf.getdnaSeqs().length);
328  1 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
329    acf.getdnaSeqs()[0]);
330  1 Mapping[] protMappings = acf.getProtMappings();
331  1 assertEquals(1, protMappings.length);
332  1 MapList mapList = protMappings[0].getMap();
333  1 assertEquals(3, mapList.getFromRatio());
334  1 assertEquals(1, mapList.getToRatio());
335  1 assertTrue(
336    Arrays.equals(new int[]
337    { 1, 9 }, mapList.getFromRanges().get(0)));
338  1 assertEquals(1, mapList.getFromRanges().size());
339  1 assertTrue(
340    Arrays.equals(new int[]
341    { 1, 3 }, mapList.getToRanges().get(0)));
342  1 assertEquals(1, mapList.getToRanges().size());
343   
344    // V12346 mapped to A33333
345  1 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
346  1 assertEquals(1, acf.getdnaSeqs().length);
347  1 assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
348    acf.getdnaSeqs()[0]);
349   
350    // V12347 mapped to A11111
351  1 acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
352  1 assertEquals(1, acf.getdnaSeqs().length);
353  1 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
354    acf.getdnaSeqs()[0]);
355   
356    // no mapping involving the 'extra' A44444
357  1 assertTrue(protein.getCodonFrame(cdna.getSequenceAt(3)).isEmpty());
358    }
359   
360    /**
361    * Test for the alignSequenceAs method that takes two sequences and a mapping.
362    */
 
363  1 toggle @Test(groups = { "Functional" })
364    public void testAlignSequenceAs_withMapping_noIntrons()
365    {
366  1 MapList map = new MapList(new int[] { 1, 6 }, new int[] { 1, 2 }, 3, 1);
367   
368    /*
369    * No existing gaps in dna:
370    */
371  1 checkAlignSequenceAs("GGGAAA", "-A-L-", false, false, map,
372    "---GGG---AAA");
373   
374    /*
375    * Now introduce gaps in dna but ignore them when realigning.
376    */
377  1 checkAlignSequenceAs("-G-G-G-A-A-A-", "-A-L-", false, false, map,
378    "---GGG---AAA");
379   
380    /*
381    * Now include gaps in dna when realigning. First retaining 'mapped' gaps
382    * only, i.e. those within the exon region.
383    */
384  1 checkAlignSequenceAs("-G-G--G-A--A-A-", "-A-L-", true, false, map,
385    "---G-G--G---A--A-A");
386   
387    /*
388    * Include all gaps in dna when realigning (within and without the exon
389    * region). The leading gap, and the gaps between codons, are subsumed by
390    * the protein alignment gap.
391    */
392  1 checkAlignSequenceAs("-G-GG--AA-A---", "-A-L-", true, true, map,
393    "---G-GG---AA-A---");
394   
395    /*
396    * Include only unmapped gaps in dna when realigning (outside the exon
397    * region). The leading gap, and the gaps between codons, are subsumed by
398    * the protein alignment gap.
399    */
400  1 checkAlignSequenceAs("-G-GG--AA-A-", "-A-L-", false, true, map,
401    "---GGG---AAA---");
402    }
403   
404    /**
405    * Test for the alignSequenceAs method that takes two sequences and a mapping.
406    */
 
407  1 toggle @Test(groups = { "Functional" })
408    public void testAlignSequenceAs_withMapping_withIntrons()
409    {
410    /*
411    * Exons at codon 2 (AAA) and 4 (TTT)
412    */
413  1 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
414    new int[]
415    { 1, 2 }, 3, 1);
416   
417    /*
418    * Simple case: no gaps in dna
419    */
420  1 checkAlignSequenceAs("GGGAAACCCTTTGGG", "--A-L-", false, false, map,
421    "GGG---AAACCCTTTGGG");
422   
423    /*
424    * Add gaps to dna - but ignore when realigning.
425    */
426  1 checkAlignSequenceAs("-G-G-G--A--A---AC-CC-T-TT-GG-G-", "--A-L-", false,
427    false, map, "GGG---AAACCCTTTGGG");
428   
429    /*
430    * Add gaps to dna - include within exons only when realigning.
431    */
432  1 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-", true,
433    false, map, "GGG---A--A---ACCCT-TTGGG");
434   
435    /*
436    * Include gaps outside exons only when realigning.
437    */
438  1 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
439    false, true, map, "-G-G-GAAAC-CCTTT-GG-G-");
440   
441    /*
442    * Include gaps following first intron if we are 'preserving mapped gaps'
443    */
444  1 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-", true,
445    true, map, "-G-G-G--A--A---A-C-CC-T-TT-GG-G-");
446   
447    /*
448    * Include all gaps in dna when realigning.
449    */
450  1 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-", true,
451    true, map, "-G-G-G--A--A---A-C-CC-T-TT-GG-G-");
452    }
453   
454    /**
455    * Test for the case where not all of the protein sequence is mapped to cDNA.
456    */
 
457  1 toggle @Test(groups = { "Functional" })
458    public void testAlignSequenceAs_withMapping_withUnmappedProtein()
459    {
460    /*
461    * Exons at codon 2 (AAA) and 4 (TTT) mapped to A and P
462    */
463  1 final MapList map = new MapList(new int[] { 4, 6, 10, 12 },
464    new int[]
465    { 1, 1, 3, 3 }, 3, 1);
466   
467    /*
468    * -L- 'aligns' ccc------
469    */
470  1 checkAlignSequenceAs("gggAAAcccTTTggg", "-A-L-P-", false, false, map,
471    "gggAAAccc------TTTggg");
472    }
473   
474    /**
475    * Helper method that performs and verifies the method under test.
476    *
477    * @param alignee
478    * the sequence to be realigned
479    * @param alignModel
480    * the sequence whose alignment is to be copied
481    * @param preserveMappedGaps
482    * @param preserveUnmappedGaps
483    * @param map
484    * @param expected
485    */
 
486  14 toggle protected void checkAlignSequenceAs(final String alignee,
487    final String alignModel, final boolean preserveMappedGaps,
488    final boolean preserveUnmappedGaps, MapList map,
489    final String expected)
490    {
491  14 SequenceI alignMe = new Sequence("Seq1", alignee);
492  14 alignMe.createDatasetSequence();
493  14 SequenceI alignFrom = new Sequence("Seq2", alignModel);
494  14 alignFrom.createDatasetSequence();
495  14 AlignedCodonFrame acf = new AlignedCodonFrame();
496  14 acf.addMap(alignMe.getDatasetSequence(), alignFrom.getDatasetSequence(),
497    map);
498   
499  14 AlignmentUtils.alignSequenceAs(alignMe, alignFrom, acf, "---", '-',
500    preserveMappedGaps, preserveUnmappedGaps);
501  14 assertEquals(expected, alignMe.getSequenceAsString());
502    }
503   
504    /**
505    * Test for the alignSequenceAs method where we preserve gaps in introns only.
506    */
 
507  1 toggle @Test(groups = { "Functional" })
508    public void testAlignSequenceAs_keepIntronGapsOnly()
509    {
510   
511    /*
512    * Intron GGGAAA followed by exon CCCTTT
513    */
514  1 MapList map = new MapList(new int[] { 7, 12 }, new int[] { 1, 2 }, 3,
515    1);
516   
517  1 checkAlignSequenceAs("GG-G-AA-A-C-CC-T-TT", "AL", false, true, map,
518    "GG-G-AA-ACCCTTT");
519    }
520   
521    /**
522    * Test the method that realigns protein to match mapped codon alignment.
523    */
 
524  1 toggle @Test(groups = { "Functional" })
525    public void testAlignProteinAsDna()
526    {
527    // seq1 codons are [1,2,3] [4,5,6] [7,8,9] [10,11,12]
528  1 SequenceI dna1 = new Sequence("Seq1", "TGCCATTACCAG-");
529    // seq2 codons are [1,3,4] [5,6,7] [8,9,10] [11,12,13]
530  1 SequenceI dna2 = new Sequence("Seq2", "T-GCCATTACCAG");
531    // seq3 codons are [1,2,3] [4,5,7] [8,9,10] [11,12,13]
532  1 SequenceI dna3 = new Sequence("Seq3", "TGCCA-TTACCAG");
533  1 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
534  1 dna.setDataset(null);
535   
536    // protein alignment will be realigned like dna
537  1 SequenceI prot1 = new Sequence("Seq1", "CHYQ");
538  1 SequenceI prot2 = new Sequence("Seq2", "CHYQ");
539  1 SequenceI prot3 = new Sequence("Seq3", "CHYQ");
540  1 SequenceI prot4 = new Sequence("Seq4", "R-QSV"); // unmapped, unchanged
541  1 AlignmentI protein = new Alignment(
542    new SequenceI[]
543    { prot1, prot2, prot3, prot4 });
544  1 protein.setDataset(null);
545   
546  1 MapList map = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, 3,
547    1);
548  1 AlignedCodonFrame acf = new AlignedCodonFrame();
549  1 acf.addMap(dna1.getDatasetSequence(), prot1.getDatasetSequence(), map);
550  1 acf.addMap(dna2.getDatasetSequence(), prot2.getDatasetSequence(), map);
551  1 acf.addMap(dna3.getDatasetSequence(), prot3.getDatasetSequence(), map);
552  1 ArrayList<AlignedCodonFrame> acfs = new ArrayList<>();
553  1 acfs.add(acf);
554  1 protein.setCodonFrames(acfs);
555   
556    /*
557    * Translated codon order is [1,2,3] [1,3,4] [4,5,6] [4,5,7] [5,6,7] [7,8,9]
558    * [8,9,10] [10,11,12] [11,12,13]
559    */
560  1 AlignmentUtils.alignProteinAsDna(protein, dna);
561  1 assertEquals("C-H--Y-Q-", prot1.getSequenceAsString());
562  1 assertEquals("-C--H-Y-Q", prot2.getSequenceAsString());
563  1 assertEquals("C--H--Y-Q", prot3.getSequenceAsString());
564  1 assertEquals("R-QSV", prot4.getSequenceAsString());
565    }
566   
567    /**
568    * Test the method that tests whether a CDNA sequence translates to a protein
569    * sequence
570    */
 
571  1 toggle @Test(groups = { "Functional" })
572    public void testTranslatesAs()
573    {
574    // null arguments check
575  1 assertFalse(AlignmentUtils.translatesAs(null, 0, null));
576  1 assertFalse(AlignmentUtils.translatesAs(new char[] { 't' }, 0, null));
577  1 assertFalse(AlignmentUtils.translatesAs(null, 0, new char[] { 'a' }));
578   
579    // straight translation
580  1 assertTrue(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0,
581    "FPKG".toCharArray()));
582    // with extra start codon (not in protein)
583  1 assertTrue(AlignmentUtils.translatesAs("atgtttcccaaaggg".toCharArray(),
584    3, "FPKG".toCharArray()));
585    // with stop codon1 (not in protein)
586  1 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtaa".toCharArray(),
587    0, "FPKG".toCharArray()));
588    // with stop codon1 (in protein as *)
589  1 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtaa".toCharArray(),
590    0, "FPKG*".toCharArray()));
591    // with stop codon2 (not in protein)
592  1 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtag".toCharArray(),
593    0, "FPKG".toCharArray()));
594    // with stop codon3 (not in protein)
595  1 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtga".toCharArray(),
596    0, "FPKG".toCharArray()));
597    // with start and stop codon1
598  1 assertTrue(AlignmentUtils.translatesAs(
599    "atgtttcccaaagggtaa".toCharArray(), 3, "FPKG".toCharArray()));
600    // with start and stop codon1 (in protein as *)
601  1 assertTrue(AlignmentUtils.translatesAs(
602    "atgtttcccaaagggtaa".toCharArray(), 3, "FPKG*".toCharArray()));
603    // with start and stop codon2
604  1 assertTrue(AlignmentUtils.translatesAs(
605    "atgtttcccaaagggtag".toCharArray(), 3, "FPKG".toCharArray()));
606    // with start and stop codon3
607  1 assertTrue(AlignmentUtils.translatesAs(
608    "atgtttcccaaagggtga".toCharArray(), 3, "FPKG".toCharArray()));
609   
610    // with embedded stop codons
611  1 assertTrue(AlignmentUtils.translatesAs(
612    "atgtttTAGcccaaaTAAgggtga".toCharArray(), 3,
613    "F*PK*G".toCharArray()));
614   
615    // wrong protein
616  1 assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0,
617    "FPMG".toCharArray()));
618   
619    // truncated dna
620  1 assertFalse(AlignmentUtils.translatesAs("tttcccaaagg".toCharArray(), 0,
621    "FPKG".toCharArray()));
622   
623    // truncated protein
624  1 assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0,
625    "FPK".toCharArray()));
626   
627    // overlong dna (doesn't end in stop codon)
628  1 assertFalse(AlignmentUtils.translatesAs("tttcccaaagggttt".toCharArray(),
629    0, "FPKG".toCharArray()));
630   
631    // dna + stop codon + more
632  1 assertFalse(AlignmentUtils.translatesAs(
633    "tttcccaaagggttaga".toCharArray(), 0, "FPKG".toCharArray()));
634   
635    // overlong protein
636  1 assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0,
637    "FPKGQ".toCharArray()));
638    }
639   
640    /**
641    * Test mapping of protein to cDNA, for cases where the cDNA has start and/or
642    * stop codons in addition to the protein coding sequence.
643    *
644    * @throws IOException
645    */
 
646  1 toggle @Test(groups = { "Functional" })
647    public void testMapProteinAlignmentToCdna_withStartAndStopCodons()
648    throws IOException
649    {
650  1 List<SequenceI> protseqs = new ArrayList<>();
651  1 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
652  1 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
653  1 protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
654  1 AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
655  1 protein.setDataset(null);
656   
657  1 List<SequenceI> dnaseqs = new ArrayList<>();
658    // start + SAR:
659  1 dnaseqs.add(new Sequence("EMBL|A11111", "ATGTCAGCACGC"));
660    // = EIQ + stop
661  1 dnaseqs.add(new Sequence("EMBL|A22222", "GAGATACAATAA"));
662    // = start +EIQ + stop
663  1 dnaseqs.add(new Sequence("EMBL|A33333", "ATGGAAATCCAGTAG"));
664  1 dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG"));
665  1 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[4]));
666  1 cdna.setDataset(null);
667   
668  1 assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
669   
670    // 3 mappings made, each from 1 to 1 sequence
671  1 assertEquals(3, protein.getCodonFrames().size());
672  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
673  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
674  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
675   
676    // V12345 mapped from A22222
677  1 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
678    .get(0);
679  1 assertEquals(1, acf.getdnaSeqs().length);
680  1 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
681    acf.getdnaSeqs()[0]);
682  1 Mapping[] protMappings = acf.getProtMappings();
683  1 assertEquals(1, protMappings.length);
684  1 MapList mapList = protMappings[0].getMap();
685  1 assertEquals(3, mapList.getFromRatio());
686  1 assertEquals(1, mapList.getToRatio());
687  1 assertTrue(
688    Arrays.equals(new int[]
689    { 1, 9 }, mapList.getFromRanges().get(0)));
690  1 assertEquals(1, mapList.getFromRanges().size());
691  1 assertTrue(
692    Arrays.equals(new int[]
693    { 1, 3 }, mapList.getToRanges().get(0)));
694  1 assertEquals(1, mapList.getToRanges().size());
695   
696    // V12346 mapped from A33333 starting position 4
697  1 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
698  1 assertEquals(1, acf.getdnaSeqs().length);
699  1 assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
700    acf.getdnaSeqs()[0]);
701  1 protMappings = acf.getProtMappings();
702  1 assertEquals(1, protMappings.length);
703  1 mapList = protMappings[0].getMap();
704  1 assertEquals(3, mapList.getFromRatio());
705  1 assertEquals(1, mapList.getToRatio());
706  1 assertTrue(
707    Arrays.equals(new int[]
708    { 4, 12 }, mapList.getFromRanges().get(0)));
709  1 assertEquals(1, mapList.getFromRanges().size());
710  1 assertTrue(
711    Arrays.equals(new int[]
712    { 1, 3 }, mapList.getToRanges().get(0)));
713  1 assertEquals(1, mapList.getToRanges().size());
714   
715    // V12347 mapped to A11111 starting position 4
716  1 acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
717  1 assertEquals(1, acf.getdnaSeqs().length);
718  1 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
719    acf.getdnaSeqs()[0]);
720  1 protMappings = acf.getProtMappings();
721  1 assertEquals(1, protMappings.length);
722  1 mapList = protMappings[0].getMap();
723  1 assertEquals(3, mapList.getFromRatio());
724  1 assertEquals(1, mapList.getToRatio());
725  1 assertTrue(
726    Arrays.equals(new int[]
727    { 4, 12 }, mapList.getFromRanges().get(0)));
728  1 assertEquals(1, mapList.getFromRanges().size());
729  1 assertTrue(
730    Arrays.equals(new int[]
731    { 1, 3 }, mapList.getToRanges().get(0)));
732  1 assertEquals(1, mapList.getToRanges().size());
733   
734    // no mapping involving the 'extra' A44444
735  1 assertTrue(protein.getCodonFrame(cdna.getSequenceAt(3)).isEmpty());
736    }
737   
738    /**
739    * Test mapping of protein to cDNA, for the case where we have some sequence
740    * cross-references. Verify that 1-to-many mappings are made where
741    * cross-references exist and sequences are mappable.
742    *
743    * @throws IOException
744    */
 
745  1 toggle @Test(groups = { "Functional" })
746    public void testMapProteinAlignmentToCdna_withXrefs() throws IOException
747    {
748  1 List<SequenceI> protseqs = new ArrayList<>();
749  1 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
750  1 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
751  1 protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
752  1 AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
753  1 protein.setDataset(null);
754   
755  1 List<SequenceI> dnaseqs = new ArrayList<>();
756  1 dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR
757  1 dnaseqs.add(new Sequence("EMBL|A22222", "ATGGAGATACAA")); // = start + EIQ
758  1 dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ
759  1 dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG")); // = EIQ
760  1 dnaseqs.add(new Sequence("EMBL|A55555", "GAGATTCAG")); // = EIQ
761  1 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[5]));
762  1 cdna.setDataset(null);
763   
764    // Xref A22222 to V12345 (should get mapped)
765  1 dnaseqs.get(1).addDBRef(new DBRefEntry("UNIPROT", "1", "V12345"));
766    // Xref V12345 to A44444 (should get mapped)
767  1 protseqs.get(0).addDBRef(new DBRefEntry("EMBL", "1", "A44444"));
768    // Xref A33333 to V12347 (sequence mismatch - should not get mapped)
769  1 dnaseqs.get(2).addDBRef(new DBRefEntry("UNIPROT", "1", "V12347"));
770    // as V12345 is mapped to A22222 and A44444, this leaves V12346 unmapped.
771    // it should get paired up with the unmapped A33333
772    // A11111 should be mapped to V12347
773    // A55555 is spare and has no xref so is not mapped
774   
775  1 assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
776   
777    // 3 codon frames holding 4 protein mappings for 3 proteins: 2 to V12345, 1 each to V12346/7
778  1 assertEquals(3, protein.getCodonFrames().size());
779  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
780  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
781  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
782   
783    // one mapping for each of the first 4 cDNA sequences
784  1 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(0)).size());
785  1 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(1)).size());
786  1 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(2)).size());
787  1 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(3)).size());
788   
789    // V12345 mapped to A22222 and A44444
790  1 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
791    .get(0);
792  1 assertEquals(2, acf.getdnaSeqs().length);
793  1 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
794    acf.getdnaSeqs()[0]);
795  1 assertEquals(cdna.getSequenceAt(3).getDatasetSequence(),
796    acf.getdnaSeqs()[1]);
797   
798    // V12346 mapped to A33333
799  1 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
800  1 assertEquals(1, acf.getdnaSeqs().length);
801  1 assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
802    acf.getdnaSeqs()[0]);
803   
804    // V12347 mapped to A11111
805  1 acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
806  1 assertEquals(1, acf.getdnaSeqs().length);
807  1 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
808    acf.getdnaSeqs()[0]);
809   
810    // no mapping involving the 'extra' A55555
811  1 assertTrue(protein.getCodonFrame(cdna.getSequenceAt(4)).isEmpty());
812    }
813   
814    /**
815    * Test mapping of protein to cDNA, for the case where we have some sequence
816    * cross-references. Verify that once we have made an xref mapping we don't
817    * also map un-xrefd sequeces.
818    *
819    * @throws IOException
820    */
 
821  1 toggle @Test(groups = { "Functional" })
822    public void testMapProteinAlignmentToCdna_prioritiseXrefs()
823    throws IOException
824    {
825  1 List<SequenceI> protseqs = new ArrayList<>();
826  1 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
827  1 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
828  1 AlignmentI protein = new Alignment(
829    protseqs.toArray(new SequenceI[protseqs.size()]));
830  1 protein.setDataset(null);
831   
832  1 List<SequenceI> dnaseqs = new ArrayList<>();
833  1 dnaseqs.add(new Sequence("EMBL|A11111", "GAAATCCAG")); // = EIQ
834  1 dnaseqs.add(new Sequence("EMBL|A22222", "GAAATTCAG")); // = EIQ
835  1 AlignmentI cdna = new Alignment(
836    dnaseqs.toArray(new SequenceI[dnaseqs.size()]));
837  1 cdna.setDataset(null);
838   
839    // Xref A22222 to V12345 (should get mapped)
840    // A11111 should then be mapped to the unmapped V12346
841  1 dnaseqs.get(1).addDBRef(new DBRefEntry("UNIPROT", "1", "V12345"));
842   
843  1 assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
844   
845    // 2 protein mappings made
846  1 assertEquals(2, protein.getCodonFrames().size());
847  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
848  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
849   
850    // one mapping for each of the cDNA sequences
851  1 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(0)).size());
852  1 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(1)).size());
853   
854    // V12345 mapped to A22222
855  1 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
856    .get(0);
857  1 assertEquals(1, acf.getdnaSeqs().length);
858  1 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
859    acf.getdnaSeqs()[0]);
860   
861    // V12346 mapped to A11111
862  1 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
863  1 assertEquals(1, acf.getdnaSeqs().length);
864  1 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
865    acf.getdnaSeqs()[0]);
866    }
867   
868    /**
869    * Test the method that shows or hides sequence annotations by type(s) and
870    * selection group.
871    */
 
872  1 toggle @Test(groups = { "Functional" })
873    public void testShowOrHideSequenceAnnotations()
874    {
875  1 SequenceI seq1 = new Sequence("Seq1", "AAA");
876  1 SequenceI seq2 = new Sequence("Seq2", "BBB");
877  1 SequenceI seq3 = new Sequence("Seq3", "CCC");
878  1 Annotation[] anns = new Annotation[] { new Annotation(2f) };
879  1 AlignmentAnnotation ann1 = new AlignmentAnnotation("Structure", "ann1",
880    anns);
881  1 ann1.setSequenceRef(seq1);
882  1 AlignmentAnnotation ann2 = new AlignmentAnnotation("Structure", "ann2",
883    anns);
884  1 ann2.setSequenceRef(seq2);
885  1 AlignmentAnnotation ann3 = new AlignmentAnnotation("Structure", "ann3",
886    anns);
887  1 AlignmentAnnotation ann4 = new AlignmentAnnotation("Temp", "ann4",
888    anns);
889  1 ann4.setSequenceRef(seq1);
890  1 AlignmentAnnotation ann5 = new AlignmentAnnotation("Temp", "ann5",
891    anns);
892  1 ann5.setSequenceRef(seq2);
893  1 AlignmentAnnotation ann6 = new AlignmentAnnotation("Temp", "ann6",
894    anns);
895  1 AlignmentI al = new Alignment(new SequenceI[] { seq1, seq2, seq3 });
896  1 al.addAnnotation(ann1); // Structure for Seq1
897  1 al.addAnnotation(ann2); // Structure for Seq2
898  1 al.addAnnotation(ann3); // Structure for no sequence
899  1 al.addAnnotation(ann4); // Temp for seq1
900  1 al.addAnnotation(ann5); // Temp for seq2
901  1 al.addAnnotation(ann6); // Temp for no sequence
902  1 List<String> types = new ArrayList<>();
903  1 List<SequenceI> scope = new ArrayList<>();
904   
905    /*
906    * Set all sequence related Structure to hidden (ann1, ann2)
907    */
908  1 types.add("Structure");
909  1 AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, false,
910    false);
911  1 assertFalse(ann1.visible);
912  1 assertFalse(ann2.visible);
913  1 assertTrue(ann3.visible); // not sequence-related, not affected
914  1 assertTrue(ann4.visible); // not Structure, not affected
915  1 assertTrue(ann5.visible); // "
916  1 assertTrue(ann6.visible); // not sequence-related, not affected
917   
918    /*
919    * Set Temp in {seq1, seq3} to hidden
920    */
921  1 types.clear();
922  1 types.add("Temp");
923  1 scope.add(seq1);
924  1 scope.add(seq3);
925  1 AlignmentUtils.showOrHideSequenceAnnotations(al, types, scope, false,
926    false);
927  1 assertFalse(ann1.visible); // unchanged
928  1 assertFalse(ann2.visible); // unchanged
929  1 assertTrue(ann3.visible); // not sequence-related, not affected
930  1 assertFalse(ann4.visible); // Temp for seq1 hidden
931  1 assertTrue(ann5.visible); // not in scope, not affected
932  1 assertTrue(ann6.visible); // not sequence-related, not affected
933   
934    /*
935    * Set Temp in all sequences to hidden
936    */
937  1 types.clear();
938  1 types.add("Temp");
939  1 scope.add(seq1);
940  1 scope.add(seq3);
941  1 AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, false,
942    false);
943  1 assertFalse(ann1.visible); // unchanged
944  1 assertFalse(ann2.visible); // unchanged
945  1 assertTrue(ann3.visible); // not sequence-related, not affected
946  1 assertFalse(ann4.visible); // Temp for seq1 hidden
947  1 assertFalse(ann5.visible); // Temp for seq2 hidden
948  1 assertTrue(ann6.visible); // not sequence-related, not affected
949   
950    /*
951    * Set all types in {seq1, seq3} to visible
952    */
953  1 types.clear();
954  1 scope.clear();
955  1 scope.add(seq1);
956  1 scope.add(seq3);
957  1 AlignmentUtils.showOrHideSequenceAnnotations(al, types, scope, true,
958    true);
959  1 assertTrue(ann1.visible); // Structure for seq1 set visible
960  1 assertFalse(ann2.visible); // not in scope, unchanged
961  1 assertTrue(ann3.visible); // not sequence-related, not affected
962  1 assertTrue(ann4.visible); // Temp for seq1 set visible
963  1 assertFalse(ann5.visible); // not in scope, unchanged
964  1 assertTrue(ann6.visible); // not sequence-related, not affected
965   
966    /*
967    * Set all types in all scope to hidden
968    */
969  1 AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, true,
970    false);
971  1 assertFalse(ann1.visible);
972  1 assertFalse(ann2.visible);
973  1 assertTrue(ann3.visible); // not sequence-related, not affected
974  1 assertFalse(ann4.visible);
975  1 assertFalse(ann5.visible);
976  1 assertTrue(ann6.visible); // not sequence-related, not affected
977    }
978   
979    /**
980    * Tests for the method that checks if one sequence cross-references another
981    */
 
982  1 toggle @Test(groups = { "Functional" })
983    public void testHasCrossRef()
984    {
985  1 assertFalse(AlignmentUtils.hasCrossRef(null, null));
986  1 SequenceI seq1 = new Sequence("EMBL|A12345", "ABCDEF");
987  1 assertFalse(AlignmentUtils.hasCrossRef(seq1, null));
988  1 assertFalse(AlignmentUtils.hasCrossRef(null, seq1));
989  1 SequenceI seq2 = new Sequence("UNIPROT|V20192", "ABCDEF");
990  1 assertFalse(AlignmentUtils.hasCrossRef(seq1, seq2));
991   
992    // different ref
993  1 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "v20193"));
994  1 assertFalse(AlignmentUtils.hasCrossRef(seq1, seq2));
995   
996    // case-insensitive; version number is ignored
997  1 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "v20192"));
998  1 assertTrue(AlignmentUtils.hasCrossRef(seq1, seq2));
999   
1000    // right case!
1001  1 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));
1002  1 assertTrue(AlignmentUtils.hasCrossRef(seq1, seq2));
1003    // test is one-way only
1004  1 assertFalse(AlignmentUtils.hasCrossRef(seq2, seq1));
1005    }
1006   
1007    /**
1008    * Tests for the method that checks if either sequence cross-references the
1009    * other
1010    */
 
1011  1 toggle @Test(groups = { "Functional" })
1012    public void testHaveCrossRef()
1013    {
1014  1 assertFalse(AlignmentUtils.hasCrossRef(null, null));
1015  1 SequenceI seq1 = new Sequence("EMBL|A12345", "ABCDEF");
1016  1 assertFalse(AlignmentUtils.haveCrossRef(seq1, null));
1017  1 assertFalse(AlignmentUtils.haveCrossRef(null, seq1));
1018  1 SequenceI seq2 = new Sequence("UNIPROT|V20192", "ABCDEF");
1019  1 assertFalse(AlignmentUtils.haveCrossRef(seq1, seq2));
1020   
1021  1 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));
1022  1 assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));
1023    // next is true for haveCrossRef, false for hasCrossRef
1024  1 assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));
1025   
1026    // now the other way round
1027  1 seq1.setDBRefs(null);
1028  1 seq2.addDBRef(new DBRefEntry("EMBL", "1", "A12345"));
1029  1 assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));
1030  1 assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));
1031   
1032    // now both ways
1033  1 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));
1034  1 assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));
1035  1 assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));
1036    }
1037   
1038    /**
1039    * Test the method that extracts the cds-only part of a dna alignment.
1040    */
 
1041  1 toggle @Test(groups = { "Functional" })
1042    public void testMakeCdsAlignment()
1043    {
1044    /*
1045    * scenario:
1046    * dna1 --> [4, 6] [10,12] --> pep1
1047    * dna2 --> [1, 3] [7, 9] [13,15] --> pep2
1048    */
1049  1 SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
1050  1 SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC");
1051  1 SequenceI pep1 = new Sequence("pep1", "GF");
1052  1 SequenceI pep2 = new Sequence("pep2", "GFP");
1053  1 pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "pep1"));
1054  1 pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "pep2"));
1055  1 dna1.createDatasetSequence();
1056  1 dna2.createDatasetSequence();
1057  1 pep1.createDatasetSequence();
1058  1 pep2.createDatasetSequence();
1059  1 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });
1060  1 dna.setDataset(null);
1061   
1062    /*
1063    * put a variant feature on dna2 base 8
1064    * - should transfer to cds2 base 5
1065    */
1066  1 dna2.addSequenceFeature(
1067    new SequenceFeature("variant", "hgmd", 8, 8, 0f, null));
1068   
1069    /*
1070    * need a sourceDbRef if we are to construct dbrefs to the CDS
1071    * sequence from the dna contig sequences
1072    */
1073  1 DBRefEntry dbref = new DBRefEntry("ENSEMBL", "0", "dna1");
1074  1 dna1.getDatasetSequence().addDBRef(dbref);
1075  1 org.testng.Assert.assertEquals(dbref, dna1.getPrimaryDBRefs().get(0));
1076  1 dbref = new DBRefEntry("ENSEMBL", "0", "dna2");
1077  1 dna2.getDatasetSequence().addDBRef(dbref);
1078  1 org.testng.Assert.assertEquals(dbref, dna2.getPrimaryDBRefs().get(0));
1079   
1080    /*
1081    * CDS sequences are 'discovered' from dna-to-protein mappings on the alignment
1082    * dataset (e.g. added from dbrefs by CrossRef.findXrefSequences)
1083    */
1084  1 MapList mapfordna1 = new MapList(new int[] { 4, 6, 10, 12 },
1085    new int[]
1086    { 1, 2 }, 3, 1);
1087  1 AlignedCodonFrame acf = new AlignedCodonFrame();
1088  1 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(),
1089    mapfordna1);
1090  1 dna.addCodonFrame(acf);
1091  1 MapList mapfordna2 = new MapList(new int[] { 1, 3, 7, 9, 13, 15 },
1092    new int[]
1093    { 1, 3 }, 3, 1);
1094  1 acf = new AlignedCodonFrame();
1095  1 acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(),
1096    mapfordna2);
1097  1 dna.addCodonFrame(acf);
1098   
1099    /*
1100    * In this case, mappings originally came from matching Uniprot accessions
1101    * - so need an xref on dna involving those regions.
1102    * These are normally constructed from CDS annotation
1103    */
1104  1 DBRefEntry dna1xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep1",
1105    new Mapping(mapfordna1));
1106  1 dna1.addDBRef(dna1xref);
1107  1 assertEquals(2, dna1.getDBRefs().size()); // to self and to pep1
1108  1 DBRefEntry dna2xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep2",
1109    new Mapping(mapfordna2));
1110  1 dna2.addDBRef(dna2xref);
1111  1 assertEquals(2, dna2.getDBRefs().size()); // to self and to pep2
1112   
1113    /*
1114    * execute method under test:
1115    */
1116  1 AlignmentI cds = AlignmentUtils
1117    .makeCdsAlignment(new SequenceI[]
1118    { dna1, dna2 }, dna.getDataset(), null);
1119   
1120    /*
1121    * verify cds sequences
1122    */
1123  1 assertEquals(2, cds.getSequences().size());
1124  1 assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
1125  1 assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString());
1126   
1127    /*
1128    * verify shared, extended alignment dataset
1129    */
1130  1 assertSame(dna.getDataset(), cds.getDataset());
1131  1 SequenceI cds1Dss = cds.getSequenceAt(0).getDatasetSequence();
1132  1 SequenceI cds2Dss = cds.getSequenceAt(1).getDatasetSequence();
1133  1 assertTrue(dna.getDataset().getSequences().contains(cds1Dss));
1134  1 assertTrue(dna.getDataset().getSequences().contains(cds2Dss));
1135   
1136    /*
1137    * verify CDS has a dbref with mapping to peptide
1138    */
1139  1 assertNotNull(cds1Dss.getDBRefs());
1140  1 assertEquals(2, cds1Dss.getDBRefs().size());
1141  1 dbref = cds1Dss.getDBRefs().get(0);
1142  1 assertEquals(dna1xref.getSource(), dbref.getSource());
1143    // version is via ensembl's primary ref
1144  1 assertEquals(dna1xref.getVersion(), dbref.getVersion());
1145  1 assertEquals(dna1xref.getAccessionId(), dbref.getAccessionId());
1146  1 assertNotNull(dbref.getMap());
1147  1 assertSame(pep1.getDatasetSequence(), dbref.getMap().getTo());
1148  1 MapList cdsMapping = new MapList(new int[] { 1, 6 }, new int[] { 1, 2 },
1149    3, 1);
1150  1 assertEquals(cdsMapping, dbref.getMap().getMap());
1151   
1152    /*
1153    * verify peptide has added a dbref with reverse mapping to CDS
1154    */
1155  1 assertNotNull(pep1.getDBRefs());
1156    // FIXME pep1.getDBRefs() is 1 - is that the correct behaviour ?
1157  1 assertEquals(2, pep1.getDBRefs().size());
1158  1 dbref = pep1.getDBRefs().get(1);
1159  1 assertEquals("ENSEMBL", dbref.getSource());
1160  1 assertEquals("0", dbref.getVersion());
1161  1 assertEquals("CDS|dna1", dbref.getAccessionId());
1162  1 assertNotNull(dbref.getMap());
1163  1 assertSame(cds1Dss, dbref.getMap().getTo());
1164  1 assertEquals(cdsMapping.getInverse(), dbref.getMap().getMap());
1165   
1166    /*
1167    * verify cDNA has added a dbref with mapping to CDS
1168    */
1169  1 assertEquals(3, dna1.getDBRefs().size());
1170  1 DBRefEntry dbRefEntry = dna1.getDBRefs().get(2);
1171  1 assertSame(cds1Dss, dbRefEntry.getMap().getTo());
1172  1 MapList dnaToCdsMapping = new MapList(new int[] { 4, 6, 10, 12 },
1173    new int[]
1174    { 1, 6 }, 1, 1);
1175  1 assertEquals(dnaToCdsMapping, dbRefEntry.getMap().getMap());
1176  1 assertEquals(3, dna2.getDBRefs().size());
1177  1 dbRefEntry = dna2.getDBRefs().get(2);
1178  1 assertSame(cds2Dss, dbRefEntry.getMap().getTo());
1179  1 dnaToCdsMapping = new MapList(new int[] { 1, 3, 7, 9, 13, 15 },
1180    new int[]
1181    { 1, 9 }, 1, 1);
1182  1 assertEquals(dnaToCdsMapping, dbRefEntry.getMap().getMap());
1183   
1184    /*
1185    * verify CDS has added a dbref with mapping to cDNA
1186    */
1187  1 assertEquals(2, cds1Dss.getDBRefs().size());
1188  1 dbRefEntry = cds1Dss.getDBRefs().get(1);
1189  1 assertSame(dna1.getDatasetSequence(), dbRefEntry.getMap().getTo());
1190  1 MapList cdsToDnaMapping = new MapList(new int[] { 1, 6 },
1191    new int[]
1192    { 4, 6, 10, 12 }, 1, 1);
1193  1 assertEquals(cdsToDnaMapping, dbRefEntry.getMap().getMap());
1194  1 assertEquals(2, cds2Dss.getDBRefs().size());
1195  1 dbRefEntry = cds2Dss.getDBRefs().get(1);
1196  1 assertSame(dna2.getDatasetSequence(), dbRefEntry.getMap().getTo());
1197  1 cdsToDnaMapping = new MapList(new int[] { 1, 9 },
1198    new int[]
1199    { 1, 3, 7, 9, 13, 15 }, 1, 1);
1200  1 assertEquals(cdsToDnaMapping, dbRefEntry.getMap().getMap());
1201   
1202    /*
1203    * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide
1204    * the mappings are on the shared alignment dataset
1205    * 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep)
1206    */
1207  1 List<AlignedCodonFrame> cdsMappings = cds.getDataset().getCodonFrames();
1208  1 assertEquals(6, cdsMappings.size());
1209   
1210    /*
1211    * verify that mapping sets for dna and cds alignments are different
1212    * [not current behaviour - all mappings are on the alignment dataset]
1213    */
1214    // select -> subselect type to test.
1215    // Assert.assertNotSame(dna.getCodonFrames(), cds.getCodonFrames());
1216    // assertEquals(4, dna.getCodonFrames().size());
1217    // assertEquals(4, cds.getCodonFrames().size());
1218   
1219    /*
1220    * Two mappings involve pep1 (dna to pep1, cds to pep1)
1221    * Mapping from pep1 to GGGTTT in first new exon sequence
1222    */
1223  1 List<AlignedCodonFrame> pep1Mappings = MappingUtils
1224    .findMappingsForSequence(pep1, cdsMappings);
1225  1 assertEquals(2, pep1Mappings.size());
1226  1 List<AlignedCodonFrame> mappings = MappingUtils
1227    .findMappingsForSequence(cds.getSequenceAt(0), pep1Mappings);
1228  1 assertEquals(1, mappings.size());
1229   
1230    // map G to GGG
1231  1 SearchResultsI sr = MappingUtils.buildSearchResults(pep1, 1, mappings);
1232  1 assertEquals(1, sr.getResults().size());
1233  1 SearchResultMatchI m = sr.getResults().get(0);
1234  1 assertSame(cds1Dss, m.getSequence());
1235  1 assertEquals(1, m.getStart());
1236  1 assertEquals(3, m.getEnd());
1237    // map F to TTT
1238  1 sr = MappingUtils.buildSearchResults(pep1, 2, mappings);
1239  1 m = sr.getResults().get(0);
1240  1 assertSame(cds1Dss, m.getSequence());
1241  1 assertEquals(4, m.getStart());
1242  1 assertEquals(6, m.getEnd());
1243   
1244    /*
1245    * Two mappings involve pep2 (dna to pep2, cds to pep2)
1246    * Verify mapping from pep2 to GGGTTTCCC in second new exon sequence
1247    */
1248  1 List<AlignedCodonFrame> pep2Mappings = MappingUtils
1249    .findMappingsForSequence(pep2, cdsMappings);
1250  1 assertEquals(2, pep2Mappings.size());
1251  1 mappings = MappingUtils.findMappingsForSequence(cds.getSequenceAt(1),
1252    pep2Mappings);
1253  1 assertEquals(1, mappings.size());
1254    // map G to GGG
1255  1 sr = MappingUtils.buildSearchResults(pep2, 1, mappings);
1256  1 assertEquals(1, sr.getResults().size());
1257  1 m = sr.getResults().get(0);
1258  1 assertSame(cds2Dss, m.getSequence());
1259  1 assertEquals(1, m.getStart());
1260  1 assertEquals(3, m.getEnd());
1261    // map F to TTT
1262  1 sr = MappingUtils.buildSearchResults(pep2, 2, mappings);
1263  1 m = sr.getResults().get(0);
1264  1 assertSame(cds2Dss, m.getSequence());
1265  1 assertEquals(4, m.getStart());
1266  1 assertEquals(6, m.getEnd());
1267    // map P to CCC
1268  1 sr = MappingUtils.buildSearchResults(pep2, 3, mappings);
1269  1 m = sr.getResults().get(0);
1270  1 assertSame(cds2Dss, m.getSequence());
1271  1 assertEquals(7, m.getStart());
1272  1 assertEquals(9, m.getEnd());
1273   
1274    /*
1275    * check cds2 acquired a variant feature in position 5
1276    */
1277  1 List<SequenceFeature> sfs = cds2Dss.getSequenceFeatures();
1278  1 assertNotNull(sfs);
1279  1 assertEquals(1, sfs.size());
1280  1 assertEquals("variant", sfs.get(0).type);
1281  1 assertEquals(5, sfs.get(0).begin);
1282  1 assertEquals(5, sfs.get(0).end);
1283    }
1284   
1285    /**
1286    * Test the method that makes a cds-only alignment from a DNA sequence and its
1287    * product mappings, for the case where there are multiple exon mappings to
1288    * different protein products.
1289    */
 
1290  1 toggle @Test(groups = { "Functional" })
1291    public void testMakeCdsAlignment_multipleProteins()
1292    {
1293  1 SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
1294  1 SequenceI pep1 = new Sequence("pep1", "GF"); // GGGTTT
1295  1 SequenceI pep2 = new Sequence("pep2", "KP"); // aaaccc
1296  1 SequenceI pep3 = new Sequence("pep3", "KF"); // aaaTTT
1297  1 dna1.createDatasetSequence();
1298  1 pep1.createDatasetSequence();
1299  1 pep2.createDatasetSequence();
1300  1 pep3.createDatasetSequence();
1301  1 pep1.getDatasetSequence()
1302    .addDBRef(new DBRefEntry("EMBLCDS", "2", "A12345"));
1303  1 pep2.getDatasetSequence()
1304    .addDBRef(new DBRefEntry("EMBLCDS", "3", "A12346"));
1305  1 pep3.getDatasetSequence()
1306    .addDBRef(new DBRefEntry("EMBLCDS", "4", "A12347"));
1307   
1308    /*
1309    * Create the CDS alignment
1310    */
1311  1 AlignmentI dna = new Alignment(new SequenceI[] { dna1 });
1312  1 dna.setDataset(null);
1313   
1314    /*
1315    * Make the mappings from dna to protein
1316    */
1317    // map ...GGG...TTT to GF
1318  1 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1319    new int[]
1320    { 1, 2 }, 3, 1);
1321  1 AlignedCodonFrame acf = new AlignedCodonFrame();
1322  1 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
1323  1 dna.addCodonFrame(acf);
1324   
1325    // map aaa...ccc to KP
1326  1 map = new MapList(new int[] { 1, 3, 7, 9 }, new int[] { 1, 2 }, 3, 1);
1327  1 acf = new AlignedCodonFrame();
1328  1 acf.addMap(dna1.getDatasetSequence(), pep2.getDatasetSequence(), map);
1329  1 dna.addCodonFrame(acf);
1330   
1331    // map aaa......TTT to KF
1332  1 map = new MapList(new int[] { 1, 3, 10, 12 }, new int[] { 1, 2 }, 3, 1);
1333  1 acf = new AlignedCodonFrame();
1334  1 acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map);
1335  1 dna.addCodonFrame(acf);
1336   
1337    /*
1338    * execute method under test
1339    */
1340  1 AlignmentI cdsal = AlignmentUtils
1341    .makeCdsAlignment(new SequenceI[]
1342    { dna1 }, dna.getDataset(), null);
1343   
1344    /*
1345    * Verify we have 3 cds sequences, mapped to pep1/2/3 respectively
1346    */
1347  1 List<SequenceI> cds = cdsal.getSequences();
1348  1 assertEquals(3, cds.size());
1349   
1350    /*
1351    * verify shared, extended alignment dataset
1352    */
1353  1 assertSame(cdsal.getDataset(), dna.getDataset());
1354  1 assertTrue(dna.getDataset().getSequences()
1355    .contains(cds.get(0).getDatasetSequence()));
1356  1 assertTrue(dna.getDataset().getSequences()
1357    .contains(cds.get(1).getDatasetSequence()));
1358  1 assertTrue(dna.getDataset().getSequences()
1359    .contains(cds.get(2).getDatasetSequence()));
1360   
1361    /*
1362    * verify aligned cds sequences and their xrefs
1363    */
1364  1 SequenceI cdsSeq = cds.get(0);
1365  1 assertEquals("GGGTTT", cdsSeq.getSequenceAsString());
1366    // assertEquals("dna1|A12345", cdsSeq.getName());
1367  1 assertEquals("CDS|dna1", cdsSeq.getName());
1368    // assertEquals(1, cdsSeq.getDBRefs().length);
1369    // DBRefEntry cdsRef = cdsSeq.getDBRefs()[0];
1370    // assertEquals("EMBLCDS", cdsRef.getSource());
1371    // assertEquals("2", cdsRef.getVersion());
1372    // assertEquals("A12345", cdsRef.getAccessionId());
1373   
1374  1 cdsSeq = cds.get(1);
1375  1 assertEquals("aaaccc", cdsSeq.getSequenceAsString());
1376    // assertEquals("dna1|A12346", cdsSeq.getName());
1377  1 assertEquals("CDS|dna1", cdsSeq.getName());
1378    // assertEquals(1, cdsSeq.getDBRefs().length);
1379    // cdsRef = cdsSeq.getDBRefs()[0];
1380    // assertEquals("EMBLCDS", cdsRef.getSource());
1381    // assertEquals("3", cdsRef.getVersion());
1382    // assertEquals("A12346", cdsRef.getAccessionId());
1383   
1384  1 cdsSeq = cds.get(2);
1385  1 assertEquals("aaaTTT", cdsSeq.getSequenceAsString());
1386    // assertEquals("dna1|A12347", cdsSeq.getName());
1387  1 assertEquals("CDS|dna1", cdsSeq.getName());
1388    // assertEquals(1, cdsSeq.getDBRefs().length);
1389    // cdsRef = cdsSeq.getDBRefs()[0];
1390    // assertEquals("EMBLCDS", cdsRef.getSource());
1391    // assertEquals("4", cdsRef.getVersion());
1392    // assertEquals("A12347", cdsRef.getAccessionId());
1393   
1394    /*
1395    * Verify there are mappings from each cds sequence to its protein product
1396    * and also to its dna source
1397    */
1398  1 List<AlignedCodonFrame> newMappings = cdsal.getCodonFrames();
1399   
1400    /*
1401    * 6 mappings involve dna1 (to pep1/2/3, cds1/2/3)
1402    */
1403  1 List<AlignedCodonFrame> dnaMappings = MappingUtils
1404    .findMappingsForSequence(dna1, newMappings);
1405  1 assertEquals(6, dnaMappings.size());
1406   
1407    /*
1408    * dna1 to pep1
1409    */
1410  1 List<AlignedCodonFrame> mappings = MappingUtils
1411    .findMappingsForSequence(pep1, dnaMappings);
1412  1 assertEquals(1, mappings.size());
1413  1 assertEquals(1, mappings.get(0).getMappings().size());
1414  1 assertSame(pep1.getDatasetSequence(),
1415    mappings.get(0).getMappings().get(0).getMapping().getTo());
1416   
1417    /*
1418    * dna1 to cds1
1419    */
1420  1 List<AlignedCodonFrame> dnaToCds1Mappings = MappingUtils
1421    .findMappingsForSequence(cds.get(0), dnaMappings);
1422  1 Mapping mapping = dnaToCds1Mappings.get(0).getMappings().get(0)
1423    .getMapping();
1424  1 assertSame(cds.get(0).getDatasetSequence(), mapping.getTo());
1425  1 assertEquals("G(1) in CDS should map to G(4) in DNA", 4,
1426    mapping.getMap().getToPosition(1));
1427   
1428    /*
1429    * dna1 to pep2
1430    */
1431  1 mappings = MappingUtils.findMappingsForSequence(pep2, dnaMappings);
1432  1 assertEquals(1, mappings.size());
1433  1 assertEquals(1, mappings.get(0).getMappings().size());
1434  1 assertSame(pep2.getDatasetSequence(),
1435    mappings.get(0).getMappings().get(0).getMapping().getTo());
1436   
1437    /*
1438    * dna1 to cds2
1439    */
1440  1 List<AlignedCodonFrame> dnaToCds2Mappings = MappingUtils
1441    .findMappingsForSequence(cds.get(1), dnaMappings);
1442  1 mapping = dnaToCds2Mappings.get(0).getMappings().get(0).getMapping();
1443  1 assertSame(cds.get(1).getDatasetSequence(), mapping.getTo());
1444  1 assertEquals("c(4) in CDS should map to c(7) in DNA", 7,
1445    mapping.getMap().getToPosition(4));
1446   
1447    /*
1448    * dna1 to pep3
1449    */
1450  1 mappings = MappingUtils.findMappingsForSequence(pep3, dnaMappings);
1451  1 assertEquals(1, mappings.size());
1452  1 assertEquals(1, mappings.get(0).getMappings().size());
1453  1 assertSame(pep3.getDatasetSequence(),
1454    mappings.get(0).getMappings().get(0).getMapping().getTo());
1455   
1456    /*
1457    * dna1 to cds3
1458    */
1459  1 List<AlignedCodonFrame> dnaToCds3Mappings = MappingUtils
1460    .findMappingsForSequence(cds.get(2), dnaMappings);
1461  1 mapping = dnaToCds3Mappings.get(0).getMappings().get(0).getMapping();
1462  1 assertSame(cds.get(2).getDatasetSequence(), mapping.getTo());
1463  1 assertEquals("T(4) in CDS should map to T(10) in DNA", 10,
1464    mapping.getMap().getToPosition(4));
1465    }
1466   
 
1467  1 toggle @Test(groups = { "Functional" })
1468    public void testIsMappable()
1469    {
1470  1 SequenceI dna1 = new Sequence("dna1", "cgCAGtgGT");
1471  1 SequenceI aa1 = new Sequence("aa1", "RSG");
1472  1 SequenceI td1 = new Sequence("aa1", "QRV");
1473  1 SequenceI td2 = new Sequence("aa2", "QRV");
1474  1 AlignmentI al1 = new Alignment(new SequenceI[] { dna1 });
1475  1 AlignmentI al2 = new Alignment(new SequenceI[] { aa1 });
1476  1 AlignmentI al3 = new Alignment(new SequenceI[] { td1 });
1477  1 AlignmentI al4 = new Alignment(new SequenceI[] { td2 });
1478   
1479  1 assertFalse(AlignmentUtils.isMappable(null, null));
1480  1 assertFalse(AlignmentUtils.isMappable(al1, null));
1481  1 assertFalse(AlignmentUtils.isMappable(null, al1));
1482  1 assertFalse(AlignmentUtils.isMappable(al1, al1));
1483  1 assertFalse(AlignmentUtils.isMappable(al2, al2));
1484   
1485   
1486  1 assertTrue(AlignmentUtils.isMappable(al1, al2));
1487  1 assertTrue(AlignmentUtils.isMappable(al2, al1));
1488   
1489   
1490    // test 3di/peptide mappability
1491  1 assertFalse(AlignmentUtils.isMappable(al1, al3));
1492  1 assertFalse(AlignmentUtils.isMappable(al2, al4));
1493  1 assertFalse(AlignmentUtils.isMappable(al3, al4));
1494   
1495   
1496  1 assertTrue(AlignmentUtils.isMappable(al2, al3));
1497  1 assertTrue(AlignmentUtils.isMappable(al3, al2));
1498    }
1499   
1500    /**
1501    * Test creating a mapping when the sequences involved do not start at residue
1502    * 1
1503    *
1504    * @throws IOException
1505    */
 
1506  1 toggle @Test(groups = { "Functional" })
1507    public void testMapCdnaToProtein_forSubsequence() throws IOException
1508    {
1509  1 SequenceI prot = new Sequence("UNIPROT|V12345", "E-I--Q", 10, 12);
1510  1 prot.createDatasetSequence();
1511   
1512  1 SequenceI dna = new Sequence("EMBL|A33333", "GAA--AT-C-CAG", 40, 48);
1513  1 dna.createDatasetSequence();
1514   
1515  1 MapList map = AlignmentUtils.mapCdnaToProtein(prot, dna);
1516  1 assertEquals(10, map.getToLowest());
1517  1 assertEquals(12, map.getToHighest());
1518  1 assertEquals(40, map.getFromLowest());
1519  1 assertEquals(48, map.getFromHighest());
1520    }
1521   
1522    /**
1523    * Test for the alignSequenceAs method where we have protein mapped to protein
1524    */
 
1525  1 toggle @Test(groups = { "Functional" })
1526    public void testAlignSequenceAs_mappedProteinProtein()
1527    {
1528   
1529  1 SequenceI alignMe = new Sequence("Match", "MGAASEV");
1530  1 alignMe.createDatasetSequence();
1531  1 SequenceI alignFrom = new Sequence("Query", "LQTGYMGAASEVMFSPTRR");
1532  1 alignFrom.createDatasetSequence();
1533   
1534  1 AlignedCodonFrame acf = new AlignedCodonFrame();
1535    // this is like a domain or motif match of part of a peptide sequence
1536  1 MapList map = new MapList(new int[] { 6, 12 }, new int[] { 1, 7 }, 1,
1537    1);
1538  1 acf.addMap(alignFrom.getDatasetSequence(), alignMe.getDatasetSequence(),
1539    map);
1540   
1541  1 AlignmentUtils.alignSequenceAs(alignMe, alignFrom, acf, "-", '-', true,
1542    true);
1543  1 assertEquals("-----MGAASEV-------", alignMe.getSequenceAsString());
1544    }
1545   
1546    /**
1547    * Test for the alignSequenceAs method where there are trailing unmapped
1548    * residues in the model sequence
1549    */
 
1550  1 toggle @Test(groups = { "Functional" })
1551    public void testAlignSequenceAs_withTrailingPeptide()
1552    {
1553    // map first 3 codons to KPF; G is a trailing unmapped residue
1554  1 MapList map = new MapList(new int[] { 1, 9 }, new int[] { 1, 3 }, 3, 1);
1555   
1556  1 checkAlignSequenceAs("AAACCCTTT", "K-PFG", true, true, map,
1557    "AAA---CCCTTT---");
1558    }
1559   
1560    /**
1561    * Tests for transferring features between mapped sequences
1562    */
 
1563  1 toggle @Test(groups = { "Functional" })
1564    public void testTransferFeatures()
1565    {
1566  1 SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt");
1567  1 SequenceI cds = new Sequence("cds/10-15", "TAGGCC");
1568   
1569    // no overlap
1570  1 dna.addSequenceFeature(
1571    new SequenceFeature("type1", "desc1", 1, 2, 1f, null));
1572    // partial overlap - to [1, 1]
1573  1 dna.addSequenceFeature(
1574    new SequenceFeature("type2", "desc2", 3, 4, 2f, null));
1575    // exact overlap - to [1, 3]
1576  1 dna.addSequenceFeature(
1577    new SequenceFeature("type3", "desc3", 4, 6, 3f, null));
1578    // spanning overlap - to [2, 5]
1579  1 dna.addSequenceFeature(
1580    new SequenceFeature("type4", "desc4", 5, 11, 4f, null));
1581    // exactly overlaps whole mapped range [1, 6]
1582  1 dna.addSequenceFeature(
1583    new SequenceFeature("type5", "desc5", 4, 12, 5f, null));
1584    // no overlap (internal)
1585  1 dna.addSequenceFeature(
1586    new SequenceFeature("type6", "desc6", 7, 9, 6f, null));
1587    // no overlap (3' end)
1588  1 dna.addSequenceFeature(
1589    new SequenceFeature("type7", "desc7", 13, 15, 7f, null));
1590    // overlap (3' end) - to [6, 6]
1591  1 dna.addSequenceFeature(
1592    new SequenceFeature("type8", "desc8", 12, 12, 8f, null));
1593    // extended overlap - to [6, +]
1594  1 dna.addSequenceFeature(
1595    new SequenceFeature("type9", "desc9", 12, 13, 9f, null));
1596   
1597  1 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1598    new int[]
1599    { 1, 6 }, 1, 1);
1600   
1601    /*
1602    * transferFeatures() will build 'partial overlap' for regions
1603    * that partially overlap 5' or 3' (start or end) of target sequence
1604    */
1605  1 AlignmentUtils.transferFeatures(dna, cds, map, null);
1606  1 List<SequenceFeature> sfs = cds.getSequenceFeatures();
1607  1 assertEquals(6, sfs.size());
1608   
1609  1 SequenceFeature sf = sfs.get(0);
1610  1 assertEquals("type2", sf.getType());
1611  1 assertEquals("desc2", sf.getDescription());
1612  1 assertEquals(2f, sf.getScore());
1613  1 assertEquals(1, sf.getBegin());
1614  1 assertEquals(1, sf.getEnd());
1615   
1616  1 sf = sfs.get(1);
1617  1 assertEquals("type3", sf.getType());
1618  1 assertEquals("desc3", sf.getDescription());
1619  1 assertEquals(3f, sf.getScore());
1620  1 assertEquals(1, sf.getBegin());
1621  1 assertEquals(3, sf.getEnd());
1622   
1623  1 sf = sfs.get(2);
1624  1 assertEquals("type4", sf.getType());
1625  1 assertEquals(2, sf.getBegin());
1626  1 assertEquals(5, sf.getEnd());
1627   
1628  1 sf = sfs.get(3);
1629  1 assertEquals("type5", sf.getType());
1630  1 assertEquals(1, sf.getBegin());
1631  1 assertEquals(6, sf.getEnd());
1632   
1633  1 sf = sfs.get(4);
1634  1 assertEquals("type8", sf.getType());
1635  1 assertEquals(6, sf.getBegin());
1636  1 assertEquals(6, sf.getEnd());
1637   
1638  1 sf = sfs.get(5);
1639  1 assertEquals("type9", sf.getType());
1640  1 assertEquals(6, sf.getBegin());
1641  1 assertEquals(6, sf.getEnd());
1642    }
1643   
1644    /**
1645    * Tests for transferring features between mapped sequences, omitting named feature types
1646    */
 
1647  1 toggle @Test(groups = { "Functional" })
1648    public void testTransferFeatures_withOmit()
1649    {
1650  1 SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt");
1651  1 SequenceI cds = new Sequence("cds/10-15", "TAGGCC");
1652   
1653  1 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1654    new int[]
1655    { 1, 6 }, 1, 1);
1656   
1657    // [5, 11] maps to [2, 5]
1658  1 dna.addSequenceFeature(
1659    new SequenceFeature("type4", "desc4", 5, 11, 4f, null));
1660    // [4, 12] maps to [1, 6]
1661  1 dna.addSequenceFeature(
1662    new SequenceFeature("type5", "desc5", 4, 12, 5f, null));
1663    // [12, 12] maps to [6, 6]
1664  1 dna.addSequenceFeature(
1665    new SequenceFeature("type8", "desc8", 12, 12, 8f, null));
1666   
1667    // "type4" and "type8" are the 'omit these' varargs
1668  1 AlignmentUtils.transferFeatures(dna, cds, map, null, "type4", "type8");
1669  1 List<SequenceFeature> sfs = cds.getSequenceFeatures();
1670  1 assertEquals(1, sfs.size());
1671   
1672  1 SequenceFeature sf = sfs.get(0);
1673  1 assertEquals("type5", sf.getType());
1674  1 assertEquals(1, sf.getBegin());
1675  1 assertEquals(6, sf.getEnd());
1676    }
1677   
1678    /**
1679    * Tests for transferring features between mapped sequences, selecting a single feature type
1680    */
 
1681  1 toggle @Test(groups = { "Functional" })
1682    public void testTransferFeatures_withSelect()
1683    {
1684  1 SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt");
1685  1 SequenceI cds = new Sequence("cds/10-15", "TAGGCC");
1686   
1687  1 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1688    new int[]
1689    { 1, 6 }, 1, 1);
1690   
1691    // [5, 11] maps to [2, 5]
1692  1 dna.addSequenceFeature(
1693    new SequenceFeature("type4", "desc4", 5, 11, 4f, null));
1694    // [4, 12] maps to [1, 6]
1695  1 dna.addSequenceFeature(
1696    new SequenceFeature("type5", "desc5", 4, 12, 5f, null));
1697    // [12, 12] maps to [6, 6]
1698  1 dna.addSequenceFeature(
1699    new SequenceFeature("type8", "desc8", 12, 12, 8f, null));
1700   
1701    // "type5" is the 'select this type' argument
1702  1 AlignmentUtils.transferFeatures(dna, cds, map, "type5");
1703  1 List<SequenceFeature> sfs = cds.getSequenceFeatures();
1704  1 assertEquals(1, sfs.size());
1705   
1706  1 SequenceFeature sf = sfs.get(0);
1707  1 assertEquals("type5", sf.getType());
1708  1 assertEquals(1, sf.getBegin());
1709  1 assertEquals(6, sf.getEnd());
1710    }
1711   
1712    /**
1713    * Test the method that extracts the cds-only part of a dna alignment, for the
1714    * case where the cds should be aligned to match its nucleotide sequence.
1715    */
 
1716  1 toggle @Test(groups = { "Functional" })
1717    public void testMakeCdsAlignment_alternativeTranscripts()
1718    {
1719  1 SequenceI dna1 = new Sequence("dna1", "aaaGGGCC-----CTTTaaaGGG");
1720    // alternative transcript of same dna skips bases 9-11 (ctt), so its codons are GGG CCT GGG
1721  1 SequenceI dna2 = new Sequence("dna2", "aaaGGGCC-----cttTaaaGGG");
1722    // dna3 has no mapping (protein product) so should be ignored here
1723  1 SequenceI dna3 = new Sequence("dna3", "aaaGGGCCCCCGGGcttTaaaGGG");
1724  1 SequenceI pep1 = new Sequence("pep1", "GPFG");
1725  1 SequenceI pep2 = new Sequence("pep2", "GPG");
1726  1 dna1.createDatasetSequence();
1727  1 dna2.createDatasetSequence();
1728  1 dna3.createDatasetSequence();
1729  1 pep1.createDatasetSequence();
1730  1 pep2.createDatasetSequence();
1731   
1732  1 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
1733  1 dna.setDataset(null);
1734   
1735  1 MapList map = new MapList(new int[] { 4, 12, 16, 18 },
1736    new int[]
1737    { 1, 4 }, 3, 1);
1738  1 AlignedCodonFrame acf = new AlignedCodonFrame();
1739  1 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
1740  1 dna.addCodonFrame(acf);
1741  1 map = new MapList(new int[] { 4, 8, 12, 12, 16, 18 },
1742    new int[]
1743    { 1, 3 }, 3, 1);
1744  1 acf = new AlignedCodonFrame();
1745  1 acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
1746  1 dna.addCodonFrame(acf);
1747   
1748  1 AlignmentI cds = AlignmentUtils
1749    .makeCdsAlignment(new SequenceI[]
1750    { dna1, dna2, dna3 }, dna.getDataset(), null);
1751  1 List<SequenceI> cdsSeqs = cds.getSequences();
1752  1 assertEquals(2, cdsSeqs.size());
1753  1 assertEquals("GGGCCCTTTGGG", cdsSeqs.get(0).getSequenceAsString());
1754  1 assertEquals("GGGCCTGGG", cdsSeqs.get(1).getSequenceAsString());
1755   
1756    /*
1757    * verify shared, extended alignment dataset
1758    */
1759  1 assertSame(dna.getDataset(), cds.getDataset());
1760  1 assertTrue(dna.getDataset().getSequences()
1761    .contains(cdsSeqs.get(0).getDatasetSequence()));
1762  1 assertTrue(dna.getDataset().getSequences()
1763    .contains(cdsSeqs.get(1).getDatasetSequence()));
1764   
1765    /*
1766    * Verify 6 mappings: dna1 to cds1, cds1 to pep1, dna1 to pep1
1767    * and the same for dna2/cds2/pep2
1768    */
1769  1 List<AlignedCodonFrame> mappings = cds.getCodonFrames();
1770  1 assertEquals(6, mappings.size());
1771   
1772    /*
1773    * 2 mappings involve pep1
1774    */
1775  1 List<AlignedCodonFrame> pep1Mappings = MappingUtils
1776    .findMappingsForSequence(pep1, mappings);
1777  1 assertEquals(2, pep1Mappings.size());
1778   
1779    /*
1780    * Get mapping of pep1 to cds1 and verify it
1781    * maps GPFG to 1-3,4-6,7-9,10-12
1782    */
1783  1 List<AlignedCodonFrame> pep1CdsMappings = MappingUtils
1784    .findMappingsForSequence(cds.getSequenceAt(0), pep1Mappings);
1785  1 assertEquals(1, pep1CdsMappings.size());
1786  1 SearchResultsI sr = MappingUtils.buildSearchResults(pep1, 1,
1787    pep1CdsMappings);
1788  1 assertEquals(1, sr.getResults().size());
1789  1 SearchResultMatchI m = sr.getResults().get(0);
1790  1 assertEquals(cds.getSequenceAt(0).getDatasetSequence(),
1791    m.getSequence());
1792  1 assertEquals(1, m.getStart());
1793  1 assertEquals(3, m.getEnd());
1794  1 sr = MappingUtils.buildSearchResults(pep1, 2, pep1CdsMappings);
1795  1 m = sr.getResults().get(0);
1796  1 assertEquals(4, m.getStart());
1797  1 assertEquals(6, m.getEnd());
1798  1 sr = MappingUtils.buildSearchResults(pep1, 3, pep1CdsMappings);
1799  1 m = sr.getResults().get(0);
1800  1 assertEquals(7, m.getStart());
1801  1 assertEquals(9, m.getEnd());
1802  1 sr = MappingUtils.buildSearchResults(pep1, 4, pep1CdsMappings);
1803  1 m = sr.getResults().get(0);
1804  1 assertEquals(10, m.getStart());
1805  1 assertEquals(12, m.getEnd());
1806   
1807    /*
1808    * Get mapping of pep2 to cds2 and verify it
1809    * maps GPG in pep2 to 1-3,4-6,7-9 in second CDS sequence
1810    */
1811  1 List<AlignedCodonFrame> pep2Mappings = MappingUtils
1812    .findMappingsForSequence(pep2, mappings);
1813  1 assertEquals(2, pep2Mappings.size());
1814  1 List<AlignedCodonFrame> pep2CdsMappings = MappingUtils
1815    .findMappingsForSequence(cds.getSequenceAt(1), pep2Mappings);
1816  1 assertEquals(1, pep2CdsMappings.size());
1817  1 sr = MappingUtils.buildSearchResults(pep2, 1, pep2CdsMappings);
1818  1 assertEquals(1, sr.getResults().size());
1819  1 m = sr.getResults().get(0);
1820  1 assertEquals(cds.getSequenceAt(1).getDatasetSequence(),
1821    m.getSequence());
1822  1 assertEquals(1, m.getStart());
1823  1 assertEquals(3, m.getEnd());
1824  1 sr = MappingUtils.buildSearchResults(pep2, 2, pep2CdsMappings);
1825  1 m = sr.getResults().get(0);
1826  1 assertEquals(4, m.getStart());
1827  1 assertEquals(6, m.getEnd());
1828  1 sr = MappingUtils.buildSearchResults(pep2, 3, pep2CdsMappings);
1829  1 m = sr.getResults().get(0);
1830  1 assertEquals(7, m.getStart());
1831  1 assertEquals(9, m.getEnd());
1832    }
1833   
1834    /**
1835    * Test the method that realigns protein to match mapped codon alignment.
1836    */
 
1837  1 toggle @Test(groups = { "Functional" })
1838    public void testAlignProteinAsDna_incompleteStartCodon()
1839    {
1840    // seq1: incomplete start codon (not mapped), then [3, 11]
1841  1 SequenceI dna1 = new Sequence("Seq1", "ccAAA-TTT-GGG-");
1842    // seq2 codons are [4, 5], [8, 11]
1843  1 SequenceI dna2 = new Sequence("Seq2", "ccaAA-ttT-GGG-");
1844    // seq3 incomplete start codon at 'tt'
1845  1 SequenceI dna3 = new Sequence("Seq3", "ccaaa-ttt-GGG-");
1846  1 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
1847  1 dna.setDataset(null);
1848   
1849    // prot1 has 'X' for incomplete start codon (not mapped)
1850  1 SequenceI prot1 = new Sequence("Seq1", "XKFG"); // X for incomplete start
1851  1 SequenceI prot2 = new Sequence("Seq2", "NG");
1852  1 SequenceI prot3 = new Sequence("Seq3", "XG"); // X for incomplete start
1853  1 AlignmentI protein = new Alignment(
1854    new SequenceI[]
1855    { prot1, prot2, prot3 });
1856  1 protein.setDataset(null);
1857   
1858    // map dna1 [3, 11] to prot1 [2, 4] KFG
1859  1 MapList map = new MapList(new int[] { 3, 11 }, new int[] { 2, 4 }, 3,
1860    1);
1861  1 AlignedCodonFrame acf = new AlignedCodonFrame();
1862  1 acf.addMap(dna1.getDatasetSequence(), prot1.getDatasetSequence(), map);
1863   
1864    // map dna2 [4, 5] [8, 11] to prot2 [1, 2] NG
1865  1 map = new MapList(new int[] { 4, 5, 8, 11 }, new int[] { 1, 2 }, 3, 1);
1866  1 acf.addMap(dna2.getDatasetSequence(), prot2.getDatasetSequence(), map);
1867   
1868    // map dna3 [9, 11] to prot3 [2, 2] G
1869  1 map = new MapList(new int[] { 9, 11 }, new int[] { 2, 2 }, 3, 1);
1870  1 acf.addMap(dna3.getDatasetSequence(), prot3.getDatasetSequence(), map);
1871   
1872  1 ArrayList<AlignedCodonFrame> acfs = new ArrayList<>();
1873  1 acfs.add(acf);
1874  1 protein.setCodonFrames(acfs);
1875   
1876    /*
1877    * verify X is included in the aligned proteins, and placed just
1878    * before the first mapped residue
1879    * the AAT codon (N) is placed between AAA (K) and TTT (F)
1880    */
1881  1 AlignmentUtils.alignProteinAsDna(protein, dna);
1882  1 assertEquals("XK-FG", prot1.getSequenceAsString());
1883  1 assertEquals("--N-G", prot2.getSequenceAsString());
1884  1 assertEquals("---XG", prot3.getSequenceAsString());
1885    }
1886   
1887    /**
1888    * Tests for the method that maps the subset of a dna sequence that has CDS
1889    * (or subtype) feature - case where the start codon is incomplete.
1890    */
 
1891  1 toggle @Test(groups = "Functional")
1892    public void testFindCdsPositions_fivePrimeIncomplete()
1893    {
1894  1 SequenceI dnaSeq = new Sequence("dna", "aaagGGCCCaaaTTTttt");
1895  1 dnaSeq.createDatasetSequence();
1896  1 SequenceI ds = dnaSeq.getDatasetSequence();
1897   
1898    // CDS for dna 5-6 (incomplete codon), 7-9
1899  1 SequenceFeature sf = new SequenceFeature("CDS", "", 5, 9, 0f, null);
1900  1 sf.setPhase("2"); // skip 2 bases to start of next codon
1901  1 ds.addSequenceFeature(sf);
1902    // CDS for dna 13-15
1903  1 sf = new SequenceFeature("CDS_predicted", "", 13, 15, 0f, null);
1904  1 ds.addSequenceFeature(sf);
1905   
1906  1 List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);
1907   
1908    /*
1909    * check the mapping starts with the first complete codon
1910    */
1911  1 assertEquals(6, MappingUtils.getLength(ranges));
1912  1 assertEquals(2, ranges.size());
1913  1 assertEquals(7, ranges.get(0)[0]);
1914  1 assertEquals(9, ranges.get(0)[1]);
1915  1 assertEquals(13, ranges.get(1)[0]);
1916  1 assertEquals(15, ranges.get(1)[1]);
1917    }
1918   
1919    /**
1920    * Tests for the method that maps the subset of a dna sequence that has CDS
1921    * (or subtype) feature.
1922    */
 
1923  1 toggle @Test(groups = "Functional")
1924    public void testFindCdsPositions()
1925    {
1926  1 SequenceI dnaSeq = new Sequence("dna", "aaaGGGcccAAATTTttt");
1927  1 dnaSeq.createDatasetSequence();
1928  1 SequenceI ds = dnaSeq.getDatasetSequence();
1929   
1930    // CDS for dna 10-12
1931  1 SequenceFeature sf = new SequenceFeature("CDS_predicted", "", 10, 12,
1932    0f, null);
1933  1 sf.setStrand("+");
1934  1 ds.addSequenceFeature(sf);
1935    // CDS for dna 4-6
1936  1 sf = new SequenceFeature("CDS", "", 4, 6, 0f, null);
1937  1 sf.setStrand("+");
1938  1 ds.addSequenceFeature(sf);
1939    // exon feature should be ignored here
1940  1 sf = new SequenceFeature("exon", "", 7, 9, 0f, null);
1941  1 ds.addSequenceFeature(sf);
1942   
1943  1 List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);
1944    /*
1945    * verify ranges { [4-6], [10-12] }
1946    * note CDS ranges are ordered ascending even if the CDS
1947    * features are not
1948    */
1949  1 assertEquals(6, MappingUtils.getLength(ranges));
1950  1 assertEquals(2, ranges.size());
1951  1 assertEquals(4, ranges.get(0)[0]);
1952  1 assertEquals(6, ranges.get(0)[1]);
1953  1 assertEquals(10, ranges.get(1)[0]);
1954  1 assertEquals(12, ranges.get(1)[1]);
1955    }
1956   
1957    /**
1958    * Tests for the method that maps the subset of a dna sequence that has CDS
1959    * (or subtype) feature, with CDS strand = '-' (reverse)
1960    */
1961    // test turned off as currently findCdsPositions is not strand-dependent
1962    // left in case it comes around again...
 
1963  0 toggle @Test(groups = "Functional", enabled = false)
1964    public void testFindCdsPositions_reverseStrand()
1965    {
1966  0 SequenceI dnaSeq = new Sequence("dna", "aaaGGGcccAAATTTttt");
1967  0 dnaSeq.createDatasetSequence();
1968  0 SequenceI ds = dnaSeq.getDatasetSequence();
1969   
1970    // CDS for dna 4-6
1971  0 SequenceFeature sf = new SequenceFeature("CDS", "", 4, 6, 0f, null);
1972  0 sf.setStrand("-");
1973  0 ds.addSequenceFeature(sf);
1974    // exon feature should be ignored here
1975  0 sf = new SequenceFeature("exon", "", 7, 9, 0f, null);
1976  0 ds.addSequenceFeature(sf);
1977    // CDS for dna 10-12
1978  0 sf = new SequenceFeature("CDS_predicted", "", 10, 12, 0f, null);
1979  0 sf.setStrand("-");
1980  0 ds.addSequenceFeature(sf);
1981   
1982  0 List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);
1983    /*
1984    * verify ranges { [12-10], [6-4] }
1985    */
1986  0 assertEquals(6, MappingUtils.getLength(ranges));
1987  0 assertEquals(2, ranges.size());
1988  0 assertEquals(12, ranges.get(0)[0]);
1989  0 assertEquals(10, ranges.get(0)[1]);
1990  0 assertEquals(6, ranges.get(1)[0]);
1991  0 assertEquals(4, ranges.get(1)[1]);
1992    }
1993   
1994    /**
1995    * Tests for the method that maps the subset of a dna sequence that has CDS
1996    * (or subtype) feature - reverse strand case where the start codon is
1997    * incomplete.
1998    */
 
1999  0 toggle @Test(groups = "Functional", enabled = false)
2000    // test turned off as currently findCdsPositions is not strand-dependent
2001    // left in case it comes around again...
2002    public void testFindCdsPositions_reverseStrandThreePrimeIncomplete()
2003    {
2004  0 SequenceI dnaSeq = new Sequence("dna", "aaagGGCCCaaaTTTttt");
2005  0 dnaSeq.createDatasetSequence();
2006  0 SequenceI ds = dnaSeq.getDatasetSequence();
2007   
2008    // CDS for dna 5-9
2009  0 SequenceFeature sf = new SequenceFeature("CDS", "", 5, 9, 0f, null);
2010  0 sf.setStrand("-");
2011  0 ds.addSequenceFeature(sf);
2012    // CDS for dna 13-15
2013  0 sf = new SequenceFeature("CDS_predicted", "", 13, 15, 0f, null);
2014  0 sf.setStrand("-");
2015  0 sf.setPhase("2"); // skip 2 bases to start of next codon
2016  0 ds.addSequenceFeature(sf);
2017   
2018  0 List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);
2019   
2020    /*
2021    * check the mapping starts with the first complete codon
2022    * expect ranges [13, 13], [9, 5]
2023    */
2024  0 assertEquals(6, MappingUtils.getLength(ranges));
2025  0 assertEquals(2, ranges.size());
2026  0 assertEquals(13, ranges.get(0)[0]);
2027  0 assertEquals(13, ranges.get(0)[1]);
2028  0 assertEquals(9, ranges.get(1)[0]);
2029  0 assertEquals(5, ranges.get(1)[1]);
2030    }
2031   
 
2032  1 toggle @Test(groups = "Functional")
2033    public void testAlignAs_alternateTranscriptsUngapped()
2034    {
2035  1 SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa");
2036  1 SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA");
2037  1 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });
2038  1 ((Alignment) dna).createDatasetAlignment();
2039  1 SequenceI cds1 = new Sequence("cds1", "GGGTTT");
2040  1 SequenceI cds2 = new Sequence("cds2", "CCCAAA");
2041  1 AlignmentI cds = new Alignment(new SequenceI[] { cds1, cds2 });
2042  1 ((Alignment) cds).createDatasetAlignment();
2043   
2044  1 AlignedCodonFrame acf = new AlignedCodonFrame();
2045  1 MapList map = new MapList(new int[] { 4, 9 }, new int[] { 1, 6 }, 1, 1);
2046  1 acf.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(), map);
2047  1 map = new MapList(new int[] { 1, 3, 10, 12 }, new int[] { 1, 6 }, 1, 1);
2048  1 acf.addMap(dna2.getDatasetSequence(), cds2.getDatasetSequence(), map);
2049   
2050    /*
2051    * verify CDS alignment is as:
2052    * cccGGGTTTaaa (cdna)
2053    * CCCgggtttAAA (cdna)
2054    *
2055    * ---GGGTTT (cds, no trailing gaps added)
2056    * CCC------AAA (cds)
2057    */
2058  1 dna.addCodonFrame(acf);
2059  1 AlignmentUtils.alignAs(cds, dna);
2060  1 assertEquals("---GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
2061  1 assertEquals("CCC------AAA",
2062    cds.getSequenceAt(1).getSequenceAsString());
2063    }
2064   
 
2065  1 toggle @Test(groups = { "Functional" })
2066    public void testAddMappedPositions()
2067    {
2068  1 SequenceI from = new Sequence("dna", "ggAA-ATcc-TT-g");
2069  1 SequenceI seq1 = new Sequence("cds", "AAATTT");
2070  1 from.createDatasetSequence();
2071  1 seq1.createDatasetSequence();
2072  1 Mapping mapping = new Mapping(seq1,
2073    new MapList(new int[]
2074    { 3, 6, 9, 10 }, new int[] { 1, 6 }, 1, 1));
2075  1 Map<Integer, Map<SequenceI, Character>> map = new TreeMap<>();
2076  1 AlignmentUtils.addMappedPositions(seq1, from, mapping, map);
2077   
2078    /*
2079    * verify map has seq1 residues in columns 3,4,6,7,11,12
2080    */
2081  1 assertEquals(6, map.size());
2082  1 assertEquals('A', map.get(3).get(seq1).charValue());
2083  1 assertEquals('A', map.get(4).get(seq1).charValue());
2084  1 assertEquals('A', map.get(6).get(seq1).charValue());
2085  1 assertEquals('T', map.get(7).get(seq1).charValue());
2086  1 assertEquals('T', map.get(11).get(seq1).charValue());
2087  1 assertEquals('T', map.get(12).get(seq1).charValue());
2088   
2089    /*
2090    *
2091    */
2092    }
2093   
2094    /**
2095    * Test case where the mapping 'from' range includes a stop codon which is
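2096    * absent in the 'to' range. NOTE(review): the fixture and assertions below are identical to testAddMappedPositions() and include no stop codon - confirm intended coverage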
2097    */
 
2098  1 toggle @Test(groups = { "Functional" })
2099    public void testAddMappedPositions_withStopCodon()
2100    {
2101  1 SequenceI from = new Sequence("dna", "ggAA-ATcc-TT-g");
2102  1 SequenceI seq1 = new Sequence("cds", "AAATTT");
2103  1 from.createDatasetSequence();
2104  1 seq1.createDatasetSequence();
2105  1 Mapping mapping = new Mapping(seq1,
2106    new MapList(new int[]
2107    { 3, 6, 9, 10 }, new int[] { 1, 6 }, 1, 1));
2108  1 Map<Integer, Map<SequenceI, Character>> map = new TreeMap<>();
2109  1 AlignmentUtils.addMappedPositions(seq1, from, mapping, map);
2110   
2111    /*
2112    * verify map has seq1 residues in columns 3,4,6,7,11,12
2113    */
2114  1 assertEquals(6, map.size());
2115  1 assertEquals('A', map.get(3).get(seq1).charValue());
2116  1 assertEquals('A', map.get(4).get(seq1).charValue());
2117  1 assertEquals('A', map.get(6).get(seq1).charValue());
2118  1 assertEquals('T', map.get(7).get(seq1).charValue());
2119  1 assertEquals('T', map.get(11).get(seq1).charValue());
2120  1 assertEquals('T', map.get(12).get(seq1).charValue());
2121    }
2122   
2123    /**
2124    * Test for the case where the products for which we want CDS are specified.
2125    * This is to represent the case where EMBL has CDS mappings to both Uniprot
2126    * and EMBLCDSPROTEIN. makeCdsAlignment() should only return the mappings for
2127    * the protein sequences specified.
2128    */
 
2129  1 toggle @Test(groups = { "Functional" })
2130    public void testMakeCdsAlignment_filterProducts()
2131    {
2132  1 SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
2133  1 SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC");
2134  1 SequenceI pep1 = new Sequence("Uniprot|pep1", "GF");
2135  1 SequenceI pep2 = new Sequence("Uniprot|pep2", "GFP");
2136  1 SequenceI pep3 = new Sequence("EMBL|pep3", "GF");
2137  1 SequenceI pep4 = new Sequence("EMBL|pep4", "GFP");
2138  1 dna1.createDatasetSequence();
2139  1 dna2.createDatasetSequence();
2140  1 pep1.createDatasetSequence();
2141  1 pep2.createDatasetSequence();
2142  1 pep3.createDatasetSequence();
2143  1 pep4.createDatasetSequence();
2144  1 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });
2145  1 dna.setDataset(null);
2146  1 AlignmentI emblPeptides = new Alignment(new SequenceI[] { pep3, pep4 });
2147  1 emblPeptides.setDataset(null);
2148   
2149  1 AlignedCodonFrame acf = new AlignedCodonFrame();
2150  1 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
2151    new int[]
2152    { 1, 2 }, 3, 1);
2153  1 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
2154  1 acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map);
2155  1 dna.addCodonFrame(acf);
2156   
2157  1 acf = new AlignedCodonFrame();
2158  1 map = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, new int[] { 1, 3 },
2159    3, 1);
2160  1 acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
2161  1 acf.addMap(dna2.getDatasetSequence(), pep4.getDatasetSequence(), map);
2162  1 dna.addCodonFrame(acf);
2163   
2164    /*
2165    * execute method under test to find CDS for EMBL peptides only
2166    */
2167  1 AlignmentI cds = AlignmentUtils
2168    .makeCdsAlignment(new SequenceI[]
2169    { dna1, dna2 }, dna.getDataset(),
2170    emblPeptides.getSequencesArray());
2171   
2172  1 assertEquals(2, cds.getSequences().size());
2173  1 assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
2174  1 assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString());
2175   
2176    /*
2177    * verify shared, extended alignment dataset
2178    */
2179  1 assertSame(dna.getDataset(), cds.getDataset());
2180  1 assertTrue(dna.getDataset().getSequences()
2181    .contains(cds.getSequenceAt(0).getDatasetSequence()));
2182  1 assertTrue(dna.getDataset().getSequences()
2183    .contains(cds.getSequenceAt(1).getDatasetSequence()));
2184   
2185    /*
2186    * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide
2187    * the mappings are on the shared alignment dataset
2188    */
2189  1 List<AlignedCodonFrame> cdsMappings = cds.getDataset().getCodonFrames();
2190    /*
2191    * 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep)
2192    */
2193  1 assertEquals(6, cdsMappings.size());
2194   
2195    /*
2196    * verify that mapping sets for dna and cds alignments are different
2197    * [not current behaviour - all mappings are on the alignment dataset]
2198    */
2199    // select -> subselect type to test.
2200    // Assert.assertNotSame(dna.getCodonFrames(), cds.getCodonFrames());
2201    // assertEquals(4, dna.getCodonFrames().size());
2202    // assertEquals(4, cds.getCodonFrames().size());
2203   
2204    /*
2205    * Two mappings involve pep3 (dna to pep3, cds to pep3)
2206    * Mapping from pep3 to GGGTTT in first new CDS sequence
2207    */
2208  1 List<AlignedCodonFrame> pep3Mappings = MappingUtils
2209    .findMappingsForSequence(pep3, cdsMappings);
2210  1 assertEquals(2, pep3Mappings.size());
2211  1 List<AlignedCodonFrame> mappings = MappingUtils
2212    .findMappingsForSequence(cds.getSequenceAt(0), pep3Mappings);
2213  1 assertEquals(1, mappings.size());
2214   
2215    // map G to GGG
2216  1 SearchResultsI sr = MappingUtils.buildSearchResults(pep3, 1, mappings);
2217  1 assertEquals(1, sr.getResults().size());
2218  1 SearchResultMatchI m = sr.getResults().get(0);
2219  1 assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
2220  1 assertEquals(1, m.getStart());
2221  1 assertEquals(3, m.getEnd());
2222    // map F to TTT
2223  1 sr = MappingUtils.buildSearchResults(pep3, 2, mappings);
2224  1 m = sr.getResults().get(0);
2225  1 assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
2226  1 assertEquals(4, m.getStart());
2227  1 assertEquals(6, m.getEnd());
2228   
2229    /*
2230    * Two mappings involve pep4 (dna to pep4, cds to pep4)
2231    * Verify mapping from pep4 to GGGTTTCCC in second new CDS sequence
2232    */
2233  1 List<AlignedCodonFrame> pep4Mappings = MappingUtils
2234    .findMappingsForSequence(pep4, cdsMappings);
2235  1 assertEquals(2, pep4Mappings.size());
2236  1 mappings = MappingUtils.findMappingsForSequence(cds.getSequenceAt(1),
2237    pep4Mappings);
2238  1 assertEquals(1, mappings.size());
2239    // map G to GGG
2240  1 sr = MappingUtils.buildSearchResults(pep4, 1, mappings);
2241  1 assertEquals(1, sr.getResults().size());
2242  1 m = sr.getResults().get(0);
2243  1 assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
2244  1 assertEquals(1, m.getStart());
2245  1 assertEquals(3, m.getEnd());
2246    // map F to TTT
2247  1 sr = MappingUtils.buildSearchResults(pep4, 2, mappings);
2248  1 m = sr.getResults().get(0);
2249  1 assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
2250  1 assertEquals(4, m.getStart());
2251  1 assertEquals(6, m.getEnd());
2252    // map P to CCC
2253  1 sr = MappingUtils.buildSearchResults(pep4, 3, mappings);
2254  1 m = sr.getResults().get(0);
2255  1 assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
2256  1 assertEquals(7, m.getStart());
2257  1 assertEquals(9, m.getEnd());
2258    }
2259   
2260    /**
2261    * Test the method that just copies aligned sequences, provided all sequences
2262    * to be aligned share the aligned sequence's dataset
2263    */
 
2264  1 toggle @Test(groups = "Functional")
2265    public void testAlignAsSameSequences()
2266    {
2267  1 SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa");
2268  1 SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA");
2269  1 AlignmentI al1 = new Alignment(new SequenceI[] { dna1, dna2 });
2270  1 ((Alignment) al1).createDatasetAlignment();
2271   
2272  1 SequenceI dna3 = new Sequence(dna1);
2273  1 SequenceI dna4 = new Sequence(dna2);
2274  1 assertSame(dna3.getDatasetSequence(), dna1.getDatasetSequence());
2275  1 assertSame(dna4.getDatasetSequence(), dna2.getDatasetSequence());
2276  1 String seq1 = "-cc-GG-GT-TT--aaa";
2277  1 dna3.setSequence(seq1);
2278  1 String seq2 = "C--C-Cgg--gtt-tAA-A-";
2279  1 dna4.setSequence(seq2);
2280  1 AlignmentI al2 = new Alignment(new SequenceI[] { dna3, dna4 });
2281  1 ((Alignment) al2).createDatasetAlignment();
2282   
2283    /*
2284    * alignment removes gapped columns (two internal, two trailing)
2285    */
2286  1 assertTrue(AlignmentUtils.alignAsSameSequences(al1, al2));
2287  1 String aligned1 = "-cc-GG-GTTT-aaa";
2288  1 assertEquals(aligned1, al1.getSequenceAt(0).getSequenceAsString());
2289  1 String aligned2 = "C--C-Cgg-gtttAAA";
2290  1 assertEquals(aligned2, al1.getSequenceAt(1).getSequenceAsString());
2291   
2292    /*
2293    * add another sequence to 'aligned' - should still succeed, since
2294    * unaligned sequences still share a dataset with aligned sequences
2295    */
2296  1 SequenceI dna5 = new Sequence("dna5", "CCCgggtttAAA");
2297  1 dna5.createDatasetSequence();
2298  1 al2.addSequence(dna5);
2299  1 assertTrue(AlignmentUtils.alignAsSameSequences(al1, al2));
2300  1 assertEquals(aligned1, al1.getSequenceAt(0).getSequenceAsString());
2301  1 assertEquals(aligned2, al1.getSequenceAt(1).getSequenceAsString());
2302   
2303    /*
2304    * add another sequence to 'unaligned' - should fail, since now not
2305    * all unaligned sequences share a dataset with aligned sequences
2306    */
2307  1 SequenceI dna6 = new Sequence("dna6", "CCCgggtttAAA");
2308  1 dna6.createDatasetSequence();
2309  1 al1.addSequence(dna6);
2310    // JAL-2110 JBP Comment: what's the use case for this behaviour ?
2311  1 assertFalse(AlignmentUtils.alignAsSameSequences(al1, al2));
2312    }
2313   
 
2314  1 toggle @Test(groups = "Functional")
2315    public void testAlignAsSameSequencesMultipleSubSeq()
2316    {
2317  1 SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa");
2318  1 SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA");
2319  1 SequenceI as1 = dna1.deriveSequence(); // cccGGGTTTaaa/1-12
2320  1 SequenceI as2 = dna1.deriveSequence().getSubSequence(3, 7); // GGGT/4-7
2321  1 SequenceI as3 = dna2.deriveSequence(); // CCCgggtttAAA/1-12
2322  1 as1.insertCharAt(6, 5, '-');
2323  1 assertEquals("cccGGG-----TTTaaa", as1.getSequenceAsString());
2324  1 as2.insertCharAt(6, 5, '-');
2325  1 assertEquals("GGGT-----", as2.getSequenceAsString());
2326  1 as3.insertCharAt(3, 5, '-');
2327  1 assertEquals("CCC-----gggtttAAA", as3.getSequenceAsString());
2328  1 AlignmentI aligned = new Alignment(new SequenceI[] { as1, as2, as3 });
2329   
2330    // why do we need to cast this still ?
2331  1 ((Alignment) aligned).createDatasetAlignment();
2332  1 SequenceI uas1 = dna1.deriveSequence();
2333  1 SequenceI uas2 = dna1.deriveSequence().getSubSequence(3, 7);
2334  1 SequenceI uas3 = dna2.deriveSequence();
2335  1 AlignmentI tobealigned = new Alignment(
2336    new SequenceI[]
2337    { uas1, uas2, uas3 });
2338  1 ((Alignment) tobealigned).createDatasetAlignment();
2339   
2340    /*
2341    * alignAs lines up dataset sequences and removes empty columns (two)
2342    */
2343  1 assertTrue(AlignmentUtils.alignAsSameSequences(tobealigned, aligned));
2344  1 assertEquals("cccGGG---TTTaaa", uas1.getSequenceAsString());
2345  1 assertEquals("GGGT", uas2.getSequenceAsString());
2346  1 assertEquals("CCC---gggtttAAA", uas3.getSequenceAsString());
2347    }
2348   
 
2349  1 toggle @Test(groups = { "Functional" })
2350    public void testTransferGeneLoci()
2351    {
2352  1 SequenceI from = new Sequence("transcript",
2353    "aaacccgggTTTAAACCCGGGtttaaacccgggttt");
2354  1 SequenceI to = new Sequence("CDS", "TTTAAACCCGGG");
2355  1 MapList map = new MapList(new int[] { 1, 12 }, new int[] { 10, 21 }, 1,
2356    1);
2357   
2358    /*
2359    * first with nothing to transfer
2360    */
2361  1 AlignmentUtils.transferGeneLoci(from, map, to);
2362  1 assertNull(to.getGeneLoci());
2363   
2364    /*
2365    * next with gene loci set on 'from' sequence
2366    */
2367  1 int[] exons = new int[] { 100, 105, 155, 164, 210, 229 };
2368  1 MapList geneMap = new MapList(new int[] { 1, 36 }, exons, 1, 1);
2369  1 from.setGeneLoci("human", "GRCh38", "7", geneMap);
2370  1 AlignmentUtils.transferGeneLoci(from, map, to);
2371   
2372  1 GeneLociI toLoci = to.getGeneLoci();
2373  1 assertNotNull(toLoci);
2374    // DBRefEntry constructor upper-cases 'source'
2375  1 assertEquals("HUMAN", toLoci.getSpeciesId());
2376  1 assertEquals("GRCh38", toLoci.getAssemblyId());
2377  1 assertEquals("7", toLoci.getChromosomeId());
2378   
2379    /*
2380    * transcript 'exons' are 1-6, 7-16, 17-36
2381    * CDS 1:12 is transcript 10-21
2382    * transcript 'CDS' is 10-16, 17-21
2383    * which is 'gene' 158-164, 210-214
2384    */
2385  1 MapList toMap = toLoci.getMapping();
2386  1 assertEquals(1, toMap.getFromRanges().size());
2387  1 assertEquals(2, toMap.getFromRanges().get(0).length);
2388  1 assertEquals(1, toMap.getFromRanges().get(0)[0]);
2389  1 assertEquals(12, toMap.getFromRanges().get(0)[1]);
2390  1 assertEquals(2, toMap.getToRanges().size());
2391  1 assertEquals(2, toMap.getToRanges().get(0).length);
2392  1 assertEquals(158, toMap.getToRanges().get(0)[0]);
2393  1 assertEquals(164, toMap.getToRanges().get(0)[1]);
2394  1 assertEquals(210, toMap.getToRanges().get(1)[0]);
2395  1 assertEquals(214, toMap.getToRanges().get(1)[1]);
2396    // or summarised as (but toString might change in future):
2397  1 assertEquals("[ [1, 12] ] 1:1 to [ [158, 164] [210, 214] ]",
2398    toMap.toString());
2399   
2400    /*
2401    * an existing value is not overridden
2402    */
2403  1 geneMap = new MapList(new int[] { 1, 36 }, new int[] { 36, 1 }, 1, 1);
2404  1 from.setGeneLoci("inhuman", "GRCh37", "6", geneMap);
2405  1 AlignmentUtils.transferGeneLoci(from, map, to);
2406  1 assertEquals("GRCh38", toLoci.getAssemblyId());
2407  1 assertEquals("7", toLoci.getChromosomeId());
2408  1 toMap = toLoci.getMapping();
2409  1 assertEquals("[ [1, 12] ] 1:1 to [ [158, 164] [210, 214] ]",
2410    toMap.toString());
2411    }
2412   
2413    /**
2414    * Tests for the method that maps nucleotide to protein based on CDS features
2415    */
 
2416  1 toggle @Test(groups = "Functional")
2417    public void testMapCdsToProtein()
2418    {
2419  1 SequenceI peptide = new Sequence("pep", "KLQ");
2420   
2421    /*
2422    * Case 1: CDS 3 times length of peptide
2423    * NB method only checks lengths match, not translation
2424    */
2425  1 SequenceI dna = new Sequence("dna", "AACGacgtCTCCT");
2426  1 dna.createDatasetSequence();
2427  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
2428  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 13, null));
2429  1 MapList ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
2430  1 assertEquals(3, ml.getFromRatio());
2431  1 assertEquals(1, ml.getToRatio());
2432  1 assertEquals("[[1, 3]]",
2433    Arrays.deepToString(ml.getToRanges().toArray()));
2434  1 assertEquals("[[1, 4], [9, 13]]",
2435    Arrays.deepToString(ml.getFromRanges().toArray()));
2436   
2437    /*
2438    * Case 2: CDS 3 times length of peptide + stop codon
2439    * (note code does not currently check trailing codon is a stop codon)
2440    */
2441  1 dna = new Sequence("dna", "AACGacgtCTCCTCCC");
2442  1 dna.createDatasetSequence();
2443  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
2444  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 16, null));
2445  1 ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
2446  1 assertEquals(3, ml.getFromRatio());
2447  1 assertEquals(1, ml.getToRatio());
2448  1 assertEquals("[[1, 3]]",
2449    Arrays.deepToString(ml.getToRanges().toArray()));
2450  1 assertEquals("[[1, 4], [9, 13]]",
2451    Arrays.deepToString(ml.getFromRanges().toArray()));
2452   
2453    /*
2454    * Case 3: CDS longer than 3 * peptide + stop codon - no mapping is made
2455    */
2456  1 dna = new Sequence("dna", "AACGacgtCTCCTTGATCA");
2457  1 dna.createDatasetSequence();
2458  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
2459  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 19, null));
2460  1 ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
2461  1 assertNull(ml);
2462   
2463    /*
2464    * Case 4: CDS shorter than 3 * peptide - no mapping is made
2465    */
2466  1 dna = new Sequence("dna", "AACGacgtCTCC");
2467  1 dna.createDatasetSequence();
2468  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
2469  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 12, null));
2470  1 ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
2471  1 assertNull(ml);
2472   
2473    /*
2474    * Case 5: CDS 3 times length of peptide + part codon - mapping is truncated
2475    */
2476  1 dna = new Sequence("dna", "AACGacgtCTCCTTG");
2477  1 dna.createDatasetSequence();
2478  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
2479  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 15, null));
2480  1 ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
2481  1 assertEquals(3, ml.getFromRatio());
2482  1 assertEquals(1, ml.getToRatio());
2483  1 assertEquals("[[1, 3]]",
2484    Arrays.deepToString(ml.getToRanges().toArray()));
2485  1 assertEquals("[[1, 4], [9, 13]]",
2486    Arrays.deepToString(ml.getFromRanges().toArray()));
2487   
2488    /*
2489    * Case 6: incomplete start codon corresponding to X in peptide
2490    */
2491  1 dna = new Sequence("dna", "ACGacgtCTCCTTGG");
2492  1 dna.createDatasetSequence();
2493  1 SequenceFeature sf = new SequenceFeature("CDS", "", 1, 3, null);
2494  1 sf.setPhase("2"); // skip 2 positions (AC) to start of next codon (GCT)
2495  1 dna.addSequenceFeature(sf);
2496  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 8, 15, null));
2497  1 peptide = new Sequence("pep", "XLQ");
2498  1 ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
2499  1 assertEquals("[[2, 3]]",
2500    Arrays.deepToString(ml.getToRanges().toArray()));
2501  1 assertEquals("[[3, 3], [8, 12]]",
2502    Arrays.deepToString(ml.getFromRanges().toArray()));
2503    }
2504   
2505    /**
2506    * Tests for the method that locates the CDS sequence that has a mapping to
2507    * the given protein. That is, given a transcript-to-peptide mapping, find the
2508    * cds-to-peptide mapping that relates to both, and return the CDS sequence.
2509    */
 
2510  1 toggle @Test(groups = "Functional")
2511    public void testFindCdsForProtein()
2512    {
2513  1 List<AlignedCodonFrame> mappings = new ArrayList<>();
2514  1 AlignedCodonFrame acf1 = new AlignedCodonFrame();
2515  1 mappings.add(acf1);
2516   
2517  1 SequenceI dna1 = new Sequence("dna1", "cgatATcgGCTATCTATGacg");
2518  1 dna1.createDatasetSequence();
2519   
2520    // NB we currently exclude STOP codon from CDS sequences
2521    // the test would need to change if this changes in future
2522  1 SequenceI cds1 = new Sequence("cds1", "ATGCTATCT");
2523  1 cds1.createDatasetSequence();
2524   
2525  1 SequenceI pep1 = new Sequence("pep1", "MLS");
2526  1 pep1.createDatasetSequence();
2527  1 List<AlignedCodonFrame> seqMappings = new ArrayList<>();
2528  1 MapList mapList = new MapList(new int[] { 5, 6, 9, 15 },
2529    new int[]
2530    { 1, 3 }, 3, 1);
2531  1 Mapping dnaToPeptide = new Mapping(pep1.getDatasetSequence(), mapList);
2532   
2533    // add dna to peptide mapping
2534  1 seqMappings.add(acf1);
2535  1 acf1.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(),
2536    mapList);
2537   
2538    /*
2539    * first case - no dna-to-CDS mapping exists - search fails
2540    */
2541  1 SequenceI seq = AlignmentUtils.findCdsForProtein(mappings, dna1,
2542    seqMappings, dnaToPeptide);
2543  1 assertNull(seq);
2544   
2545    /*
2546    * second case - CDS-to-peptide mapping exists but no dna-to-CDS
2547    * - search fails
2548    */
2549    // todo this test fails if the mapping is added to acf1, not acf2
2550    // need to tidy up use of lists of mappings in AlignedCodonFrame
2551  1 AlignedCodonFrame acf2 = new AlignedCodonFrame();
2552  1 mappings.add(acf2);
2553  1 MapList cdsToPeptideMapping = new MapList(new int[] { 1, 9 },
2554    new int[]
2555    { 1, 3 }, 3, 1);
2556  1 acf2.addMap(cds1.getDatasetSequence(), pep1.getDatasetSequence(),
2557    cdsToPeptideMapping);
2558  1 assertNull(AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
2559    dnaToPeptide));
2560   
2561    /*
2562    * third case - add dna-to-CDS mapping - CDS is now found!
2563    */
2564  1 MapList dnaToCdsMapping = new MapList(new int[] { 5, 6, 9, 15 },
2565    new int[]
2566    { 1, 9 }, 1, 1);
2567  1 acf1.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(),
2568    dnaToCdsMapping);
2569  1 seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
2570    dnaToPeptide);
2571  1 assertSame(seq, cds1.getDatasetSequence());
2572    }
2573   
2574    /**
2575    * Tests for the method that locates the CDS sequence that has a mapping to
2576    * the given protein. That is, given a transcript-to-peptide mapping, find the
2577    * cds-to-peptide mapping that relates to both, and return the CDS sequence.
2578    * This test is for the case where transcript and CDS are the same length.
2579    */
 
2580  1 toggle @Test(groups = "Functional")
2581    public void testFindCdsForProtein_noUTR()
2582    {
2583  1 List<AlignedCodonFrame> mappings = new ArrayList<>();
2584  1 AlignedCodonFrame acf1 = new AlignedCodonFrame();
2585  1 mappings.add(acf1);
2586   
2587  1 SequenceI dna1 = new Sequence("dna1", "ATGCTATCTTAA");
2588  1 dna1.createDatasetSequence();
2589   
2590    // NB we currently exclude STOP codon from CDS sequences
2591    // the test would need to change if this changes in future
2592  1 SequenceI cds1 = new Sequence("cds1", "ATGCTATCT");
2593  1 cds1.createDatasetSequence();
2594   
2595  1 SequenceI pep1 = new Sequence("pep1", "MLS");
2596  1 pep1.createDatasetSequence();
2597  1 List<AlignedCodonFrame> seqMappings = new ArrayList<>();
2598  1 MapList mapList = new MapList(new int[] { 1, 9 }, new int[] { 1, 3 }, 3,
2599    1);
2600  1 Mapping dnaToPeptide = new Mapping(pep1.getDatasetSequence(), mapList);
2601   
2602    // add dna to peptide mapping
2603  1 seqMappings.add(acf1);
2604  1 acf1.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(),
2605    mapList);
2606   
2607    /*
2608    * first case - transcript lacks CDS features - it appears to be
2609    * the CDS sequence and is returned
2610    */
2611  1 SequenceI seq = AlignmentUtils.findCdsForProtein(mappings, dna1,
2612    seqMappings, dnaToPeptide);
2613  1 assertSame(seq, dna1.getDatasetSequence());
2614   
2615    /*
2616    * second case - transcript has CDS feature - this means it is
2617    * not returned as a match for CDS (CDS sequences don't have CDS features)
2618    */
2619  1 dna1.addSequenceFeature(
2620    new SequenceFeature(SequenceOntologyI.CDS, "cds", 1, 12, null));
2621  1 seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
2622    dnaToPeptide);
2623  1 assertNull(seq);
2624   
2625    /*
2626    * third case - CDS-to-peptide mapping exists but no dna-to-CDS
2627    * - search fails
2628    */
2629    // todo this test fails if the mapping is added to acf1, not acf2
2630    // need to tidy up use of lists of mappings in AlignedCodonFrame
2631  1 AlignedCodonFrame acf2 = new AlignedCodonFrame();
2632  1 mappings.add(acf2);
2633  1 MapList cdsToPeptideMapping = new MapList(new int[] { 1, 9 },
2634    new int[]
2635    { 1, 3 }, 3, 1);
2636  1 acf2.addMap(cds1.getDatasetSequence(), pep1.getDatasetSequence(),
2637    cdsToPeptideMapping);
2638  1 assertNull(AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
2639    dnaToPeptide));
2640   
2641    /*
2642    * fourth case - add dna-to-CDS mapping - CDS is now found!
2643    */
2644  1 MapList dnaToCdsMapping = new MapList(new int[] { 1, 9 },
2645    new int[]
2646    { 1, 9 }, 1, 1);
2647  1 acf1.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(),
2648    dnaToCdsMapping);
2649  1 seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
2650    dnaToPeptide);
2651  1 assertSame(seq, cds1.getDatasetSequence());
2652    }
2653   
 
2654  1 toggle @Test(groups = "Functional")
2655    public void testAddReferenceAnnotations()
2656    {
2657  1 SequenceI longseq = new Sequence("longA", "ASDASDASDASDAASDASDASDASDA");
2658  1 Annotation[] aa = new Annotation[longseq.getLength()];
2659   
2660  27 for (int p = 0; p < aa.length; p++)
2661    {
2662  26 aa[p] = new Annotation("P", "pos " + (p + 1), (char) 0,
2663    (float) p + 1);
2664    }
2665  1 AlignmentAnnotation refAnnot = new AlignmentAnnotation("LongSeqAnnot",
2666    "Annotations", aa);
2667  1 refAnnot.setCalcId("Test");
2668  1 longseq.addAlignmentAnnotation(refAnnot);
2669  1 verifyExpectedSequenceAnnotation(refAnnot);
2670   
2671  1 Alignment ourAl = new Alignment(
2672    new SequenceI[]
2673    { longseq.getSubSequence(5, 10),
2674    longseq.getSubSequence(7, 12) });
2675  1 ourAl.createDatasetAlignment();
2676   
2677    // transfer annotation
2678  1 SortedMap<String, String> tipEntries = new TreeMap<>();
2679  1 Map<SequenceI, List<AlignmentAnnotation>> candidates = new LinkedHashMap<>();
2680   
2681  1 AlignmentUtils.findAddableReferenceAnnotations(ourAl.getSequences(),
2682    tipEntries, candidates, ourAl);
2683  1 AlignmentUtils.addReferenceAnnotations(candidates, ourAl, null);
2684   
2685  1 assertNotNull(ourAl.getAlignmentAnnotation());
2686  1 assertEquals(ourAl.getAlignmentAnnotation().length, 2);
2687   
2688  1 for (AlignmentAnnotation alan : ourAl.getAlignmentAnnotation())
2689    {
2690  2 verifyExpectedSequenceAnnotation(alan);
2691    }
2692    // Everything above works for 2.11.3 and 2.11.2.x.
2693    // now simulate copy/paste to new alignment
2694  1 SequenceI[] newSeqAl = new SequenceI[2];
2695    // copy sequences but no annotation
2696  1 newSeqAl[0] = new Sequence(ourAl.getSequenceAt(0),
2697    ourAl.getSequenceAt(0).getAnnotation());
2698  1 newSeqAl[1] = new Sequence(ourAl.getSequenceAt(1),
2699    ourAl.getSequenceAt(1).getAnnotation());
2700   
2701  1 Alignment newAl = new Alignment(newSeqAl);
2702    // delete annotation
2703  1 for (SequenceI sq : newAl.getSequences())
2704    {
2705  2 sq.setAlignmentAnnotation(new AlignmentAnnotation[0]);
2706    }
2707    // JAL-4182 scenario test
2708  1 SequenceGroup sg = new SequenceGroup(Arrays.asList(newSeqAl));
2709  1 sg.setStartRes(0);
2710  1 sg.setEndRes(newAl.getWidth());
2711  1 AlignmentUtils.addReferenceAnnotationTo(newAl, newSeqAl[0],
2712    newSeqAl[0].getDatasetSequence().getAnnotation()[0], sg);
2713  1 AlignmentUtils.addReferenceAnnotationTo(newAl, newSeqAl[1],
2714    newSeqAl[1].getDatasetSequence().getAnnotation()[0], sg);
2715  1 for (AlignmentAnnotation alan : newAl.getAlignmentAnnotation())
2716    {
2717  2 verifyExpectedSequenceAnnotation(alan);
2718    }
2719    }
2720   
2721    /**
2722    * helper - tests annotation is mapped to position it was originally created
2723    * for
2724    *
2725    * @param alan
2726    */
 
2727  5 toggle private void verifyExpectedSequenceAnnotation(AlignmentAnnotation alan)
2728    {
2729  51 for (int c = 0; c < alan.annotations.length; c++)
2730    {
2731  46 Annotation a = alan.annotations[c];
2732  46 if (a != null)
2733    {
2734  46 assertEquals("Misaligned annotation at " + c,
2735    (float) alan.sequenceRef.findPosition(c), a.value);
2736    }
2737    else
2738    {
2739  0 assertTrue("Unexpected Null at position " + c,
2740    c >= alan.sequenceRef.getLength()
2741    || Comparison.isGap(alan.sequenceRef.getCharAt(c)));
2742    }
2743    }
2744    }
2745   
 
2746  1 toggle @Test(groups = "Functional")
2747    public void testAddReferenceContactMap()
2748    {
2749  1 SequenceI sq = new Sequence("a", "SSSQ");
2750  1 ContactMatrixI cm = new SeqDistanceContactMatrix(4);
2751  1 AlignmentAnnotation cm_aan = sq.addContactList(cm);
2752  1 cm_aan.description = cm_aan.description + " cm1";
2753  1 SequenceI dssq = sq.createDatasetSequence();
2754   
2755    // remove annotation on our non-dataset sequence
2756  1 sq.removeAlignmentAnnotation(sq.getAnnotation()[0]);
2757    // test transfer
2758  1 Alignment al = new Alignment(new SequenceI[] { sq });
2759  1 SortedMap<String, String> tipEntries = new TreeMap<>();
2760  1 Map<SequenceI, List<AlignmentAnnotation>> candidates = new LinkedHashMap<>();
2761   
2762  1 AlignmentUtils.findAddableReferenceAnnotations(al.getSequences(),
2763    tipEntries, candidates, al);
2764  1 AlignmentUtils.addReferenceAnnotations(candidates, al, null);
2765  1 assertTrue("No contact map annotation transferred",
2766    al.getAlignmentAnnotation() != null
2767    && al.getAlignmentAnnotation().length == 1);
2768  1 AlignmentAnnotation alan = al.findAnnotations(sq, null, cm_aan.label)
2769    .iterator().next();
2770  1 ContactMatrixI t_cm = al.getContactMatrixFor(alan);
2771  1 assertNotNull("No contact map for the transferred annotation row.",
2772    t_cm);
2773  1 assertTrue(t_cm instanceof SeqDistanceContactMatrix);
2774  1 assertTrue(((SeqDistanceContactMatrix) t_cm).hasReferenceSeq());
2775   
2776  1 ContactListI cl = al.getContactListFor(alan, 1);
2777  1 assertNotNull(
2778    "No contact matrix recovered after reference annotation transfer",
2779    cl);
2780    // semantics of sequence associated contact list is slightly tricky - column
2781    // 3 in alignment should have data
2782  1 cl = al.getContactListFor(alan, 3);
2783  1 assertNotNull(
2784    "Contact matrix should have data for last position in sequence",
2785    cl);
2786   
2787  1 ContactMatrixI cm2 = new SeqDistanceContactMatrix(4);
2788  1 dssq.addContactList(cm2);
2789  1 tipEntries = new TreeMap<>();
2790  1 candidates = new LinkedHashMap<>();
2791   
2792  1 AlignmentUtils.findAddableReferenceAnnotations(al.getSequences(),
2793    tipEntries, candidates, al);
2794  1 AlignmentUtils.addReferenceAnnotations(candidates, al, null);
2795  1 assertTrue("Expected two contact map annotation transferred",
2796    al.getAlignmentAnnotation() != null
2797    && al.getAlignmentAnnotation().length == 2);
2798   
2799    }
2800   
 
2801  5 toggle @Test(
2802    groups = "Functional",
2803    dataProvider = "SecondaryStructureAnnotations")
2804    public void testSecondaryStructurePresentAndSources(
2805    AlignmentAnnotation[] annotations, boolean expectedSSPresent,
2806    ArrayList<String> expectedSSSources)
2807    {
2808  5 Assert.assertEquals(expectedSSPresent,
2809    AlignmentUtils.isSecondaryStructurePresent(annotations));
2810    }
2811   
 
2812  1 toggle @DataProvider(name = "SecondaryStructureAnnotations")
2813    public static Object[][] provideSecondaryStructureAnnotations()
2814    {
2815  1 AlignmentAnnotation ann1 = new AlignmentAnnotation(
2816    "Secondary Structure", "Secondary Structure",
2817    new Annotation[] {});
2818  1 AlignmentAnnotation ann2 = new AlignmentAnnotation("jnetpred",
2819    "jnetpred", new Annotation[] {});
2820  1 AlignmentAnnotation ann3 = new AlignmentAnnotation("Temp", "Temp",
2821    new Annotation[] {});
2822  1 AlignmentAnnotation ann4 = new AlignmentAnnotation("Temp", "Temp",
2823    new Annotation[] {});
2824   
2825  1 List<String> ssSources1 = new ArrayList<>(
2826    Arrays.asList("3D Structures"));
2827  1 List<String> ssSources2 = new ArrayList<>(Arrays.asList("JPred"));
2828  1 List<String> ssSources3 = new ArrayList<>(
2829    Arrays.asList("3D Structures", "JPred"));
2830  1 List<String> ssSources4 = new ArrayList<>();
2831   
2832  1 return new Object[][] {
2833    { new AlignmentAnnotation[]
2834    { ann1, ann3, ann4 }, true, ssSources1 },
2835    { new AlignmentAnnotation[]
2836    { ann2, ann3, ann4 }, true, ssSources2 },
2837    { new AlignmentAnnotation[]
2838    { ann3, ann4 }, false, ssSources4 },
2839    { new AlignmentAnnotation[] {}, false, ssSources4 },
2840    { new AlignmentAnnotation[]
2841    { ann1, ann2, ann3, ann4 }, true, ssSources3 } };
2842    }
2843   
 
2844  4 toggle @Test(dataProvider = "SecondaryStructureAnnotationColours", groups = { "Functional" })
2845    public void testSecondaryStructureAnnotationColour(char symbol,
2846    Color expectedColor)
2847    {
2848  4 Color actualColor = AlignmentUtils
2849    .getSecondaryStructureAnnotationColour(symbol);
2850  4 Assert.assertEquals(actualColor, expectedColor);
2851    }
2852   
 
2853  1 toggle @DataProvider(name = "SecondaryStructureAnnotationColours")
2854    public static Object[][] provideSecondaryStructureAnnotationColours()
2855    {
2856  1 return new Object[][] { { 'C', Color.gray }, { 'E', Color.green },
2857    { 'H', Color.red },
2858    { '-', Color.white } };
2859    }
2860   
 
2861  4 toggle @Test(dataProvider = "SSAnnotationPresence", groups = { "Functional" })
2862    public void testIsSSAnnotationPresent(
2863    Map<SequenceI, List<AlignmentAnnotation>> annotations,
2864    boolean expectedPresence)
2865    {
2866  4 boolean actualPresence = AlignmentUtils
2867    .isSSAnnotationPresent(annotations);
2868  4 Assert.assertEquals(actualPresence, expectedPresence);
2869    }
2870   
 
2871  1 toggle @DataProvider(name = "SSAnnotationPresence")
2872    public static Object[][] provideSSAnnotationPresence()
2873    {
2874  1 Map<SequenceI, List<AlignmentAnnotation>> annotations1 = new HashMap<>();
2875  1 SequenceI seq1 = new Sequence("Seq1", "ASD---ASD---ASD", 37, 45);
2876  1 List<AlignmentAnnotation> annotationsList1 = new ArrayList<>();
2877  1 annotationsList1.add(new AlignmentAnnotation("Secondary Structure",
2878    "Secondary Structure", new Annotation[] {}));
2879  1 annotations1.put(seq1, annotationsList1); // Annotation present secondary
2880    // structure for seq1
2881   
2882  1 Map<SequenceI, List<AlignmentAnnotation>> annotations2 = new HashMap<>();
2883  1 SequenceI seq2 = new Sequence("Seq2", "ASD---ASD------", 37, 42);
2884  1 List<AlignmentAnnotation> annotationsList2 = new ArrayList<>();
2885  1 annotationsList2.add(new AlignmentAnnotation("Other Annotation",
2886    "Other Annotation", new Annotation[] {}));
2887  1 annotations2.put(seq2, annotationsList2); // Annotation not related to any
2888    // of secondary structure for seq2
2889   
2890  1 Map<SequenceI, List<AlignmentAnnotation>> annotations3 = new HashMap<>();
2891    // Empty annotation map
2892   
2893  1 Map<SequenceI, List<AlignmentAnnotation>> annotations4 = new HashMap<>();
2894  1 SequenceI seq4 = new Sequence("Seq4", "ASD---ASD---AS-", 37, 44);
2895  1 List<AlignmentAnnotation> annotationsList4 = new ArrayList<>();
2896  1 annotationsList4.add(new AlignmentAnnotation("jnetpred", "jnetpred",
2897    new Annotation[] {}));
2898  1 annotations4.put(seq4, annotationsList4); // Annotation present from JPred
2899    // for seq4
2900   
2901  1 return new Object[][] { { annotations1, true }, // Annotations present
2902    // secondary structure
2903    // present
2904    { annotations2, false }, // No annotations related to any of the
2905    // secondary structure present
2906    { annotations3, false }, // Empty annotation map
2907    { annotations4, true }, // Annotations present from JPred secondary
2908    // structure present
2909    };
2910    }
2911   
2912   
 
2913  1 toggle @Test(groups = "Functional")
2914    public void testGetAlignmentAnnotationForSource()
2915    {
2916   
2917  1 SequenceI seq = new Sequence("testSeq", "ACDEFGHIKLMNPQRSTVWY");
2918   
2919  1 AlignmentAnnotation annot1 =
2920    new AlignmentAnnotation("Secondary Structure",
2921    "Secondary Structure for 4zhpA", new Annotation[] {}); //PDB
2922  1 annot1.setProperty(Constants.SS_PROVIDER_PROPERTY, "PDB");
2923  1 AlignmentAnnotation annot2 =
2924    new AlignmentAnnotation("Secondary Structure",
2925    "Secondary Structure for 5zhpA", new Annotation[] {}); //PDB
2926  1 annot2.setProperty(Constants.SS_PROVIDER_PROPERTY, "PDB");
2927  1 AlignmentAnnotation annot3 = new AlignmentAnnotation("Secondary Structure",
2928    "Secondary Structure for p09911_54-147__3a7wzn.1.p3502557454997462030P",
2929    new Annotation[] {}); //Swiss model
2930  1 annot3.setProperty(Constants.SS_PROVIDER_PROPERTY, "SWISS-MODEL");
2931  1 AlignmentAnnotation annot4 = new AlignmentAnnotation("Secondary Structure",
2932    "Secondary Structure for af-q43517-f1A", new Annotation[] {}); //Alphafold
2933  1 annot4.setProperty(Constants.SS_PROVIDER_PROPERTY, "AlphaFold DB");
2934  1 AlignmentAnnotation annot5 = new AlignmentAnnotation("Secondary Structure",
2935    "Secondary Structure for af-q43517-f1A", new Annotation[] {}); //Alphafold
2936  1 annot5.setProperty(Constants.SS_PROVIDER_PROPERTY, "AlphaFold DB");
2937   
2938  1 seq.addAlignmentAnnotation(annot1);
2939  1 seq.addAlignmentAnnotation(annot2);
2940  1 seq.addAlignmentAnnotation(annot3);
2941  1 seq.addAlignmentAnnotation(annot4);
2942  1 seq.addAlignmentAnnotation(annot5);
2943   
2944  1 List<AlignmentAnnotation> all = AlignmentUtils.getAlignmentAnnotationForSource(
2945    seq, Constants.SS_ALL_PROVIDERS);
2946  1 assertTrue("Expected non-null result for SS_ALL_PROVIDERS",
2947    all != null);
2948  1 Assert.assertEquals(all.size(), 5, "Expected all annotations");
2949   
2950  1 List<AlignmentAnnotation> pdb = AlignmentUtils.getAlignmentAnnotationForSource(
2951    seq, "PDB");
2952  1 assertTrue("Expected non-null result for PDB",
2953    pdb != null);
2954  1 Assert.assertEquals(pdb.size(), 2, "Expected 2 annotations");
2955   
2956  1 List<AlignmentAnnotation> swiss = AlignmentUtils.getAlignmentAnnotationForSource(
2957    seq, "SWISS-MODEL");
2958  1 assertTrue("Expected non-null result for SWISS-MODEL",
2959    swiss != null);
2960  1 Assert.assertEquals(swiss.size(), 1, "Expected 1 annotation");
2961   
2962  1 List<AlignmentAnnotation> alphafold = AlignmentUtils.getAlignmentAnnotationForSource(
2963    seq, "AlphaFold DB");
2964  1 assertTrue("Expected non-null result for AlphaFold DB",
2965    alphafold != null);
2966  1 Assert.assertEquals(alphafold.size(), 2, "Expected 2 annotations");
2967    }
2968   
2969   
 
2970  1 toggle @Test(groups = "Functional")
2971    public void testa3mToMSA_byfile() throws Exception
2972    {
2973  1 String queryFile="examples/testdata/query"; // "examples/uniref50.a3m/a3m.fa
2974  1 SequenceI[] origseq = new FastaFile(queryFile+".a3m.fa", DataSourceType.FILE).getSeqsAsArray();
2975   
2976  1 SequenceI[] a3mseq = new FastaFile(queryFile+".a3m", DataSourceType.FILE).getSeqsAsArray();
2977  1 ShiftList maxinserts = new ShiftList();
2978    // from reformat.pl
2979    // patch:
2980    // missing [0,1] at beginning
2981  1 String expectedShifts = "[16,3],[34,4],[38,23],[76,2],[86,1],[92,1],[108,21],[130,3],[132,27],[140,4],[148,3],[176,1],[178,1],[184,1],[188,1],[198,1],[200,1],[202,10],[210,5],[212,3],[224,10],[228,1],[234,1],[238,2],[242,1],[244,6],[246,1],[250,19],[254,2],[256,3],[264,19],[270,1],[272,3],[294,4],[300,1],[306,2],[328,2],[330,15],[332,5],[336,1],[338,5],[340,2],[346,6],[352,2],[364,1],[366,4],[376,1],[378,7],[380,2],[382,1],[384,2],[428,1],[430,1],[472,6],[476,4],[478,2],[480,1],[484,8],[498,5],[502,3],[506,10],[508,1],[510,2],[512,4],[516,3],[524,9],[536,1],[540,7],[542,10],[544,2],[546,1],[552,4],[560,4],[572,14],[582,1],[590,1],[612,2],[620,2],[652,1],[658,1],[686,2],[688,2],[744,3],[746,1],[748,8],[754,5],[760,2],[766,2],[800,1],[810,2],[814,10],[816,15],[820,1],[830,5]";
2982  1 AlignmentUtils.computeMaxShifts(a3mseq,maxinserts);
2983   
2984  1 boolean comma=false;
2985  1 String obsinserts="";
2986  1 for (int[] il:maxinserts.getShifts()) {
2987  94 if (comma)
2988    {
2989  93 obsinserts+=",";
2990    }
2991  94 comma=true;
2992  94 obsinserts+="["+il[0]+","+il[1]+"]";
2993    }
2994  1 assertEquals(expectedShifts,obsinserts);
2995   
2996    // System.out.println("");
2997    // for (int i=0; i<origseq.length;i++)
2998    // {
2999    // System.out.println("or: "+origseq[i].getSequenceAsString());
3000    // System.out.println("xf: "+a3mseq[i].getSequenceAsString());
3001    // }
3002  1 AlignmentUtils.a3mToMSA(a3mseq);
3003  1 String exp="",match="";
3004  60 for (int i=0; i<origseq.length;i++)
3005    {
3006   
3007    // System.out.println(origseq[i].getName()+"\n"
3008    // + origseq[i].getSequenceAsString());
3009    // System.out.println(a3mseq[i].getSequenceAsString());
3010  59 if (!origseq[i].getSequenceAsString().equals(a3mseq[i].getSequenceAsString())) {
3011  0 exp+=origseq[i].getSequenceAsString()+" "+origseq[i].getName()+"\n";
3012  0 match+=a3mseq[i].getSequenceAsString()+" "+a3mseq[i].getName()+"\n";;
3013  0 System.out.println(i+"or: "+origseq[i].getSequenceAsString());
3014  0 System.out.println(i+"xf: "+a3mseq[i].getSequenceAsString());
3015    }
3016    }
3017  1 assertEquals(exp,match);
3018    }
 
3019  1 toggle @Test(groups = "Functional")
3020    public void testA3mInsertShifts() throws Exception
3021    {
3022  1 ShiftList a3mInserts = new ShiftList();
3023  1 a3mInserts.addShift(16,3);
3024  1 a3mInserts.addShift(38,23);
3025  1 SequenceI sq = new Sequence("F1RVZ5_PIG","--PAGGQCtgiWHLLTRPLRP--QG");
3026  1 String expSeq = "--PAGGQCtgiWHLLTRPLRP-------------------------QG";
3027  1 AlignmentUtils.insertShifts(new SequenceI[] {sq}, a3mInserts);
3028  1 assertEquals(expSeq, sq.getSequenceAsString());
3029    }
3030   
 
3031  1 toggle @Test(groups = "Functional")
3032    public void testa3mToMSA()
3033    {
3034  1 String ins1="aaaSSmmm..TTaa";
3035  1 String nonI="AASSTTVVWWXXYY";
3036  1 Sequence seq = new Sequence("a3mS1",ins1);
3037  1 Sequence nonI_seq = new Sequence("a3mS2",nonI);
3038  1 String exp_nonI="---AA---SSTT--VVWWXXYY";
3039   
3040  1 ArrayList<int[]> expInserts = new ArrayList();
3041  1 expInserts.add(new int [] { 0,3});
3042  1 expInserts.add(new int [] { 4,3});
3043  1 expInserts.add(new int [] { 12,2});
3044   
3045  1 ShiftList obsInserts = new ShiftList();
3046  1 AlignmentUtils.computeMaxShifts(new Sequence[] { seq,nonI_seq}, obsInserts);
3047  1 assertEquals(expInserts.size(),obsInserts.getShifts().size());
3048  4 for (int p=0;p<expInserts.size(); p++)
3049    {
3050  3 int[] obser=obsInserts.getShifts().get(p);
3051  3 int[] exped=expInserts.get(p);
3052  3 Assert.assertEquals(obser[0],exped[0]," for "+p);
3053  3 Assert.assertEquals(obser[1],exped[1]," for "+p);
3054    }
3055   
3056  1 AlignmentUtils.a3mToMSA(new Sequence[] {seq,nonI_seq});
3057  1 assertEquals(ins1,seq.getSequenceAsString());
3058  1 assertEquals(exp_nonI,nonI_seq.getSequenceAsString());
3059   
3060    // Reset and Reverse
3061  1 seq = new Sequence("a3mS1",ins1);
3062  1 nonI_seq = new Sequence("a3mS2",nonI);
3063   
3064  1 AlignmentUtils.a3mToMSA(new Sequence[] {nonI_seq, seq});
3065  1 assertEquals(ins1,seq.getSequenceAsString());
3066  1 assertEquals(exp_nonI,nonI_seq.getSequenceAsString());
3067    }
3068   
3069    }