Clover icon

Coverage Report

  1. Project Clover database Wed Dec 3 2025 15:58:31 GMT
  2. Package jalview.analysis

File AlignmentUtilsTests.java

 

Code metrics

22
1,343
57
1
3,054
1,996
68
0.05
23.56
57
1.19

Classes

Class Line # Actions
AlignmentUtilsTests 79 1,343 68
0.9683544 96.8%
 

Contributing tests

This file is covered by 54 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.analysis;
22   
23    import static org.testng.AssertJUnit.assertEquals;
24    import static org.testng.AssertJUnit.assertFalse;
25    import static org.testng.AssertJUnit.assertNotNull;
26    import static org.testng.AssertJUnit.assertNull;
27    import static org.testng.AssertJUnit.assertSame;
28    import static org.testng.AssertJUnit.assertTrue;
29   
30    import java.awt.Color;
31    import java.io.IOException;
32    import java.util.ArrayList;
33    import java.util.Arrays;
34    import java.util.HashMap;
35    import java.util.LinkedHashMap;
36    import java.util.List;
37    import java.util.Map;
38    import java.util.SortedMap;
39    import java.util.TreeMap;
40    import java.util.Vector;
41   
42    import org.testng.Assert;
43    import org.testng.annotations.BeforeClass;
44    import org.testng.annotations.DataProvider;
45    import org.testng.annotations.Test;
46   
47    import jalview.datamodel.AlignedCodonFrame;
48    import jalview.datamodel.Alignment;
49    import jalview.datamodel.AlignmentAnnotation;
50    import jalview.datamodel.AlignmentI;
51    import jalview.datamodel.Annotation;
52    import jalview.datamodel.ContactListI;
53    import jalview.datamodel.ContactMatrixI;
54    import jalview.datamodel.DBRefEntry;
55    import jalview.datamodel.GeneLociI;
56    import jalview.datamodel.Mapping;
57    import jalview.datamodel.PDBEntry;
58    import jalview.datamodel.SearchResultMatchI;
59    import jalview.datamodel.SearchResultsI;
60    import jalview.datamodel.SeqDistanceContactMatrix;
61    import jalview.datamodel.Sequence;
62    import jalview.datamodel.SequenceFeature;
63    import jalview.datamodel.SequenceGroup;
64    import jalview.datamodel.SequenceI;
65    import jalview.gui.JvOptionPane;
66    import jalview.io.AppletFormatAdapter;
67    import jalview.io.DataSourceType;
68    import jalview.io.FastaFile;
69    import jalview.io.FileFormat;
70    import jalview.io.FileFormatI;
71    import jalview.io.FormatAdapter;
72    import jalview.io.gff.SequenceOntologyI;
73    import jalview.util.Comparison;
74    import jalview.util.Constants;
75    import jalview.util.MapList;
76    import jalview.util.MappingUtils;
77    import jalview.util.ShiftList;
78   
 
79    public class AlignmentUtilsTests
80    {
81    private static Sequence ts = new Sequence("short",
82    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm");
83   
 
84  1 toggle @BeforeClass(alwaysRun = true)
85    public void setUpJvOptionPane()
86    {
87  1 JvOptionPane.setInteractiveMode(false);
88  1 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
89   
90  1 AlignmentAnnotation ann1 = new AlignmentAnnotation(
91    "Secondary Structure", "Secondary Structure",
92    new Annotation[] {});
93  1 AlignmentAnnotation ann2 = new AlignmentAnnotation("jnetpred",
94    "jnetpred", new Annotation[] {});
95  1 AlignmentAnnotation ann3 = new AlignmentAnnotation("Temp", "Temp",
96    new Annotation[] {});
97  1 AlignmentAnnotation ann4 = new AlignmentAnnotation("Temp", "Temp",
98    new Annotation[] {});
99   
100  1 AlignmentAnnotation[] anns1 = new AlignmentAnnotation[] { ann1, ann3,
101    ann4 };
102   
103  1 AlignmentAnnotation[] anns2 = new AlignmentAnnotation[] { ann2, ann3,
104    ann4 };
105   
106  1 AlignmentAnnotation[] anns3 = new AlignmentAnnotation[] { ann3, ann4 };
107   
108  1 AlignmentAnnotation[] anns4 = new AlignmentAnnotation[0];
109   
110  1 AlignmentAnnotation[] anns5 = new AlignmentAnnotation[] { ann1, ann2,
111    ann3, ann4 };
112    }
113   
 
114  1 toggle @Test(groups = { "Functional" })
115    public void testExpandContext()
116    {
117  1 AlignmentI al = new Alignment(new Sequence[] {});
118  6 for (int i = 4; i < 14; i += 2)
119    {
120  5 SequenceI s1 = ts.deriveSequence().getSubSequence(i, i + 7);
121  5 al.addSequence(s1);
122    }
123  1 System.out.println(new AppletFormatAdapter()
124    .formatSequences(FileFormat.Clustal, al, true));
125  27 for (int flnk = -1; flnk < 25; flnk++)
126    {
127  26 AlignmentI exp = AlignmentUtils.expandContext(al, flnk);
128  26 System.out.println("\nFlank size: " + flnk);
129  26 System.out.println(new AppletFormatAdapter()
130    .formatSequences(FileFormat.Clustal, exp, true));
131  26 if (flnk == -1)
132    {
133    /*
134    * Full expansion to complete sequences
135    */
136  1 for (SequenceI sq : exp.getSequences())
137    {
138  5 String ung = sq.getSequenceAsString().replaceAll("-+", "");
139  5 final String errorMsg = "Flanking sequence not the same as original dataset sequence.\n"
140    + ung + "\n"
141    + sq.getDatasetSequence().getSequenceAsString();
142  5 assertTrue(errorMsg, ung.equalsIgnoreCase(
143    sq.getDatasetSequence().getSequenceAsString()));
144    }
145    }
146  25 else if (flnk == 24)
147    {
148    /*
149    * Last sequence is fully expanded, others have leading gaps to match
150    */
151  1 assertTrue(exp.getSequenceAt(4).getSequenceAsString()
152    .startsWith("abc"));
153  1 assertTrue(exp.getSequenceAt(3).getSequenceAsString()
154    .startsWith("--abc"));
155  1 assertTrue(exp.getSequenceAt(2).getSequenceAsString()
156    .startsWith("----abc"));
157  1 assertTrue(exp.getSequenceAt(1).getSequenceAsString()
158    .startsWith("------abc"));
159  1 assertTrue(exp.getSequenceAt(0).getSequenceAsString()
160    .startsWith("--------abc"));
161    }
162    }
163    }
164   
165    /**
166    * Test that annotations are correctly adjusted by expandContext
167    */
 
168  1 toggle @Test(groups = { "Functional" })
169    public void testExpandContext_annotation()
170    {
171  1 AlignmentI al = new Alignment(new Sequence[] {});
172  1 SequenceI ds = new Sequence("Seq1", "ABCDEFGHI");
173    // subsequence DEF:
174  1 SequenceI seq1 = ds.deriveSequence().getSubSequence(3, 6);
175  1 al.addSequence(seq1);
176   
177    /*
178    * Annotate DEF with 4/5/6 respectively
179    */
180  1 Annotation[] anns = new Annotation[] { new Annotation(4),
181    new Annotation(5), new Annotation(6) };
182  1 AlignmentAnnotation ann = new AlignmentAnnotation("SS",
183    "secondary structure", anns);
184  1 seq1.addAlignmentAnnotation(ann);
185   
186    /*
187    * The annotations array should match aligned positions
188    */
189  1 assertEquals(3, ann.annotations.length);
190  1 assertEquals(4, ann.annotations[0].value, 0.001);
191  1 assertEquals(5, ann.annotations[1].value, 0.001);
192  1 assertEquals(6, ann.annotations[2].value, 0.001);
193   
194    /*
195    * Check annotation to sequence position mappings before expanding the
196    * sequence; these are set up in Sequence.addAlignmentAnnotation ->
197    * Annotation.setSequenceRef -> createSequenceMappings
198    */
199  1 assertNull(ann.getAnnotationForPosition(1));
200  1 assertNull(ann.getAnnotationForPosition(2));
201  1 assertNull(ann.getAnnotationForPosition(3));
202  1 assertEquals(4, ann.getAnnotationForPosition(4).value, 0.001);
203  1 assertEquals(5, ann.getAnnotationForPosition(5).value, 0.001);
204  1 assertEquals(6, ann.getAnnotationForPosition(6).value, 0.001);
205  1 assertNull(ann.getAnnotationForPosition(7));
206  1 assertNull(ann.getAnnotationForPosition(8));
207  1 assertNull(ann.getAnnotationForPosition(9));
208   
209    /*
210    * Expand the subsequence to the full sequence abcDEFghi
211    */
212  1 AlignmentI expanded = AlignmentUtils.expandContext(al, -1);
213  1 assertEquals("abcDEFghi",
214    expanded.getSequenceAt(0).getSequenceAsString());
215   
216    /*
217    * Confirm the alignment and sequence have the same SS annotation,
218    * referencing the expanded sequence
219    */
220  1 ann = expanded.getSequenceAt(0).getAnnotation()[0];
221  1 assertSame(ann, expanded.getAlignmentAnnotation()[0]);
222  1 assertSame(expanded.getSequenceAt(0), ann.sequenceRef);
223   
224    /*
225    * The annotations array should have null values except for annotated
226    * positions
227    */
228  1 assertNull(ann.annotations[0]);
229  1 assertNull(ann.annotations[1]);
230  1 assertNull(ann.annotations[2]);
231  1 assertEquals(4, ann.annotations[3].value, 0.001);
232  1 assertEquals(5, ann.annotations[4].value, 0.001);
233  1 assertEquals(6, ann.annotations[5].value, 0.001);
234  1 assertNull(ann.annotations[6]);
235  1 assertNull(ann.annotations[7]);
236  1 assertNull(ann.annotations[8]);
237   
238    /*
239    * sequence position mappings should be unchanged
240    */
241  1 assertNull(ann.getAnnotationForPosition(1));
242  1 assertNull(ann.getAnnotationForPosition(2));
243  1 assertNull(ann.getAnnotationForPosition(3));
244  1 assertEquals(4, ann.getAnnotationForPosition(4).value, 0.001);
245  1 assertEquals(5, ann.getAnnotationForPosition(5).value, 0.001);
246  1 assertEquals(6, ann.getAnnotationForPosition(6).value, 0.001);
247  1 assertNull(ann.getAnnotationForPosition(7));
248  1 assertNull(ann.getAnnotationForPosition(8));
249  1 assertNull(ann.getAnnotationForPosition(9));
250    }
251   
252    /**
253    * Test method that returns a map of lists of sequences by sequence name.
254    *
255    * @throws IOException
256    */
 
257  1 toggle @Test(groups = { "Functional" })
258    public void testGetSequencesByName() throws IOException
259    {
260  1 final String data = ">Seq1Name\nKQYL\n" + ">Seq2Name\nRFPW\n"
261    + ">Seq1Name\nABCD\n";
262  1 AlignmentI al = loadAlignment(data, FileFormat.Fasta);
263  1 Map<String, List<SequenceI>> map = AlignmentUtils
264    .getSequencesByName(al);
265  1 assertEquals(2, map.keySet().size());
266  1 assertEquals(2, map.get("Seq1Name").size());
267  1 assertEquals("KQYL", map.get("Seq1Name").get(0).getSequenceAsString());
268  1 assertEquals("ABCD", map.get("Seq1Name").get(1).getSequenceAsString());
269  1 assertEquals(1, map.get("Seq2Name").size());
270  1 assertEquals("RFPW", map.get("Seq2Name").get(0).getSequenceAsString());
271    }
272   
273    /**
274    * Helper method to load an alignment and ensure dataset sequences are set up.
275    *
276    * @param data
277    * @param format
278    * the file format in which the supplied data is to be read
279    * @return
280    * @throws IOException
281    */
 
282  1 toggle protected AlignmentI loadAlignment(final String data, FileFormatI format)
283    throws IOException
284    {
285  1 AlignmentI a = new FormatAdapter().readFile(data, DataSourceType.PASTE,
286    format);
287  1 a.setDataset(null);
288  1 return a;
289    }
290   
291    /**
292    * Test mapping of protein to cDNA, for the case where we have no sequence
293    * cross-references, so mappings are made first-served 1-1 where sequences
294    * translate.
295    *
296    * @throws IOException
297    */
 
298  1 toggle @Test(groups = { "Functional" })
299    public void testMapProteinAlignmentToCdna_noXrefs() throws IOException
300    {
301  1 List<SequenceI> protseqs = new ArrayList<>();
302  1 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
303  1 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
304  1 protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
305  1 AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
306  1 protein.setDataset(null);
307   
308  1 List<SequenceI> dnaseqs = new ArrayList<>();
309  1 dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR
310  1 dnaseqs.add(new Sequence("EMBL|A22222", "GAGATACAA")); // = EIQ
311  1 dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ
312  1 dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG")); // = EIQ
313  1 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[4]));
314  1 cdna.setDataset(null);
315   
316  1 assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
317   
318    // 3 mappings made, each from 1 to 1 sequence
319  1 assertEquals(3, protein.getCodonFrames().size());
320  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
321  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
322  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
323   
324    // V12345 mapped to A22222
325  1 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
326    .get(0);
327  1 assertEquals(1, acf.getdnaSeqs().length);
328  1 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
329    acf.getdnaSeqs()[0]);
330  1 Mapping[] protMappings = acf.getProtMappings();
331  1 assertEquals(1, protMappings.length);
332  1 MapList mapList = protMappings[0].getMap();
333  1 assertEquals(3, mapList.getFromRatio());
334  1 assertEquals(1, mapList.getToRatio());
335  1 assertTrue(
336    Arrays.equals(new int[]
337    { 1, 9 }, mapList.getFromRanges().get(0)));
338  1 assertEquals(1, mapList.getFromRanges().size());
339  1 assertTrue(
340    Arrays.equals(new int[]
341    { 1, 3 }, mapList.getToRanges().get(0)));
342  1 assertEquals(1, mapList.getToRanges().size());
343   
344    // V12346 mapped to A33333
345  1 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
346  1 assertEquals(1, acf.getdnaSeqs().length);
347  1 assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
348    acf.getdnaSeqs()[0]);
349   
350    // V12347 mapped to A11111
351  1 acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
352  1 assertEquals(1, acf.getdnaSeqs().length);
353  1 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
354    acf.getdnaSeqs()[0]);
355   
356    // no mapping involving the 'extra' A44444
357  1 assertTrue(protein.getCodonFrame(cdna.getSequenceAt(3)).isEmpty());
358    }
359   
360    /**
361    * Test for the alignSequenceAs method that takes two sequences and a mapping.
362    */
 
363  1 toggle @Test(groups = { "Functional" })
364    public void testAlignSequenceAs_withMapping_noIntrons()
365    {
366  1 MapList map = new MapList(new int[] { 1, 6 }, new int[] { 1, 2 }, 3, 1);
367   
368    /*
369    * No existing gaps in dna:
370    */
371  1 checkAlignSequenceAs("GGGAAA", "-A-L-", false, false, map,
372    "---GGG---AAA");
373   
374    /*
375    * Now introduce gaps in dna but ignore them when realigning.
376    */
377  1 checkAlignSequenceAs("-G-G-G-A-A-A-", "-A-L-", false, false, map,
378    "---GGG---AAA");
379   
380    /*
381    * Now include gaps in dna when realigning. First retaining 'mapped' gaps
382    * only, i.e. those within the exon region.
383    */
384  1 checkAlignSequenceAs("-G-G--G-A--A-A-", "-A-L-", true, false, map,
385    "---G-G--G---A--A-A");
386   
387    /*
388    * Include all gaps in dna when realigning (within and without the exon
389    * region). The leading gap, and the gaps between codons, are subsumed by
390    * the protein alignment gap.
391    */
392  1 checkAlignSequenceAs("-G-GG--AA-A---", "-A-L-", true, true, map,
393    "---G-GG---AA-A---");
394   
395    /*
396    * Include only unmapped gaps in dna when realigning (outside the exon
397    * region). The leading gap, and the gaps between codons, are subsumed by
398    * the protein alignment gap.
399    */
400  1 checkAlignSequenceAs("-G-GG--AA-A-", "-A-L-", false, true, map,
401    "---GGG---AAA---");
402    }
403   
404    /**
405    * Test for the alignSequenceAs method that takes two sequences and a mapping.
406    */
 
407  1 toggle @Test(groups = { "Functional" })
408    public void testAlignSequenceAs_withMapping_withIntrons()
409    {
410    /*
411    * Exons at codon 2 (AAA) and 4 (TTT)
412    */
413  1 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
414    new int[]
415    { 1, 2 }, 3, 1);
416   
417    /*
418    * Simple case: no gaps in dna
419    */
420  1 checkAlignSequenceAs("GGGAAACCCTTTGGG", "--A-L-", false, false, map,
421    "GGG---AAACCCTTTGGG");
422   
423    /*
424    * Add gaps to dna - but ignore when realigning.
425    */
426  1 checkAlignSequenceAs("-G-G-G--A--A---AC-CC-T-TT-GG-G-", "--A-L-", false,
427    false, map, "GGG---AAACCCTTTGGG");
428   
429    /*
430    * Add gaps to dna - include within exons only when realigning.
431    */
432  1 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-", true,
433    false, map, "GGG---A--A---ACCCT-TTGGG");
434   
435    /*
436    * Include gaps outside exons only when realigning.
437    */
438  1 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-",
439    false, true, map, "-G-G-GAAAC-CCTTT-GG-G-");
440   
441    /*
442    * Include gaps following first intron if we are 'preserving mapped gaps'
443    */
444  1 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-", true,
445    true, map, "-G-G-G--A--A---A-C-CC-T-TT-GG-G-");
446   
447    /*
448    * Include all gaps in dna when realigning.
449    */
450  1 checkAlignSequenceAs("-G-G-G--A--A---A-C-CC-T-TT-GG-G-", "--A-L-", true,
451    true, map, "-G-G-G--A--A---A-C-CC-T-TT-GG-G-");
452    }
453   
454    /**
455    * Test for the case where not all of the protein sequence is mapped to cDNA.
456    */
 
457  1 toggle @Test(groups = { "Functional" })
458    public void testAlignSequenceAs_withMapping_withUnmappedProtein()
459    {
460    /*
461    * Exons at codon 2 (AAA) and 4 (TTT) mapped to A and P
462    */
463  1 final MapList map = new MapList(new int[] { 4, 6, 10, 12 },
464    new int[]
465    { 1, 1, 3, 3 }, 3, 1);
466   
467    /*
468    * -L- 'aligns' ccc------
469    */
470  1 checkAlignSequenceAs("gggAAAcccTTTggg", "-A-L-P-", false, false, map,
471    "gggAAAccc------TTTggg");
472    }
473   
474    /**
475    * Helper method that performs and verifies the method under test.
476    *
477    * @param alignee
478    * the sequence to be realigned
479    * @param alignModel
480    * the sequence whose alignment is to be copied
481    * @param preserveMappedGaps
482    * @param preserveUnmappedGaps
483    * @param map
484    * @param expected
485    */
 
486  14 toggle protected void checkAlignSequenceAs(final String alignee,
487    final String alignModel, final boolean preserveMappedGaps,
488    final boolean preserveUnmappedGaps, MapList map,
489    final String expected)
490    {
491  14 SequenceI alignMe = new Sequence("Seq1", alignee);
492  14 alignMe.createDatasetSequence();
493  14 SequenceI alignFrom = new Sequence("Seq2", alignModel);
494  14 alignFrom.createDatasetSequence();
495  14 AlignedCodonFrame acf = new AlignedCodonFrame();
496  14 acf.addMap(alignMe.getDatasetSequence(), alignFrom.getDatasetSequence(),
497    map);
498   
499  14 AlignmentUtils.alignSequenceAs(alignMe, alignFrom, acf, "---", '-',
500    preserveMappedGaps, preserveUnmappedGaps);
501  14 assertEquals(expected, alignMe.getSequenceAsString());
502    }
503   
504    /**
505    * Test for the alignSequenceAs method where we preserve gaps in introns only.
506    */
 
507  1 toggle @Test(groups = { "Functional" })
508    public void testAlignSequenceAs_keepIntronGapsOnly()
509    {
510   
511    /*
512    * Intron GGGAAA followed by exon CCCTTT
513    */
514  1 MapList map = new MapList(new int[] { 7, 12 }, new int[] { 1, 2 }, 3,
515    1);
516   
517  1 checkAlignSequenceAs("GG-G-AA-A-C-CC-T-TT", "AL", false, true, map,
518    "GG-G-AA-ACCCTTT");
519    }
520   
521    /**
522    * Test the method that realigns protein to match mapped codon alignment.
523    */
 
524  1 toggle @Test(groups = { "Functional" })
525    public void testAlignProteinAsDna()
526    {
527    // seq1 codons are [1,2,3] [4,5,6] [7,8,9] [10,11,12]
528  1 SequenceI dna1 = new Sequence("Seq1", "TGCCATTACCAG-");
529    // seq2 codons are [1,3,4] [5,6,7] [8,9,10] [11,12,13]
530  1 SequenceI dna2 = new Sequence("Seq2", "T-GCCATTACCAG");
531    // seq3 codons are [1,2,3] [4,5,7] [8,9,10] [11,12,13]
532  1 SequenceI dna3 = new Sequence("Seq3", "TGCCA-TTACCAG");
533  1 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
534  1 dna.setDataset(null);
535   
536    // protein alignment will be realigned like dna
537  1 SequenceI prot1 = new Sequence("Seq1", "CHYQ");
538  1 SequenceI prot2 = new Sequence("Seq2", "CHYQ");
539  1 SequenceI prot3 = new Sequence("Seq3", "CHYQ");
540  1 SequenceI prot4 = new Sequence("Seq4", "R-QSV"); // unmapped, unchanged
541  1 AlignmentI protein = new Alignment(
542    new SequenceI[]
543    { prot1, prot2, prot3, prot4 });
544  1 protein.setDataset(null);
545   
546  1 MapList map = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, 3,
547    1);
548  1 AlignedCodonFrame acf = new AlignedCodonFrame();
549  1 acf.addMap(dna1.getDatasetSequence(), prot1.getDatasetSequence(), map);
550  1 acf.addMap(dna2.getDatasetSequence(), prot2.getDatasetSequence(), map);
551  1 acf.addMap(dna3.getDatasetSequence(), prot3.getDatasetSequence(), map);
552  1 ArrayList<AlignedCodonFrame> acfs = new ArrayList<>();
553  1 acfs.add(acf);
554  1 protein.setCodonFrames(acfs);
555   
556    /*
557    * Translated codon order is [1,2,3] [1,3,4] [4,5,6] [4,5,7] [5,6,7] [7,8,9]
558    * [8,9,10] [10,11,12] [11,12,13]
559    */
560  1 AlignmentUtils.alignProteinAsDna(protein, dna);
561  1 assertEquals("C-H--Y-Q-", prot1.getSequenceAsString());
562  1 assertEquals("-C--H-Y-Q", prot2.getSequenceAsString());
563  1 assertEquals("C--H--Y-Q", prot3.getSequenceAsString());
564  1 assertEquals("R-QSV", prot4.getSequenceAsString());
565    }
566   
567    /**
568    * Test the method that tests whether a CDNA sequence translates to a protein
569    * sequence
570    */
 
571  1 toggle @Test(groups = { "Functional" })
572    public void testTranslatesAs()
573    {
574    // null arguments check
575  1 assertFalse(AlignmentUtils.translatesAs(null, 0, null));
576  1 assertFalse(AlignmentUtils.translatesAs(new char[] { 't' }, 0, null));
577  1 assertFalse(AlignmentUtils.translatesAs(null, 0, new char[] { 'a' }));
578   
579    // straight translation
580  1 assertTrue(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0,
581    "FPKG".toCharArray()));
582    // with extra start codon (not in protein)
583  1 assertTrue(AlignmentUtils.translatesAs("atgtttcccaaaggg".toCharArray(),
584    3, "FPKG".toCharArray()));
585    // with stop codon1 (not in protein)
586  1 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtaa".toCharArray(),
587    0, "FPKG".toCharArray()));
588    // with stop codon1 (in protein as *)
589  1 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtaa".toCharArray(),
590    0, "FPKG*".toCharArray()));
591    // with stop codon2 (not in protein)
592  1 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtag".toCharArray(),
593    0, "FPKG".toCharArray()));
594    // with stop codon3 (not in protein)
595  1 assertTrue(AlignmentUtils.translatesAs("tttcccaaagggtga".toCharArray(),
596    0, "FPKG".toCharArray()));
597    // with start and stop codon1
598  1 assertTrue(AlignmentUtils.translatesAs(
599    "atgtttcccaaagggtaa".toCharArray(), 3, "FPKG".toCharArray()));
600    // with start and stop codon1 (in protein as *)
601  1 assertTrue(AlignmentUtils.translatesAs(
602    "atgtttcccaaagggtaa".toCharArray(), 3, "FPKG*".toCharArray()));
603    // with start and stop codon2
604  1 assertTrue(AlignmentUtils.translatesAs(
605    "atgtttcccaaagggtag".toCharArray(), 3, "FPKG".toCharArray()));
606    // with start and stop codon3
607  1 assertTrue(AlignmentUtils.translatesAs(
608    "atgtttcccaaagggtga".toCharArray(), 3, "FPKG".toCharArray()));
609   
610    // with embedded stop codons
611  1 assertTrue(AlignmentUtils.translatesAs(
612    "atgtttTAGcccaaaTAAgggtga".toCharArray(), 3,
613    "F*PK*G".toCharArray()));
614   
615    // wrong protein
616  1 assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0,
617    "FPMG".toCharArray()));
618   
619    // truncated dna
620  1 assertFalse(AlignmentUtils.translatesAs("tttcccaaagg".toCharArray(), 0,
621    "FPKG".toCharArray()));
622   
623    // truncated protein
624  1 assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0,
625    "FPK".toCharArray()));
626   
627    // overlong dna (doesn't end in stop codon)
628  1 assertFalse(AlignmentUtils.translatesAs("tttcccaaagggttt".toCharArray(),
629    0, "FPKG".toCharArray()));
630   
631    // dna + stop codon + more
632  1 assertFalse(AlignmentUtils.translatesAs(
633    "tttcccaaagggttaga".toCharArray(), 0, "FPKG".toCharArray()));
634   
635    // overlong protein
636  1 assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0,
637    "FPKGQ".toCharArray()));
638    }
639   
640    /**
641    * Test mapping of protein to cDNA, for cases where the cDNA has start and/or
642    * stop codons in addition to the protein coding sequence.
643    *
644    * @throws IOException
645    */
 
646  1 toggle @Test(groups = { "Functional" })
647    public void testMapProteinAlignmentToCdna_withStartAndStopCodons()
648    throws IOException
649    {
650  1 List<SequenceI> protseqs = new ArrayList<>();
651  1 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
652  1 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
653  1 protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
654  1 AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
655  1 protein.setDataset(null);
656   
657  1 List<SequenceI> dnaseqs = new ArrayList<>();
658    // start + SAR:
659  1 dnaseqs.add(new Sequence("EMBL|A11111", "ATGTCAGCACGC"));
660    // = EIQ + stop
661  1 dnaseqs.add(new Sequence("EMBL|A22222", "GAGATACAATAA"));
662    // = start +EIQ + stop
663  1 dnaseqs.add(new Sequence("EMBL|A33333", "ATGGAAATCCAGTAG"));
664  1 dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG"));
665  1 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[4]));
666  1 cdna.setDataset(null);
667   
668  1 assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
669   
670    // 3 mappings made, each from 1 to 1 sequence
671  1 assertEquals(3, protein.getCodonFrames().size());
672  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
673  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
674  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
675   
676    // V12345 mapped from A22222
677  1 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
678    .get(0);
679  1 assertEquals(1, acf.getdnaSeqs().length);
680  1 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
681    acf.getdnaSeqs()[0]);
682  1 Mapping[] protMappings = acf.getProtMappings();
683  1 assertEquals(1, protMappings.length);
684  1 MapList mapList = protMappings[0].getMap();
685  1 assertEquals(3, mapList.getFromRatio());
686  1 assertEquals(1, mapList.getToRatio());
687  1 assertTrue(
688    Arrays.equals(new int[]
689    { 1, 9 }, mapList.getFromRanges().get(0)));
690  1 assertEquals(1, mapList.getFromRanges().size());
691  1 assertTrue(
692    Arrays.equals(new int[]
693    { 1, 3 }, mapList.getToRanges().get(0)));
694  1 assertEquals(1, mapList.getToRanges().size());
695   
696    // V12346 mapped from A33333 starting position 4
697  1 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
698  1 assertEquals(1, acf.getdnaSeqs().length);
699  1 assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
700    acf.getdnaSeqs()[0]);
701  1 protMappings = acf.getProtMappings();
702  1 assertEquals(1, protMappings.length);
703  1 mapList = protMappings[0].getMap();
704  1 assertEquals(3, mapList.getFromRatio());
705  1 assertEquals(1, mapList.getToRatio());
706  1 assertTrue(
707    Arrays.equals(new int[]
708    { 4, 12 }, mapList.getFromRanges().get(0)));
709  1 assertEquals(1, mapList.getFromRanges().size());
710  1 assertTrue(
711    Arrays.equals(new int[]
712    { 1, 3 }, mapList.getToRanges().get(0)));
713  1 assertEquals(1, mapList.getToRanges().size());
714   
715    // V12347 mapped to A11111 starting position 4
716  1 acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
717  1 assertEquals(1, acf.getdnaSeqs().length);
718  1 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
719    acf.getdnaSeqs()[0]);
720  1 protMappings = acf.getProtMappings();
721  1 assertEquals(1, protMappings.length);
722  1 mapList = protMappings[0].getMap();
723  1 assertEquals(3, mapList.getFromRatio());
724  1 assertEquals(1, mapList.getToRatio());
725  1 assertTrue(
726    Arrays.equals(new int[]
727    { 4, 12 }, mapList.getFromRanges().get(0)));
728  1 assertEquals(1, mapList.getFromRanges().size());
729  1 assertTrue(
730    Arrays.equals(new int[]
731    { 1, 3 }, mapList.getToRanges().get(0)));
732  1 assertEquals(1, mapList.getToRanges().size());
733   
734    // no mapping involving the 'extra' A44444
735  1 assertTrue(protein.getCodonFrame(cdna.getSequenceAt(3)).isEmpty());
736    }
737   
738    /**
739    * Test mapping of protein to cDNA, for the case where we have some sequence
740    * cross-references. Verify that 1-to-many mappings are made where
741    * cross-references exist and sequences are mappable.
742    *
743    * @throws IOException
744    */
 
745  1 toggle @Test(groups = { "Functional" })
746    public void testMapProteinAlignmentToCdna_withXrefs() throws IOException
747    {
748  1 List<SequenceI> protseqs = new ArrayList<>();
749  1 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
750  1 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
751  1 protseqs.add(new Sequence("UNIPROT|V12347", "SAR"));
752  1 AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3]));
753  1 protein.setDataset(null);
754   
755  1 List<SequenceI> dnaseqs = new ArrayList<>();
756  1 dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR
757  1 dnaseqs.add(new Sequence("EMBL|A22222", "ATGGAGATACAA")); // = start + EIQ
758  1 dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ
759  1 dnaseqs.add(new Sequence("EMBL|A44444", "GAAATTCAG")); // = EIQ
760  1 dnaseqs.add(new Sequence("EMBL|A55555", "GAGATTCAG")); // = EIQ
761  1 AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[5]));
762  1 cdna.setDataset(null);
763   
764    // Xref A22222 to V12345 (should get mapped)
765  1 dnaseqs.get(1).addDBRef(new DBRefEntry("UNIPROT", "1", "V12345"));
766    // Xref V12345 to A44444 (should get mapped)
767  1 protseqs.get(0).addDBRef(new DBRefEntry("EMBL", "1", "A44444"));
768    // Xref A33333 to V12347 (sequence mismatch - should not get mapped)
769  1 dnaseqs.get(2).addDBRef(new DBRefEntry("UNIPROT", "1", "V12347"));
770    // as V12345 is mapped to A22222 and A44444, this leaves V12346 unmapped.
771    // it should get paired up with the unmapped A33333
772    // A11111 should be mapped to V12347
773    // A55555 is spare and has no xref so is not mapped
774   
775  1 assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
776   
777    // 3 codon frames made for 3 proteins: V12345's holds 2 cDNA mappings, V12346/7 hold 1 each
778  1 assertEquals(3, protein.getCodonFrames().size());
779  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
780  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
781  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(2)).size());
782   
783    // one mapping for each of the first 4 cDNA sequences
784  1 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(0)).size());
785  1 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(1)).size());
786  1 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(2)).size());
787  1 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(3)).size());
788   
789    // V12345 mapped to A22222 and A44444
790  1 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
791    .get(0);
792  1 assertEquals(2, acf.getdnaSeqs().length);
793  1 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
794    acf.getdnaSeqs()[0]);
795  1 assertEquals(cdna.getSequenceAt(3).getDatasetSequence(),
796    acf.getdnaSeqs()[1]);
797   
798    // V12346 mapped to A33333
799  1 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
800  1 assertEquals(1, acf.getdnaSeqs().length);
801  1 assertEquals(cdna.getSequenceAt(2).getDatasetSequence(),
802    acf.getdnaSeqs()[0]);
803   
804    // V12347 mapped to A11111
805  1 acf = protein.getCodonFrame(protein.getSequenceAt(2)).get(0);
806  1 assertEquals(1, acf.getdnaSeqs().length);
807  1 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
808    acf.getdnaSeqs()[0]);
809   
810    // no mapping involving the 'extra' A55555
811  1 assertTrue(protein.getCodonFrame(cdna.getSequenceAt(4)).isEmpty());
812    }
813   
814    /**
815    * Test mapping of protein to cDNA, for the case where we have some sequence
816    * cross-references. Verify that once we have made an xref mapping we don't
817    * also map un-xrefd sequences.
818    *
819    * @throws IOException
820    */
 
821  1 toggle @Test(groups = { "Functional" })
822    public void testMapProteinAlignmentToCdna_prioritiseXrefs()
823    throws IOException
824    {
825  1 List<SequenceI> protseqs = new ArrayList<>();
826  1 protseqs.add(new Sequence("UNIPROT|V12345", "EIQ"));
827  1 protseqs.add(new Sequence("UNIPROT|V12346", "EIQ"));
828  1 AlignmentI protein = new Alignment(
829    protseqs.toArray(new SequenceI[protseqs.size()]));
830  1 protein.setDataset(null);
831   
832  1 List<SequenceI> dnaseqs = new ArrayList<>();
833  1 dnaseqs.add(new Sequence("EMBL|A11111", "GAAATCCAG")); // = EIQ
834  1 dnaseqs.add(new Sequence("EMBL|A22222", "GAAATTCAG")); // = EIQ
835  1 AlignmentI cdna = new Alignment(
836    dnaseqs.toArray(new SequenceI[dnaseqs.size()]));
837  1 cdna.setDataset(null);
838   
839    // Xref A22222 to V12345 (should get mapped)
840    // A11111 should then be mapped to the unmapped V12346
841  1 dnaseqs.get(1).addDBRef(new DBRefEntry("UNIPROT", "1", "V12345"));
842   
843  1 assertTrue(AlignmentUtils.mapProteinAlignmentToCdna(protein, cdna));
844   
845    // 2 protein mappings made
846  1 assertEquals(2, protein.getCodonFrames().size());
847  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).size());
848  1 assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).size());
849   
850    // one mapping for each of the cDNA sequences
851  1 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(0)).size());
852  1 assertEquals(1, protein.getCodonFrame(cdna.getSequenceAt(1)).size());
853   
854    // V12345 mapped to A22222
855  1 AlignedCodonFrame acf = protein.getCodonFrame(protein.getSequenceAt(0))
856    .get(0);
857  1 assertEquals(1, acf.getdnaSeqs().length);
858  1 assertEquals(cdna.getSequenceAt(1).getDatasetSequence(),
859    acf.getdnaSeqs()[0]);
860   
861    // V12346 mapped to A11111
862  1 acf = protein.getCodonFrame(protein.getSequenceAt(1)).get(0);
863  1 assertEquals(1, acf.getdnaSeqs().length);
864  1 assertEquals(cdna.getSequenceAt(0).getDatasetSequence(),
865    acf.getdnaSeqs()[0]);
866    }
867   
868    /**
869    * Test the method that shows or hides sequence annotations by type(s) and
870    * selection group.
871    */
 
872  1 toggle @Test(groups = { "Functional" })
873    public void testShowOrHideSequenceAnnotations()
874    {
875  1 SequenceI seq1 = new Sequence("Seq1", "AAA");
876  1 SequenceI seq2 = new Sequence("Seq2", "BBB");
877  1 SequenceI seq3 = new Sequence("Seq3", "CCC");
878  1 Annotation[] anns = new Annotation[] { new Annotation(2f) };
879  1 AlignmentAnnotation ann1 = new AlignmentAnnotation("Structure", "ann1",
880    anns);
881  1 ann1.setSequenceRef(seq1);
882  1 AlignmentAnnotation ann2 = new AlignmentAnnotation("Structure", "ann2",
883    anns);
884  1 ann2.setSequenceRef(seq2);
885  1 AlignmentAnnotation ann3 = new AlignmentAnnotation("Structure", "ann3",
886    anns);
887  1 AlignmentAnnotation ann4 = new AlignmentAnnotation("Temp", "ann4",
888    anns);
889  1 ann4.setSequenceRef(seq1);
890  1 AlignmentAnnotation ann5 = new AlignmentAnnotation("Temp", "ann5",
891    anns);
892  1 ann5.setSequenceRef(seq2);
893  1 AlignmentAnnotation ann6 = new AlignmentAnnotation("Temp", "ann6",
894    anns);
895  1 AlignmentI al = new Alignment(new SequenceI[] { seq1, seq2, seq3 });
896  1 al.addAnnotation(ann1); // Structure for Seq1
897  1 al.addAnnotation(ann2); // Structure for Seq2
898  1 al.addAnnotation(ann3); // Structure for no sequence
899  1 al.addAnnotation(ann4); // Temp for seq1
900  1 al.addAnnotation(ann5); // Temp for seq2
901  1 al.addAnnotation(ann6); // Temp for no sequence
902  1 List<String> types = new ArrayList<>();
903  1 List<SequenceI> scope = new ArrayList<>();
904   
905    /*
906    * Set all sequence related Structure to hidden (ann1, ann2)
907    */
908  1 types.add("Structure");
909  1 AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, false,
910    false);
911  1 assertFalse(ann1.visible);
912  1 assertFalse(ann2.visible);
913  1 assertTrue(ann3.visible); // not sequence-related, not affected
914  1 assertTrue(ann4.visible); // not Structure, not affected
915  1 assertTrue(ann5.visible); // "
916  1 assertTrue(ann6.visible); // not sequence-related, not affected
917   
918    /*
919    * Set Temp in {seq1, seq3} to hidden
920    */
921  1 types.clear();
922  1 types.add("Temp");
923  1 scope.add(seq1);
924  1 scope.add(seq3);
925  1 AlignmentUtils.showOrHideSequenceAnnotations(al, types, scope, false,
926    false);
927  1 assertFalse(ann1.visible); // unchanged
928  1 assertFalse(ann2.visible); // unchanged
929  1 assertTrue(ann3.visible); // not sequence-related, not affected
930  1 assertFalse(ann4.visible); // Temp for seq1 hidden
931  1 assertTrue(ann5.visible); // not in scope, not affected
932  1 assertTrue(ann6.visible); // not sequence-related, not affected
933   
934    /*
935    * Set Temp in all sequences to hidden
936    */
937  1 types.clear();
938  1 types.add("Temp");
939  1 scope.add(seq1);
940  1 scope.add(seq3);
941  1 AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, false,
942    false);
943  1 assertFalse(ann1.visible); // unchanged
944  1 assertFalse(ann2.visible); // unchanged
945  1 assertTrue(ann3.visible); // not sequence-related, not affected
946  1 assertFalse(ann4.visible); // Temp for seq1 hidden
947  1 assertFalse(ann5.visible); // Temp for seq2 hidden
948  1 assertTrue(ann6.visible); // not sequence-related, not affected
949   
950    /*
951    * Set all types in {seq1, seq3} to visible
952    */
953  1 types.clear();
954  1 scope.clear();
955  1 scope.add(seq1);
956  1 scope.add(seq3);
957  1 AlignmentUtils.showOrHideSequenceAnnotations(al, types, scope, true,
958    true);
959  1 assertTrue(ann1.visible); // Structure for seq1 set visible
960  1 assertFalse(ann2.visible); // not in scope, unchanged
961  1 assertTrue(ann3.visible); // not sequence-related, not affected
962  1 assertTrue(ann4.visible); // Temp for seq1 set visible
963  1 assertFalse(ann5.visible); // not in scope, unchanged
964  1 assertTrue(ann6.visible); // not sequence-related, not affected
965   
966    /*
967    * Set all types in all scope to hidden
968    */
969  1 AlignmentUtils.showOrHideSequenceAnnotations(al, types, null, true,
970    false);
971  1 assertFalse(ann1.visible);
972  1 assertFalse(ann2.visible);
973  1 assertTrue(ann3.visible); // not sequence-related, not affected
974  1 assertFalse(ann4.visible);
975  1 assertFalse(ann5.visible);
976  1 assertTrue(ann6.visible); // not sequence-related, not affected
977    }
978   
979    /**
980    * Tests for the method that checks if one sequence cross-references another
981    */
 
982  1 toggle @Test(groups = { "Functional" })
983    public void testHasCrossRef()
984    {
985  1 assertFalse(AlignmentUtils.hasCrossRef(null, null));
986  1 SequenceI seq1 = new Sequence("EMBL|A12345", "ABCDEF");
987  1 assertFalse(AlignmentUtils.hasCrossRef(seq1, null));
988  1 assertFalse(AlignmentUtils.hasCrossRef(null, seq1));
989  1 SequenceI seq2 = new Sequence("UNIPROT|V20192", "ABCDEF");
990  1 assertFalse(AlignmentUtils.hasCrossRef(seq1, seq2));
991   
992    // different ref
993  1 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "v20193"));
994  1 assertFalse(AlignmentUtils.hasCrossRef(seq1, seq2));
995   
996    // case-insensitive; version number is ignored
997  1 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "v20192"));
998  1 assertTrue(AlignmentUtils.hasCrossRef(seq1, seq2));
999   
1000    // right case!
1001  1 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));
1002  1 assertTrue(AlignmentUtils.hasCrossRef(seq1, seq2));
1003    // test is one-way only
1004  1 assertFalse(AlignmentUtils.hasCrossRef(seq2, seq1));
1005    }
1006   
1007    /**
1008    * Tests for the method that checks if either sequence cross-references the
1009    * other
1010    */
 
1011  1 toggle @Test(groups = { "Functional" })
1012    public void testHaveCrossRef()
1013    {
1014  1 assertFalse(AlignmentUtils.haveCrossRef(null, null));
1015  1 SequenceI seq1 = new Sequence("EMBL|A12345", "ABCDEF");
1016  1 assertFalse(AlignmentUtils.haveCrossRef(seq1, null));
1017  1 assertFalse(AlignmentUtils.haveCrossRef(null, seq1));
1018  1 SequenceI seq2 = new Sequence("UNIPROT|V20192", "ABCDEF");
1019  1 assertFalse(AlignmentUtils.haveCrossRef(seq1, seq2));
1020   
1021  1 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));
1022  1 assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));
1023    // next is true for haveCrossRef, false for hasCrossRef
1024  1 assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));
1025   
1026    // now the other way round
1027  1 seq1.setDBRefs(null);
1028  1 seq2.addDBRef(new DBRefEntry("EMBL", "1", "A12345"));
1029  1 assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));
1030  1 assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));
1031   
1032    // now both ways
1033  1 seq1.addDBRef(new DBRefEntry("UNIPROT", "1", "V20192"));
1034  1 assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));
1035  1 assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));
1036    }
1037   
1038    /**
1039    * Test the method that extracts the cds-only part of a dna alignment.
1040    */
 
1041  1 toggle @Test(groups = { "Functional" })
1042    public void testMakeCdsAlignment()
1043    {
1044    /*
1045    * scenario:
1046    * dna1 --> [4, 6] [10,12] --> pep1
1047    * dna2 --> [1, 3] [7, 9] [13,15] --> pep2
1048    */
1049  1 SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
1050  1 SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC");
1051  1 SequenceI pep1 = new Sequence("pep1", "GF");
1052  1 SequenceI pep2 = new Sequence("pep2", "GFP");
1053  1 pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "pep1"));
1054  1 pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "pep2"));
1055  1 dna1.createDatasetSequence();
1056  1 dna2.createDatasetSequence();
1057  1 pep1.createDatasetSequence();
1058  1 pep2.createDatasetSequence();
1059  1 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });
1060  1 dna.setDataset(null);
1061   
1062    /*
1063    * put a variant feature on dna2 base 8
1064    * - should transfer to cds2 base 5
1065    */
1066  1 dna2.addSequenceFeature(
1067    new SequenceFeature("variant", "hgmd", 8, 8, 0f, null));
1068   
1069    /*
1070    * need a sourceDbRef if we are to construct dbrefs to the CDS
1071    * sequence from the dna contig sequences
1072    */
1073  1 DBRefEntry dbref = new DBRefEntry("ENSEMBL", "0", "dna1");
1074  1 dna1.getDatasetSequence().addDBRef(dbref);
1075  1 org.testng.Assert.assertEquals(dbref, dna1.getPrimaryDBRefs().get(0));
1076  1 dbref = new DBRefEntry("ENSEMBL", "0", "dna2");
1077  1 dna2.getDatasetSequence().addDBRef(dbref);
1078  1 org.testng.Assert.assertEquals(dbref, dna2.getPrimaryDBRefs().get(0));
1079   
1080    /*
1081    * CDS sequences are 'discovered' from dna-to-protein mappings on the alignment
1082    * dataset (e.g. added from dbrefs by CrossRef.findXrefSequences)
1083    */
1084  1 MapList mapfordna1 = new MapList(new int[] { 4, 6, 10, 12 },
1085    new int[]
1086    { 1, 2 }, 3, 1);
1087  1 AlignedCodonFrame acf = new AlignedCodonFrame();
1088  1 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(),
1089    mapfordna1);
1090  1 dna.addCodonFrame(acf);
1091  1 MapList mapfordna2 = new MapList(new int[] { 1, 3, 7, 9, 13, 15 },
1092    new int[]
1093    { 1, 3 }, 3, 1);
1094  1 acf = new AlignedCodonFrame();
1095  1 acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(),
1096    mapfordna2);
1097  1 dna.addCodonFrame(acf);
1098   
1099    /*
1100    * In this case, mappings originally came from matching Uniprot accessions
1101    * - so need an xref on dna involving those regions.
1102    * These are normally constructed from CDS annotation
1103    */
1104  1 DBRefEntry dna1xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep1",
1105    new Mapping(mapfordna1));
1106  1 dna1.addDBRef(dna1xref);
1107  1 assertEquals(2, dna1.getDBRefs().size()); // to self and to pep1
1108  1 DBRefEntry dna2xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep2",
1109    new Mapping(mapfordna2));
1110  1 dna2.addDBRef(dna2xref);
1111  1 assertEquals(2, dna2.getDBRefs().size()); // to self and to pep2
1112   
1113    /*
1114    * execute method under test:
1115    */
1116  1 AlignmentI cds = AlignmentUtils
1117    .makeCdsAlignment(new SequenceI[]
1118    { dna1, dna2 }, dna.getDataset(), null);
1119   
1120    /*
1121    * verify cds sequences
1122    */
1123  1 assertEquals(2, cds.getSequences().size());
1124  1 assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
1125  1 assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString());
1126   
1127    /*
1128    * verify shared, extended alignment dataset
1129    */
1130  1 assertSame(dna.getDataset(), cds.getDataset());
1131  1 SequenceI cds1Dss = cds.getSequenceAt(0).getDatasetSequence();
1132  1 SequenceI cds2Dss = cds.getSequenceAt(1).getDatasetSequence();
1133  1 assertTrue(dna.getDataset().getSequences().contains(cds1Dss));
1134  1 assertTrue(dna.getDataset().getSequences().contains(cds2Dss));
1135   
1136    /*
1137    * verify CDS has a dbref with mapping to peptide
1138    */
1139  1 assertNotNull(cds1Dss.getDBRefs());
1140  1 assertEquals(2, cds1Dss.getDBRefs().size());
1141  1 dbref = cds1Dss.getDBRefs().get(0);
1142  1 assertEquals(dna1xref.getSource(), dbref.getSource());
1143    // version is via ensembl's primary ref
1144  1 assertEquals(dna1xref.getVersion(), dbref.getVersion());
1145  1 assertEquals(dna1xref.getAccessionId(), dbref.getAccessionId());
1146  1 assertNotNull(dbref.getMap());
1147  1 assertSame(pep1.getDatasetSequence(), dbref.getMap().getTo());
1148  1 MapList cdsMapping = new MapList(new int[] { 1, 6 }, new int[] { 1, 2 },
1149    3, 1);
1150  1 assertEquals(cdsMapping, dbref.getMap().getMap());
1151   
1152    /*
1153    * verify peptide has added a dbref with reverse mapping to CDS
1154    */
1155  1 assertNotNull(pep1.getDBRefs());
1156    // FIXME pep1.getDBRefs() is 1 - is that the correct behaviour ?
1157  1 assertEquals(2, pep1.getDBRefs().size());
1158  1 dbref = pep1.getDBRefs().get(1);
1159  1 assertEquals("ENSEMBL", dbref.getSource());
1160  1 assertEquals("0", dbref.getVersion());
1161  1 assertEquals("CDS|dna1", dbref.getAccessionId());
1162  1 assertNotNull(dbref.getMap());
1163  1 assertSame(cds1Dss, dbref.getMap().getTo());
1164  1 assertEquals(cdsMapping.getInverse(), dbref.getMap().getMap());
1165   
1166    /*
1167    * verify cDNA has added a dbref with mapping to CDS
1168    */
1169  1 assertEquals(3, dna1.getDBRefs().size());
1170  1 DBRefEntry dbRefEntry = dna1.getDBRefs().get(2);
1171  1 assertSame(cds1Dss, dbRefEntry.getMap().getTo());
1172  1 MapList dnaToCdsMapping = new MapList(new int[] { 4, 6, 10, 12 },
1173    new int[]
1174    { 1, 6 }, 1, 1);
1175  1 assertEquals(dnaToCdsMapping, dbRefEntry.getMap().getMap());
1176  1 assertEquals(3, dna2.getDBRefs().size());
1177  1 dbRefEntry = dna2.getDBRefs().get(2);
1178  1 assertSame(cds2Dss, dbRefEntry.getMap().getTo());
1179  1 dnaToCdsMapping = new MapList(new int[] { 1, 3, 7, 9, 13, 15 },
1180    new int[]
1181    { 1, 9 }, 1, 1);
1182  1 assertEquals(dnaToCdsMapping, dbRefEntry.getMap().getMap());
1183   
1184    /*
1185    * verify CDS has added a dbref with mapping to cDNA
1186    */
1187  1 assertEquals(2, cds1Dss.getDBRefs().size());
1188  1 dbRefEntry = cds1Dss.getDBRefs().get(1);
1189  1 assertSame(dna1.getDatasetSequence(), dbRefEntry.getMap().getTo());
1190  1 MapList cdsToDnaMapping = new MapList(new int[] { 1, 6 },
1191    new int[]
1192    { 4, 6, 10, 12 }, 1, 1);
1193  1 assertEquals(cdsToDnaMapping, dbRefEntry.getMap().getMap());
1194  1 assertEquals(2, cds2Dss.getDBRefs().size());
1195  1 dbRefEntry = cds2Dss.getDBRefs().get(1);
1196  1 assertSame(dna2.getDatasetSequence(), dbRefEntry.getMap().getTo());
1197  1 cdsToDnaMapping = new MapList(new int[] { 1, 9 },
1198    new int[]
1199    { 1, 3, 7, 9, 13, 15 }, 1, 1);
1200  1 assertEquals(cdsToDnaMapping, dbRefEntry.getMap().getMap());
1201   
1202    /*
1203    * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide
1204    * the mappings are on the shared alignment dataset
1205    * 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep)
1206    */
1207  1 List<AlignedCodonFrame> cdsMappings = cds.getDataset().getCodonFrames();
1208  1 assertEquals(6, cdsMappings.size());
1209   
1210    /*
1211    * verify that mapping sets for dna and cds alignments are different
1212    * [not current behaviour - all mappings are on the alignment dataset]
1213    */
1214    // select -> subselect type to test.
1215    // Assert.assertNotSame(dna.getCodonFrames(), cds.getCodonFrames());
1216    // assertEquals(4, dna.getCodonFrames().size());
1217    // assertEquals(4, cds.getCodonFrames().size());
1218   
1219    /*
1220    * Two mappings involve pep1 (dna to pep1, cds to pep1)
1221    * Mapping from pep1 to GGGTTT in first new exon sequence
1222    */
1223  1 List<AlignedCodonFrame> pep1Mappings = MappingUtils
1224    .findMappingsForSequence(pep1, cdsMappings);
1225  1 assertEquals(2, pep1Mappings.size());
1226  1 List<AlignedCodonFrame> mappings = MappingUtils
1227    .findMappingsForSequence(cds.getSequenceAt(0), pep1Mappings);
1228  1 assertEquals(1, mappings.size());
1229   
1230    // map G to GGG
1231  1 SearchResultsI sr = MappingUtils.buildSearchResults(pep1, 1, mappings);
1232  1 assertEquals(1, sr.getResults().size());
1233  1 SearchResultMatchI m = sr.getResults().get(0);
1234  1 assertSame(cds1Dss, m.getSequence());
1235  1 assertEquals(1, m.getStart());
1236  1 assertEquals(3, m.getEnd());
1237    // map F to TTT
1238  1 sr = MappingUtils.buildSearchResults(pep1, 2, mappings);
1239  1 m = sr.getResults().get(0);
1240  1 assertSame(cds1Dss, m.getSequence());
1241  1 assertEquals(4, m.getStart());
1242  1 assertEquals(6, m.getEnd());
1243   
1244    /*
1245    * Two mappings involve pep2 (dna to pep2, cds to pep2)
1246    * Verify mapping from pep2 to GGGTTTCCC in second new exon sequence
1247    */
1248  1 List<AlignedCodonFrame> pep2Mappings = MappingUtils
1249    .findMappingsForSequence(pep2, cdsMappings);
1250  1 assertEquals(2, pep2Mappings.size());
1251  1 mappings = MappingUtils.findMappingsForSequence(cds.getSequenceAt(1),
1252    pep2Mappings);
1253  1 assertEquals(1, mappings.size());
1254    // map G to GGG
1255  1 sr = MappingUtils.buildSearchResults(pep2, 1, mappings);
1256  1 assertEquals(1, sr.getResults().size());
1257  1 m = sr.getResults().get(0);
1258  1 assertSame(cds2Dss, m.getSequence());
1259  1 assertEquals(1, m.getStart());
1260  1 assertEquals(3, m.getEnd());
1261    // map F to TTT
1262  1 sr = MappingUtils.buildSearchResults(pep2, 2, mappings);
1263  1 m = sr.getResults().get(0);
1264  1 assertSame(cds2Dss, m.getSequence());
1265  1 assertEquals(4, m.getStart());
1266  1 assertEquals(6, m.getEnd());
1267    // map P to CCC
1268  1 sr = MappingUtils.buildSearchResults(pep2, 3, mappings);
1269  1 m = sr.getResults().get(0);
1270  1 assertSame(cds2Dss, m.getSequence());
1271  1 assertEquals(7, m.getStart());
1272  1 assertEquals(9, m.getEnd());
1273   
1274    /*
1275    * check cds2 acquired a variant feature in position 5
1276    */
1277  1 List<SequenceFeature> sfs = cds2Dss.getSequenceFeatures();
1278  1 assertNotNull(sfs);
1279  1 assertEquals(1, sfs.size());
1280  1 assertEquals("variant", sfs.get(0).type);
1281  1 assertEquals(5, sfs.get(0).begin);
1282  1 assertEquals(5, sfs.get(0).end);
1283    }
1284   
1285    /**
1286    * Test the method that makes a cds-only alignment from a DNA sequence and its
1287    * product mappings, for the case where there are multiple exon mappings to
1288    * different protein products.
1289    */
 
1290  1 toggle @Test(groups = { "Functional" })
1291    public void testMakeCdsAlignment_multipleProteins()
1292    {
1293  1 SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
1294  1 SequenceI pep1 = new Sequence("pep1", "GF"); // GGGTTT
1295  1 SequenceI pep2 = new Sequence("pep2", "KP"); // aaaccc
1296  1 SequenceI pep3 = new Sequence("pep3", "KF"); // aaaTTT
1297  1 dna1.createDatasetSequence();
1298  1 pep1.createDatasetSequence();
1299  1 pep2.createDatasetSequence();
1300  1 pep3.createDatasetSequence();
1301  1 pep1.getDatasetSequence()
1302    .addDBRef(new DBRefEntry("EMBLCDS", "2", "A12345"));
1303  1 pep2.getDatasetSequence()
1304    .addDBRef(new DBRefEntry("EMBLCDS", "3", "A12346"));
1305  1 pep3.getDatasetSequence()
1306    .addDBRef(new DBRefEntry("EMBLCDS", "4", "A12347"));
1307   
1308    /*
1309    * Create the dna alignment that the CDS alignment is derived from
1310    */
1311  1 AlignmentI dna = new Alignment(new SequenceI[] { dna1 });
1312  1 dna.setDataset(null);
1313   
1314    /*
1315    * Make the mappings from dna to protein
1316    */
1317    // map ...GGG...TTT to GF
1318  1 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1319    new int[]
1320    { 1, 2 }, 3, 1);
1321  1 AlignedCodonFrame acf = new AlignedCodonFrame();
1322  1 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
1323  1 dna.addCodonFrame(acf);
1324   
1325    // map aaa...ccc to KP
1326  1 map = new MapList(new int[] { 1, 3, 7, 9 }, new int[] { 1, 2 }, 3, 1);
1327  1 acf = new AlignedCodonFrame();
1328  1 acf.addMap(dna1.getDatasetSequence(), pep2.getDatasetSequence(), map);
1329  1 dna.addCodonFrame(acf);
1330   
1331    // map aaa......TTT to KF
1332  1 map = new MapList(new int[] { 1, 3, 10, 12 }, new int[] { 1, 2 }, 3, 1);
1333  1 acf = new AlignedCodonFrame();
1334  1 acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map);
1335  1 dna.addCodonFrame(acf);
1336   
1337    /*
1338    * execute method under test
1339    */
1340  1 AlignmentI cdsal = AlignmentUtils
1341    .makeCdsAlignment(new SequenceI[]
1342    { dna1 }, dna.getDataset(), null);
1343   
1344    /*
1345    * Verify we have 3 cds sequences, mapped to pep1/2/3 respectively
1346    */
1347  1 List<SequenceI> cds = cdsal.getSequences();
1348  1 assertEquals(3, cds.size());
1349   
1350    /*
1351    * verify shared, extended alignment dataset
1352    */
1353  1 assertSame(cdsal.getDataset(), dna.getDataset());
1354  1 assertTrue(dna.getDataset().getSequences()
1355    .contains(cds.get(0).getDatasetSequence()));
1356  1 assertTrue(dna.getDataset().getSequences()
1357    .contains(cds.get(1).getDatasetSequence()));
1358  1 assertTrue(dna.getDataset().getSequences()
1359    .contains(cds.get(2).getDatasetSequence()));
1360   
1361    /*
1362    * verify aligned cds sequences and their xrefs
1363    */
1364  1 SequenceI cdsSeq = cds.get(0);
1365  1 assertEquals("GGGTTT", cdsSeq.getSequenceAsString());
1366    // assertEquals("dna1|A12345", cdsSeq.getName());
1367  1 assertEquals("CDS|dna1", cdsSeq.getName());
1368    // assertEquals(1, cdsSeq.getDBRefs().length);
1369    // DBRefEntry cdsRef = cdsSeq.getDBRefs()[0];
1370    // assertEquals("EMBLCDS", cdsRef.getSource());
1371    // assertEquals("2", cdsRef.getVersion());
1372    // assertEquals("A12345", cdsRef.getAccessionId());
1373   
1374  1 cdsSeq = cds.get(1);
1375  1 assertEquals("aaaccc", cdsSeq.getSequenceAsString());
1376    // assertEquals("dna1|A12346", cdsSeq.getName());
1377  1 assertEquals("CDS|dna1", cdsSeq.getName());
1378    // assertEquals(1, cdsSeq.getDBRefs().length);
1379    // cdsRef = cdsSeq.getDBRefs()[0];
1380    // assertEquals("EMBLCDS", cdsRef.getSource());
1381    // assertEquals("3", cdsRef.getVersion());
1382    // assertEquals("A12346", cdsRef.getAccessionId());
1383   
1384  1 cdsSeq = cds.get(2);
1385  1 assertEquals("aaaTTT", cdsSeq.getSequenceAsString());
1386    // assertEquals("dna1|A12347", cdsSeq.getName());
1387  1 assertEquals("CDS|dna1", cdsSeq.getName());
1388    // assertEquals(1, cdsSeq.getDBRefs().length);
1389    // cdsRef = cdsSeq.getDBRefs()[0];
1390    // assertEquals("EMBLCDS", cdsRef.getSource());
1391    // assertEquals("4", cdsRef.getVersion());
1392    // assertEquals("A12347", cdsRef.getAccessionId());
1393   
1394    /*
1395    * Verify there are mappings from each cds sequence to its protein product
1396    * and also to its dna source
1397    */
1398  1 List<AlignedCodonFrame> newMappings = cdsal.getCodonFrames();
1399   
1400    /*
1401    * 6 mappings involve dna1 (to pep1/2/3, cds1/2/3)
1402    */
1403  1 List<AlignedCodonFrame> dnaMappings = MappingUtils
1404    .findMappingsForSequence(dna1, newMappings);
1405  1 assertEquals(6, dnaMappings.size());
1406   
1407    /*
1408    * dna1 to pep1
1409    */
1410  1 List<AlignedCodonFrame> mappings = MappingUtils
1411    .findMappingsForSequence(pep1, dnaMappings);
1412  1 assertEquals(1, mappings.size());
1413  1 assertEquals(1, mappings.get(0).getMappings().size());
1414  1 assertSame(pep1.getDatasetSequence(),
1415    mappings.get(0).getMappings().get(0).getMapping().getTo());
1416   
1417    /*
1418    * dna1 to cds1
1419    */
1420  1 List<AlignedCodonFrame> dnaToCds1Mappings = MappingUtils
1421    .findMappingsForSequence(cds.get(0), dnaMappings);
1422  1 Mapping mapping = dnaToCds1Mappings.get(0).getMappings().get(0)
1423    .getMapping();
1424  1 assertSame(cds.get(0).getDatasetSequence(), mapping.getTo());
1425  1 assertEquals("G(1) in CDS should map to G(4) in DNA", 4,
1426    mapping.getMap().getToPosition(1));
1427   
1428    /*
1429    * dna1 to pep2
1430    */
1431  1 mappings = MappingUtils.findMappingsForSequence(pep2, dnaMappings);
1432  1 assertEquals(1, mappings.size());
1433  1 assertEquals(1, mappings.get(0).getMappings().size());
1434  1 assertSame(pep2.getDatasetSequence(),
1435    mappings.get(0).getMappings().get(0).getMapping().getTo());
1436   
1437    /*
1438    * dna1 to cds2
1439    */
1440  1 List<AlignedCodonFrame> dnaToCds2Mappings = MappingUtils
1441    .findMappingsForSequence(cds.get(1), dnaMappings);
1442  1 mapping = dnaToCds2Mappings.get(0).getMappings().get(0).getMapping();
1443  1 assertSame(cds.get(1).getDatasetSequence(), mapping.getTo());
1444  1 assertEquals("c(4) in CDS should map to c(7) in DNA", 7,
1445    mapping.getMap().getToPosition(4));
1446   
1447    /*
1448    * dna1 to pep3
1449    */
1450  1 mappings = MappingUtils.findMappingsForSequence(pep3, dnaMappings);
1451  1 assertEquals(1, mappings.size());
1452  1 assertEquals(1, mappings.get(0).getMappings().size());
1453  1 assertSame(pep3.getDatasetSequence(),
1454    mappings.get(0).getMappings().get(0).getMapping().getTo());
1455   
1456    /*
1457    * dna1 to cds3
1458    */
1459  1 List<AlignedCodonFrame> dnaToCds3Mappings = MappingUtils
1460    .findMappingsForSequence(cds.get(2), dnaMappings);
1461  1 mapping = dnaToCds3Mappings.get(0).getMappings().get(0).getMapping();
1462  1 assertSame(cds.get(2).getDatasetSequence(), mapping.getTo());
1463  1 assertEquals("T(4) in CDS should map to T(10) in DNA", 10,
1464    mapping.getMap().getToPosition(4));
1465    }
1466   
 
1467  1 toggle @Test(groups = { "Functional" })
1468    public void testIsMappable()
1469    {
1470  1 SequenceI dna1 = new Sequence("dna1", "cgCAGtgGT");
1471  1 SequenceI aa1 = new Sequence("aa1", "RSG");
1472  1 AlignmentI al1 = new Alignment(new SequenceI[] { dna1 });
1473  1 AlignmentI al2 = new Alignment(new SequenceI[] { aa1 });
1474   
1475  1 assertFalse(AlignmentUtils.isMappable(null, null));
1476  1 assertFalse(AlignmentUtils.isMappable(al1, null));
1477  1 assertFalse(AlignmentUtils.isMappable(null, al1));
1478  1 assertFalse(AlignmentUtils.isMappable(al1, al1));
1479  1 assertFalse(AlignmentUtils.isMappable(al2, al2));
1480   
1481  1 assertTrue(AlignmentUtils.isMappable(al1, al2));
1482  1 assertTrue(AlignmentUtils.isMappable(al2, al1));
1483    }
1484   
1485    /**
1486    * Test creating a mapping when the sequences involved do not start at residue
1487    * 1
1488    *
1489    * @throws IOException
1490    */
 
1491  1 toggle @Test(groups = { "Functional" })
1492    public void testMapCdnaToProtein_forSubsequence() throws IOException
1493    {
1494  1 SequenceI prot = new Sequence("UNIPROT|V12345", "E-I--Q", 10, 12);
1495  1 prot.createDatasetSequence();
1496   
1497  1 SequenceI dna = new Sequence("EMBL|A33333", "GAA--AT-C-CAG", 40, 48);
1498  1 dna.createDatasetSequence();
1499   
1500  1 MapList map = AlignmentUtils.mapCdnaToProtein(prot, dna);
1501  1 assertEquals(10, map.getToLowest());
1502  1 assertEquals(12, map.getToHighest());
1503  1 assertEquals(40, map.getFromLowest());
1504  1 assertEquals(48, map.getFromHighest());
1505    }
1506   
1507    /**
1508    * Test for the alignSequenceAs method where we have protein mapped to protein
1509    */
 
1510  1 toggle @Test(groups = { "Functional" })
1511    public void testAlignSequenceAs_mappedProteinProtein()
1512    {
1513   
1514  1 SequenceI alignMe = new Sequence("Match", "MGAASEV");
1515  1 alignMe.createDatasetSequence();
1516  1 SequenceI alignFrom = new Sequence("Query", "LQTGYMGAASEVMFSPTRR");
1517  1 alignFrom.createDatasetSequence();
1518   
1519  1 AlignedCodonFrame acf = new AlignedCodonFrame();
1520    // this is like a domain or motif match of part of a peptide sequence
1521  1 MapList map = new MapList(new int[] { 6, 12 }, new int[] { 1, 7 }, 1,
1522    1);
1523  1 acf.addMap(alignFrom.getDatasetSequence(), alignMe.getDatasetSequence(),
1524    map);
1525   
1526  1 AlignmentUtils.alignSequenceAs(alignMe, alignFrom, acf, "-", '-', true,
1527    true);
1528  1 assertEquals("-----MGAASEV-------", alignMe.getSequenceAsString());
1529    }
1530   
1531    /**
1532    * Test for the alignSequenceAs method where there are trailing unmapped
1533    * residues in the model sequence
1534    */
 
1535  1 toggle @Test(groups = { "Functional" })
1536    public void testAlignSequenceAs_withTrailingPeptide()
1537    {
1538    // map first 3 codons to KPF; G is a trailing unmapped residue
1539  1 MapList map = new MapList(new int[] { 1, 9 }, new int[] { 1, 3 }, 3, 1);
1540   
1541  1 checkAlignSequenceAs("AAACCCTTT", "K-PFG", true, true, map,
1542    "AAA---CCCTTT---");
1543    }
1544   
1545    /**
1546    * Tests for transferring features between mapped sequences, with no feature
        * type selected (null) and none omitted: of the 9 features added to the dna,
        * the 6 that overlap the mapped range are expected on the cds
1547    */
 
1548  1 toggle @Test(groups = { "Functional" })
1549    public void testTransferFeatures()
1550    {
1551  1 SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt");
1552  1 SequenceI cds = new Sequence("cds/10-15", "TAGGCC");
1553   
1554    // no overlap
1555  1 dna.addSequenceFeature(
1556    new SequenceFeature("type1", "desc1", 1, 2, 1f, null));
1557    // partial overlap - to [1, 1]
1558  1 dna.addSequenceFeature(
1559    new SequenceFeature("type2", "desc2", 3, 4, 2f, null));
1560    // exact overlap - to [1, 3]
1561  1 dna.addSequenceFeature(
1562    new SequenceFeature("type3", "desc3", 4, 6, 3f, null));
1563    // spanning overlap - to [2, 5]
1564  1 dna.addSequenceFeature(
1565    new SequenceFeature("type4", "desc4", 5, 11, 4f, null));
1566    // exactly overlaps whole mapped range [1, 6]
1567  1 dna.addSequenceFeature(
1568    new SequenceFeature("type5", "desc5", 4, 12, 5f, null));
1569    // no overlap (internal)
1570  1 dna.addSequenceFeature(
1571    new SequenceFeature("type6", "desc6", 7, 9, 6f, null));
1572    // no overlap (3' end)
1573  1 dna.addSequenceFeature(
1574    new SequenceFeature("type7", "desc7", 13, 15, 7f, null));
1575    // overlap (3' end) - to [6, 6]
1576  1 dna.addSequenceFeature(
1577    new SequenceFeature("type8", "desc8", 12, 12, 8f, null));
1578    // extended overlap - to [6, 6] (the part beyond the mapped range is dropped)
1579  1 dna.addSequenceFeature(
1580    new SequenceFeature("type9", "desc9", 12, 13, 9f, null));
1581   
        // dna positions 4-6 and 10-12 map 1:1 to cds positions 1-6
1582  1 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1583    new int[]
1584    { 1, 6 }, 1, 1);
1585   
1586    /*
1587    * transferFeatures() will build 'partial overlap' for regions
1588    * that partially overlap 5' or 3' (start or end) of target sequence
1589    */
1590  1 AlignmentUtils.transferFeatures(dna, cds, map, null);
1591  1 List<SequenceFeature> sfs = cds.getSequenceFeatures();
        // type1, type6 and type7 do not overlap the mapped range
1592  1 assertEquals(6, sfs.size());
1593   
1594  1 SequenceFeature sf = sfs.get(0);
1595  1 assertEquals("type2", sf.getType());
1596  1 assertEquals("desc2", sf.getDescription());
1597  1 assertEquals(2f, sf.getScore());
1598  1 assertEquals(1, sf.getBegin());
1599  1 assertEquals(1, sf.getEnd());
1600   
1601  1 sf = sfs.get(1);
1602  1 assertEquals("type3", sf.getType());
1603  1 assertEquals("desc3", sf.getDescription());
1604  1 assertEquals(3f, sf.getScore());
1605  1 assertEquals(1, sf.getBegin());
1606  1 assertEquals(3, sf.getEnd());
1607   
1608  1 sf = sfs.get(2);
1609  1 assertEquals("type4", sf.getType());
1610  1 assertEquals(2, sf.getBegin());
1611  1 assertEquals(5, sf.getEnd());
1612   
1613  1 sf = sfs.get(3);
1614  1 assertEquals("type5", sf.getType());
1615  1 assertEquals(1, sf.getBegin());
1616  1 assertEquals(6, sf.getEnd());
1617   
1618  1 sf = sfs.get(4);
1619  1 assertEquals("type8", sf.getType());
1620  1 assertEquals(6, sf.getBegin());
1621  1 assertEquals(6, sf.getEnd());
1622   
1623  1 sf = sfs.get(5);
1624  1 assertEquals("type9", sf.getType());
1625  1 assertEquals(6, sf.getBegin());
1626  1 assertEquals(6, sf.getEnd());
1627    }
1628   
1629    /**
1630    * Tests for transferring features between mapped sequences, where some
        * feature types are named as types to omit from the transfer
1631    */
 
1632  1 toggle @Test(groups = { "Functional" })
1633    public void testTransferFeatures_withOmit()
1634    {
1635  1 SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt");
1636  1 SequenceI cds = new Sequence("cds/10-15", "TAGGCC");
1637   
1638  1 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1639    new int[]
1640    { 1, 6 }, 1, 1);
1641   
1642    // [5, 11] maps to [2, 5]
1643  1 dna.addSequenceFeature(
1644    new SequenceFeature("type4", "desc4", 5, 11, 4f, null));
1645    // [4, 12] maps to [1, 6]
1646  1 dna.addSequenceFeature(
1647    new SequenceFeature("type5", "desc5", 4, 12, 5f, null));
1648    // [12, 12] maps to [6, 6]
1649  1 dna.addSequenceFeature(
1650    new SequenceFeature("type8", "desc8", 12, 12, 8f, null));
1651   
1652    // "type4" and "type8" are the 'omit these' varargs (feature types)
1653  1 AlignmentUtils.transferFeatures(dna, cds, map, null, "type4", "type8");
1654  1 List<SequenceFeature> sfs = cds.getSequenceFeatures();
        // only the type5 feature should have been transferred
1655  1 assertEquals(1, sfs.size());
1656   
1657  1 SequenceFeature sf = sfs.get(0);
1658  1 assertEquals("type5", sf.getType());
1659  1 assertEquals(1, sf.getBegin());
1660  1 assertEquals(6, sf.getEnd());
1661    }
1662   
1663    /**
1664    * Tests for transferring features between mapped sequences, where a single
        * feature type is selected for transfer
1665    */
 
1666  1 toggle @Test(groups = { "Functional" })
1667    public void testTransferFeatures_withSelect()
1668    {
1669  1 SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt");
1670  1 SequenceI cds = new Sequence("cds/10-15", "TAGGCC");
1671   
1672  1 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
1673    new int[]
1674    { 1, 6 }, 1, 1);
1675   
1676    // [5, 11] maps to [2, 5]
1677  1 dna.addSequenceFeature(
1678    new SequenceFeature("type4", "desc4", 5, 11, 4f, null));
1679    // [4, 12] maps to [1, 6]
1680  1 dna.addSequenceFeature(
1681    new SequenceFeature("type5", "desc5", 4, 12, 5f, null));
1682    // [12, 12] maps to [6, 6]
1683  1 dna.addSequenceFeature(
1684    new SequenceFeature("type8", "desc8", 12, 12, 8f, null));
1685   
1686    // "type5" is the 'select this type' argument
1687  1 AlignmentUtils.transferFeatures(dna, cds, map, "type5");
1688  1 List<SequenceFeature> sfs = cds.getSequenceFeatures();
        // type4 and type8 overlap the mapped range but were not selected
1689  1 assertEquals(1, sfs.size());
1690   
1691  1 SequenceFeature sf = sfs.get(0);
1692  1 assertEquals("type5", sf.getType());
1693  1 assertEquals(1, sf.getBegin());
1694  1 assertEquals(6, sf.getEnd());
1695    }
1696   
1697    /**
1698    * Test the method that extracts the cds-only part of a dna alignment, for the
1699    * case of two alternative transcripts of the same dna, each mapped to its own
        * peptide product; a third dna sequence with no mapping yields no cds.
1700    */
 
1701  1 toggle @Test(groups = { "Functional" })
1702    public void testMakeCdsAlignment_alternativeTranscripts()
1703    {
1704  1 SequenceI dna1 = new Sequence("dna1", "aaaGGGCC-----CTTTaaaGGG");
1705    // alternative transcript of same dna: its mapping (below) skips bases
        // 9-11 (ctt), giving codons GGG CCT GGG
1706  1 SequenceI dna2 = new Sequence("dna2", "aaaGGGCC-----cttTaaaGGG");
1707    // dna3 has no mapping (protein product) so should be ignored here
1708  1 SequenceI dna3 = new Sequence("dna3", "aaaGGGCCCCCGGGcttTaaaGGG");
1709  1 SequenceI pep1 = new Sequence("pep1", "GPFG");
1710  1 SequenceI pep2 = new Sequence("pep2", "GPG");
1711  1 dna1.createDatasetSequence();
1712  1 dna2.createDatasetSequence();
1713  1 dna3.createDatasetSequence();
1714  1 pep1.createDatasetSequence();
1715  1 pep2.createDatasetSequence();
1716   
1717  1 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
1718  1 dna.setDataset(null);
1719   
        // dna1 bases 4-12 and 16-18 (4 codons) map to pep1 residues 1-4
1720  1 MapList map = new MapList(new int[] { 4, 12, 16, 18 },
1721    new int[]
1722    { 1, 4 }, 3, 1);
1723  1 AlignedCodonFrame acf = new AlignedCodonFrame();
1724  1 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
1725  1 dna.addCodonFrame(acf);
        // dna2 bases 4-8, 12 and 16-18 (3 codons) map to pep2 residues 1-3
1726  1 map = new MapList(new int[] { 4, 8, 12, 12, 16, 18 },
1727    new int[]
1728    { 1, 3 }, 3, 1);
1729  1 acf = new AlignedCodonFrame();
1730  1 acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
1731  1 dna.addCodonFrame(acf);
1732   
1733  1 AlignmentI cds = AlignmentUtils
1734    .makeCdsAlignment(new SequenceI[]
1735    { dna1, dna2, dna3 }, dna.getDataset(), null);
1736  1 List<SequenceI> cdsSeqs = cds.getSequences();
        // one cds per mapped dna sequence; the expected cds strings are ungapped
1737  1 assertEquals(2, cdsSeqs.size());
1738  1 assertEquals("GGGCCCTTTGGG", cdsSeqs.get(0).getSequenceAsString());
1739  1 assertEquals("GGGCCTGGG", cdsSeqs.get(1).getSequenceAsString());
1740   
1741    /*
1742    * verify shared, extended alignment dataset
1743    */
1744  1 assertSame(dna.getDataset(), cds.getDataset());
1745  1 assertTrue(dna.getDataset().getSequences()
1746    .contains(cdsSeqs.get(0).getDatasetSequence()));
1747  1 assertTrue(dna.getDataset().getSequences()
1748    .contains(cdsSeqs.get(1).getDatasetSequence()));
1749   
1750    /*
1751    * Verify 6 mappings: dna1 to cds1, cds1 to pep1, dna1 to pep1
1752    * and the same for dna2/cds2/pep2
1753    */
1754  1 List<AlignedCodonFrame> mappings = cds.getCodonFrames();
1755  1 assertEquals(6, mappings.size());
1756   
1757    /*
1758    * 2 mappings involve pep1 (dna1 to pep1, cds1 to pep1)
1759    */
1760  1 List<AlignedCodonFrame> pep1Mappings = MappingUtils
1761    .findMappingsForSequence(pep1, mappings);
1762  1 assertEquals(2, pep1Mappings.size());
1763   
1764    /*
1765    * Get mapping of pep1 to cds1 and verify it
1766    * maps GPFG to 1-3,4-6,7-9,10-12
1767    */
1768  1 List<AlignedCodonFrame> pep1CdsMappings = MappingUtils
1769    .findMappingsForSequence(cds.getSequenceAt(0), pep1Mappings);
1770  1 assertEquals(1, pep1CdsMappings.size());
1771  1 SearchResultsI sr = MappingUtils.buildSearchResults(pep1, 1,
1772    pep1CdsMappings);
1773  1 assertEquals(1, sr.getResults().size());
1774  1 SearchResultMatchI m = sr.getResults().get(0);
1775  1 assertEquals(cds.getSequenceAt(0).getDatasetSequence(),
1776    m.getSequence());
1777  1 assertEquals(1, m.getStart());
1778  1 assertEquals(3, m.getEnd());
1779  1 sr = MappingUtils.buildSearchResults(pep1, 2, pep1CdsMappings);
1780  1 m = sr.getResults().get(0);
1781  1 assertEquals(4, m.getStart());
1782  1 assertEquals(6, m.getEnd());
1783  1 sr = MappingUtils.buildSearchResults(pep1, 3, pep1CdsMappings);
1784  1 m = sr.getResults().get(0);
1785  1 assertEquals(7, m.getStart());
1786  1 assertEquals(9, m.getEnd());
1787  1 sr = MappingUtils.buildSearchResults(pep1, 4, pep1CdsMappings);
1788  1 m = sr.getResults().get(0);
1789  1 assertEquals(10, m.getStart());
1790  1 assertEquals(12, m.getEnd());
1791   
1792    /*
1793    * Get mapping of pep2 to cds2 and verify it
1794    * maps GPG in pep2 to 1-3,4-6,7-9 in second CDS sequence
1795    */
1796  1 List<AlignedCodonFrame> pep2Mappings = MappingUtils
1797    .findMappingsForSequence(pep2, mappings);
1798  1 assertEquals(2, pep2Mappings.size());
1799  1 List<AlignedCodonFrame> pep2CdsMappings = MappingUtils
1800    .findMappingsForSequence(cds.getSequenceAt(1), pep2Mappings);
1801  1 assertEquals(1, pep2CdsMappings.size());
1802  1 sr = MappingUtils.buildSearchResults(pep2, 1, pep2CdsMappings);
1803  1 assertEquals(1, sr.getResults().size());
1804  1 m = sr.getResults().get(0);
1805  1 assertEquals(cds.getSequenceAt(1).getDatasetSequence(),
1806    m.getSequence());
1807  1 assertEquals(1, m.getStart());
1808  1 assertEquals(3, m.getEnd());
1809  1 sr = MappingUtils.buildSearchResults(pep2, 2, pep2CdsMappings);
1810  1 m = sr.getResults().get(0);
1811  1 assertEquals(4, m.getStart());
1812  1 assertEquals(6, m.getEnd());
1813  1 sr = MappingUtils.buildSearchResults(pep2, 3, pep2CdsMappings);
1814  1 m = sr.getResults().get(0);
1815  1 assertEquals(7, m.getStart());
1816  1 assertEquals(9, m.getEnd());
1817    }
1818   
1819    /**
1820    * Test the method that realigns protein to match mapped codon alignment, for
        * proteins that begin with an unmapped 'X' (incomplete start codon).
1821    */
 
1822  1 toggle @Test(groups = { "Functional" })
1823    public void testAlignProteinAsDna_incompleteStartCodon()
1824    {
1825    // seq1: incomplete start codon (not mapped), then [3, 11]
1826  1 SequenceI dna1 = new Sequence("Seq1", "ccAAA-TTT-GGG-");
1827    // seq2 codons are [4, 5], [8, 11]
1828  1 SequenceI dna2 = new Sequence("Seq2", "ccaAA-ttT-GGG-");
1829    // seq3 incomplete start codon at 'tt'
1830  1 SequenceI dna3 = new Sequence("Seq3", "ccaaa-ttt-GGG-");
1831  1 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
1832  1 dna.setDataset(null);
1833   
1834    // prot1 has 'X' for incomplete start codon (not mapped)
1835  1 SequenceI prot1 = new Sequence("Seq1", "XKFG"); // X for incomplete start
1836  1 SequenceI prot2 = new Sequence("Seq2", "NG");
1837  1 SequenceI prot3 = new Sequence("Seq3", "XG"); // X for incomplete start
1838  1 AlignmentI protein = new Alignment(
1839    new SequenceI[]
1840    { prot1, prot2, prot3 });
1841  1 protein.setDataset(null);
1842   
1843    // map dna1 [3, 11] to prot1 [2, 4] KFG
1844  1 MapList map = new MapList(new int[] { 3, 11 }, new int[] { 2, 4 }, 3,
1845    1);
1846  1 AlignedCodonFrame acf = new AlignedCodonFrame();
1847  1 acf.addMap(dna1.getDatasetSequence(), prot1.getDatasetSequence(), map);
1848   
1849    // map dna2 [4, 5] [8, 11] to prot2 [1, 2] NG
1850  1 map = new MapList(new int[] { 4, 5, 8, 11 }, new int[] { 1, 2 }, 3, 1);
1851  1 acf.addMap(dna2.getDatasetSequence(), prot2.getDatasetSequence(), map);
1852   
1853    // map dna3 [9, 11] to prot3 [2, 2] G
1854  1 map = new MapList(new int[] { 9, 11 }, new int[] { 2, 2 }, 3, 1);
1855  1 acf.addMap(dna3.getDatasetSequence(), prot3.getDatasetSequence(), map);
1856   
1857  1 ArrayList<AlignedCodonFrame> acfs = new ArrayList<>();
1858  1 acfs.add(acf);
1859  1 protein.setCodonFrames(acfs);
1860   
1861    /*
1862    * verify X is included in the aligned proteins, and placed just
1863    * before the first mapped residue
1864    * CCT is between CCC and TTT
        * NOTE(review): the 'CCT is between CCC and TTT' remark does not match
        * any codon in this test's sequences - it looks stale; confirm and remove
1865    */
1866  1 AlignmentUtils.alignProteinAsDna(protein, dna);
1867  1 assertEquals("XK-FG", prot1.getSequenceAsString());
1868  1 assertEquals("--N-G", prot2.getSequenceAsString());
1869  1 assertEquals("---XG", prot3.getSequenceAsString());
1870    }
1871   
1872    /**
1873    * Tests for the method that maps the subset of a dna sequence that has CDS
1874    * (or subtype) feature - case where the start codon is incomplete (the first
        * CDS feature has phase 2, so its first two bases are not in the result).
1875    */
 
1876  1 toggle @Test(groups = "Functional")
1877    public void testFindCdsPositions_fivePrimeIncomplete()
1878    {
1879  1 SequenceI dnaSeq = new Sequence("dna", "aaagGGCCCaaaTTTttt");
1880  1 dnaSeq.createDatasetSequence();
1881  1 SequenceI ds = dnaSeq.getDatasetSequence();
1882   
1883    // CDS for dna 5-6 (incomplete codon), 7-9
1884  1 SequenceFeature sf = new SequenceFeature("CDS", "", 5, 9, 0f, null);
1885  1 sf.setPhase("2"); // skip 2 bases to start of next codon
1886  1 ds.addSequenceFeature(sf);
1887    // CDS for dna 13-15
1888  1 sf = new SequenceFeature("CDS_predicted", "", 13, 15, 0f, null);
1889  1 ds.addSequenceFeature(sf);
1890   
1891  1 List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);
1892   
1893    /*
1894    * check the mapping starts with the first complete codon:
        * expect ranges [7, 9], [13, 15]
1895    */
1896  1 assertEquals(6, MappingUtils.getLength(ranges));
1897  1 assertEquals(2, ranges.size());
1898  1 assertEquals(7, ranges.get(0)[0]);
1899  1 assertEquals(9, ranges.get(0)[1]);
1900  1 assertEquals(13, ranges.get(1)[0]);
1901  1 assertEquals(15, ranges.get(1)[1]);
1902    }
1903   
1904    /**
1905    * Tests for the method that maps the subset of a dna sequence that has CDS
1906    * (or subtype) feature. The features are added out of order, and a non-CDS
        * (exon) feature is included which should not contribute a range.
1907    */
 
1908  1 toggle @Test(groups = "Functional")
1909    public void testFindCdsPositions()
1910    {
1911  1 SequenceI dnaSeq = new Sequence("dna", "aaaGGGcccAAATTTttt");
1912  1 dnaSeq.createDatasetSequence();
1913  1 SequenceI ds = dnaSeq.getDatasetSequence();
1914   
1915    // CDS for dna 10-12
1916  1 SequenceFeature sf = new SequenceFeature("CDS_predicted", "", 10, 12,
1917    0f, null);
1918  1 sf.setStrand("+");
1919  1 ds.addSequenceFeature(sf);
1920    // CDS for dna 4-6
1921  1 sf = new SequenceFeature("CDS", "", 4, 6, 0f, null);
1922  1 sf.setStrand("+");
1923  1 ds.addSequenceFeature(sf);
1924    // exon feature should be ignored here
1925  1 sf = new SequenceFeature("exon", "", 7, 9, 0f, null);
1926  1 ds.addSequenceFeature(sf);
1927   
1928  1 List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);
1929    /*
1930    * verify ranges { [4-6], [10-12] }
1931    * note CDS ranges are ordered ascending even if the CDS
1932    * features are not
1933    */
1934  1 assertEquals(6, MappingUtils.getLength(ranges));
1935  1 assertEquals(2, ranges.size());
1936  1 assertEquals(4, ranges.get(0)[0]);
1937  1 assertEquals(6, ranges.get(0)[1]);
1938  1 assertEquals(10, ranges.get(1)[0]);
1939  1 assertEquals(12, ranges.get(1)[1]);
1940    }
1941   
1942    /**
1943    * Tests for the method that maps the subset of a dna sequence that has CDS
1944    * (or subtype) feature, with CDS strand = '-' (reverse). This test is
        * disabled (enabled = false) - see the note below.
1945    */
1946    // test turned off as currently findCdsPositions is not strand-dependent
1947    // left in case it comes around again...
 
1948  0 toggle @Test(groups = "Functional", enabled = false)
1949    public void testFindCdsPositions_reverseStrand()
1950    {
1951  0 SequenceI dnaSeq = new Sequence("dna", "aaaGGGcccAAATTTttt");
1952  0 dnaSeq.createDatasetSequence();
1953  0 SequenceI ds = dnaSeq.getDatasetSequence();
1954   
1955    // CDS for dna 4-6
1956  0 SequenceFeature sf = new SequenceFeature("CDS", "", 4, 6, 0f, null);
1957  0 sf.setStrand("-");
1958  0 ds.addSequenceFeature(sf);
1959    // exon feature should be ignored here
1960  0 sf = new SequenceFeature("exon", "", 7, 9, 0f, null);
1961  0 ds.addSequenceFeature(sf);
1962    // CDS for dna 10-12
1963  0 sf = new SequenceFeature("CDS_predicted", "", 10, 12, 0f, null);
1964  0 sf.setStrand("-");
1965  0 ds.addSequenceFeature(sf);
1966   
1967  0 List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);
1968    /*
1969    * verify ranges { [12-10], [6-4] }
        * (descending order and reversed start/end, for the reverse strand)
1970    */
1971  0 assertEquals(6, MappingUtils.getLength(ranges));
1972  0 assertEquals(2, ranges.size());
1973  0 assertEquals(12, ranges.get(0)[0]);
1974  0 assertEquals(10, ranges.get(0)[1]);
1975  0 assertEquals(6, ranges.get(1)[0]);
1976  0 assertEquals(4, ranges.get(1)[1]);
1977    }
1978   
1979    /**
1980    * Tests for the method that maps the subset of a dna sequence that has CDS
1981    * (or subtype) feature - reverse strand case where the start codon is
1982    * incomplete. This test is disabled (enabled = false) - see the note below.
1983    */
 
1984  0 toggle @Test(groups = "Functional", enabled = false)
1985    // test turned off as currently findCdsPositions is not strand-dependent
1986    // left in case it comes around again...
1987    public void testFindCdsPositions_reverseStrandThreePrimeIncomplete()
1988    {
1989  0 SequenceI dnaSeq = new Sequence("dna", "aaagGGCCCaaaTTTttt");
1990  0 dnaSeq.createDatasetSequence();
1991  0 SequenceI ds = dnaSeq.getDatasetSequence();
1992   
1993    // CDS for dna 5-9
1994  0 SequenceFeature sf = new SequenceFeature("CDS", "", 5, 9, 0f, null);
1995  0 sf.setStrand("-");
1996  0 ds.addSequenceFeature(sf);
1997    // CDS for dna 13-15
1998  0 sf = new SequenceFeature("CDS_predicted", "", 13, 15, 0f, null);
1999  0 sf.setStrand("-");
2000  0 sf.setPhase("2"); // skip 2 bases to start of next codon
2001  0 ds.addSequenceFeature(sf);
2002   
2003  0 List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);
2004   
2005    /*
2006    * check the mapping starts with the first complete codon
2007    * expect ranges [13, 13], [9, 5]
2008    */
2009  0 assertEquals(6, MappingUtils.getLength(ranges));
2010  0 assertEquals(2, ranges.size());
2011  0 assertEquals(13, ranges.get(0)[0]);
2012  0 assertEquals(13, ranges.get(0)[1]);
2013  0 assertEquals(9, ranges.get(1)[0]);
2014  0 assertEquals(5, ranges.get(1)[1]);
2015    }
2016   
 
/**
 * Test for alignAs where two ungapped dna sequences are each mapped to a cds
 * covering a different part of the dna: cds1 maps to bases 4-9 of dna1, cds2
 * to bases 1-3 and 10-12 of dna2. The cds alignment is expected to be gapped
 * so that each cds base lines up with its mapped dna base.
 */
2017  1 toggle @Test(groups = "Functional")
2018    public void testAlignAs_alternateTranscriptsUngapped()
2019    {
2020  1 SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa");
2021  1 SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA");
2022  1 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });
2023  1 ((Alignment) dna).createDatasetAlignment();
2024  1 SequenceI cds1 = new Sequence("cds1", "GGGTTT");
2025  1 SequenceI cds2 = new Sequence("cds2", "CCCAAA");
2026  1 AlignmentI cds = new Alignment(new SequenceI[] { cds1, cds2 });
2027  1 ((Alignment) cds).createDatasetAlignment();
2028   
2029  1 AlignedCodonFrame acf = new AlignedCodonFrame();
2030  1 MapList map = new MapList(new int[] { 4, 9 }, new int[] { 1, 6 }, 1, 1);
2031  1 acf.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(), map);
2032  1 map = new MapList(new int[] { 1, 3, 10, 12 }, new int[] { 1, 6 }, 1, 1);
2033  1 acf.addMap(dna2.getDatasetSequence(), cds2.getDatasetSequence(), map);
2034   
2035    /*
2036    * verify CDS alignment is as:
2037    * cccGGGTTTaaa (cdna)
2038    * CCCgggtttAAA (cdna)
2039    *
2040    * ---GGGTTT (cds - the assertion below expects no trailing gaps)
2041    * CCC------AAA (cds)
2042    */
2043  1 dna.addCodonFrame(acf);
2044  1 AlignmentUtils.alignAs(cds, dna);
2045  1 assertEquals("---GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
2046  1 assertEquals("CCC------AAA",
2047    cds.getSequenceAt(1).getSequenceAsString());
2048    }
2049   
 
/**
 * Test for addMappedPositions: 'from' positions 3-6 and 9-10 (AAAT, TT) are
 * mapped 1:1 to the six residues of seq1, and the resulting map is expected
 * to hold each seq1 residue keyed by the aligned column (counting gaps) that
 * its mapped 'from' residue occupies.
 */
2050  1 toggle @Test(groups = { "Functional" })
2051    public void testAddMappedPositions()
2052    {
2053  1 SequenceI from = new Sequence("dna", "ggAA-ATcc-TT-g");
2054  1 SequenceI seq1 = new Sequence("cds", "AAATTT");
2055  1 from.createDatasetSequence();
2056  1 seq1.createDatasetSequence();
2057  1 Mapping mapping = new Mapping(seq1,
2058    new MapList(new int[]
2059    { 3, 6, 9, 10 }, new int[] { 1, 6 }, 1, 1));
2060  1 Map<Integer, Map<SequenceI, Character>> map = new TreeMap<>();
2061  1 AlignmentUtils.addMappedPositions(seq1, from, mapping, map);
2062   
2063    /*
2064    * verify map has seq1 residues in columns 3,4,6,7,11,12
        * (the columns of AA-AT and TT in the gapped 'from' sequence)
2065    */
2066  1 assertEquals(6, map.size());
2067  1 assertEquals('A', map.get(3).get(seq1).charValue());
2068  1 assertEquals('A', map.get(4).get(seq1).charValue());
2069  1 assertEquals('A', map.get(6).get(seq1).charValue());
2070  1 assertEquals('T', map.get(7).get(seq1).charValue());
2071  1 assertEquals('T', map.get(11).get(seq1).charValue());
2072  1 assertEquals('T', map.get(12).get(seq1).charValue());
2073   
2074    /*
2075    *
2076    */
2077    }
2078   
2079    /**
2080    * Test case where the mapping 'from' range includes a stop codon which is
2081    * absent in the 'to' range
        * 
        * NOTE(review): the sequences, mapping and assertions here are the same as
        * in testAddMappedPositions, and the 'from' range { 3, 6, 9, 10 } and 'to'
        * range { 1, 6 } are both of length 6 - confirm whether this really
        * exercises the stop codon case described above
2082    */
 
2083  1 toggle @Test(groups = { "Functional" })
2084    public void testAddMappedPositions_withStopCodon()
2085    {
2086  1 SequenceI from = new Sequence("dna", "ggAA-ATcc-TT-g");
2087  1 SequenceI seq1 = new Sequence("cds", "AAATTT");
2088  1 from.createDatasetSequence();
2089  1 seq1.createDatasetSequence();
2090  1 Mapping mapping = new Mapping(seq1,
2091    new MapList(new int[]
2092    { 3, 6, 9, 10 }, new int[] { 1, 6 }, 1, 1));
2093  1 Map<Integer, Map<SequenceI, Character>> map = new TreeMap<>();
2094  1 AlignmentUtils.addMappedPositions(seq1, from, mapping, map);
2095   
2096    /*
2097    * verify map has seq1 residues in columns 3,4,6,7,11,12
2098    */
2099  1 assertEquals(6, map.size());
2100  1 assertEquals('A', map.get(3).get(seq1).charValue());
2101  1 assertEquals('A', map.get(4).get(seq1).charValue());
2102  1 assertEquals('A', map.get(6).get(seq1).charValue());
2103  1 assertEquals('T', map.get(7).get(seq1).charValue());
2104  1 assertEquals('T', map.get(11).get(seq1).charValue());
2105  1 assertEquals('T', map.get(12).get(seq1).charValue());
2106    }
2107   
2108    /**
2109    * Test for the case where the products for which we want CDS are specified.
2110    * This is to represent the case where EMBL has CDS mappings to both Uniprot
2111    * and EMBLCDSPROTEIN. makeCdsAlignment() should only return the mappings for
2112    * the protein sequences specified (here the two 'EMBL' peptides, pep3 and
        * pep4, and not the 'Uniprot' peptides pep1 and pep2).
2113    */
 
2114  1 toggle @Test(groups = { "Functional" })
2115    public void testMakeCdsAlignment_filterProducts()
2116    {
2117  1 SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
2118  1 SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC");
2119  1 SequenceI pep1 = new Sequence("Uniprot|pep1", "GF");
2120  1 SequenceI pep2 = new Sequence("Uniprot|pep2", "GFP");
2121  1 SequenceI pep3 = new Sequence("EMBL|pep3", "GF");
2122  1 SequenceI pep4 = new Sequence("EMBL|pep4", "GFP");
2123  1 dna1.createDatasetSequence();
2124  1 dna2.createDatasetSequence();
2125  1 pep1.createDatasetSequence();
2126  1 pep2.createDatasetSequence();
2127  1 pep3.createDatasetSequence();
2128  1 pep4.createDatasetSequence();
2129  1 AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });
2130  1 dna.setDataset(null);
2131  1 AlignmentI emblPeptides = new Alignment(new SequenceI[] { pep3, pep4 });
2132  1 emblPeptides.setDataset(null);
2133   
        // dna1 codons at 4-6 and 10-12 map to both pep1 and pep3
2134  1 AlignedCodonFrame acf = new AlignedCodonFrame();
2135  1 MapList map = new MapList(new int[] { 4, 6, 10, 12 },
2136    new int[]
2137    { 1, 2 }, 3, 1);
2138  1 acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
2139  1 acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map);
2140  1 dna.addCodonFrame(acf);
2141   
        // dna2 codons at 1-3, 7-9 and 13-15 map to both pep2 and pep4
2142  1 acf = new AlignedCodonFrame();
2143  1 map = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, new int[] { 1, 3 },
2144    3, 1);
2145  1 acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
2146  1 acf.addMap(dna2.getDatasetSequence(), pep4.getDatasetSequence(), map);
2147  1 dna.addCodonFrame(acf);
2148   
2149    /*
2150    * execute method under test to find CDS for EMBL peptides only
2151    */
2152  1 AlignmentI cds = AlignmentUtils
2153    .makeCdsAlignment(new SequenceI[]
2154    { dna1, dna2 }, dna.getDataset(),
2155    emblPeptides.getSequencesArray());
2156   
2157  1 assertEquals(2, cds.getSequences().size());
2158  1 assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
2159  1 assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString());
2160   
2161    /*
2162    * verify shared, extended alignment dataset
2163    */
2164  1 assertSame(dna.getDataset(), cds.getDataset());
2165  1 assertTrue(dna.getDataset().getSequences()
2166    .contains(cds.getSequenceAt(0).getDatasetSequence()));
2167  1 assertTrue(dna.getDataset().getSequences()
2168    .contains(cds.getSequenceAt(1).getDatasetSequence()));
2169   
2170    /*
2171    * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide
2172    * the mappings are on the shared alignment dataset
2173    */
2174  1 List<AlignedCodonFrame> cdsMappings = cds.getDataset().getCodonFrames();
2175    /*
2176    * 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep)
2177    */
2178  1 assertEquals(6, cdsMappings.size());
2179   
2180    /*
2181    * verify that mapping sets for dna and cds alignments are different
2182    * [not current behaviour - all mappings are on the alignment dataset]
2183    */
2184    // select -> subselect type to test.
2185    // Assert.assertNotSame(dna.getCodonFrames(), cds.getCodonFrames());
2186    // assertEquals(4, dna.getCodonFrames().size());
2187    // assertEquals(4, cds.getCodonFrames().size());
2188   
2189    /*
2190    * Two mappings involve pep3 (dna to pep3, cds to pep3)
2191    * Mapping from pep3 to GGGTTT in first new CDS sequence
2192    */
2193  1 List<AlignedCodonFrame> pep3Mappings = MappingUtils
2194    .findMappingsForSequence(pep3, cdsMappings);
2195  1 assertEquals(2, pep3Mappings.size());
2196  1 List<AlignedCodonFrame> mappings = MappingUtils
2197    .findMappingsForSequence(cds.getSequenceAt(0), pep3Mappings);
2198  1 assertEquals(1, mappings.size());
2199   
2200    // map G to GGG
2201  1 SearchResultsI sr = MappingUtils.buildSearchResults(pep3, 1, mappings);
2202  1 assertEquals(1, sr.getResults().size());
2203  1 SearchResultMatchI m = sr.getResults().get(0);
2204  1 assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
2205  1 assertEquals(1, m.getStart());
2206  1 assertEquals(3, m.getEnd());
2207    // map F to TTT
2208  1 sr = MappingUtils.buildSearchResults(pep3, 2, mappings);
2209  1 m = sr.getResults().get(0);
2210  1 assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
2211  1 assertEquals(4, m.getStart());
2212  1 assertEquals(6, m.getEnd());
2213   
2214    /*
2215    * Two mappings involve pep4 (dna to pep4, cds to pep4)
2216    * Verify mapping from pep4 to GGGTTTCCC in second new CDS sequence
2217    */
2218  1 List<AlignedCodonFrame> pep4Mappings = MappingUtils
2219    .findMappingsForSequence(pep4, cdsMappings);
2220  1 assertEquals(2, pep4Mappings.size());
2221  1 mappings = MappingUtils.findMappingsForSequence(cds.getSequenceAt(1),
2222    pep4Mappings);
2223  1 assertEquals(1, mappings.size());
2224    // map G to GGG
2225  1 sr = MappingUtils.buildSearchResults(pep4, 1, mappings);
2226  1 assertEquals(1, sr.getResults().size());
2227  1 m = sr.getResults().get(0);
2228  1 assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
2229  1 assertEquals(1, m.getStart());
2230  1 assertEquals(3, m.getEnd());
2231    // map F to TTT
2232  1 sr = MappingUtils.buildSearchResults(pep4, 2, mappings);
2233  1 m = sr.getResults().get(0);
2234  1 assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
2235  1 assertEquals(4, m.getStart());
2236  1 assertEquals(6, m.getEnd());
2237    // map P to CCC
2238  1 sr = MappingUtils.buildSearchResults(pep4, 3, mappings);
2239  1 m = sr.getResults().get(0);
2240  1 assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
2241  1 assertEquals(7, m.getStart());
2242  1 assertEquals(9, m.getEnd());
2243    }
2244   
2245    /**
2246    * Test the method that just copies aligned sequences, provided all sequences
2247    * to be aligned share the aligned sequence's dataset. Here al1 is the
        * unaligned alignment (first argument) and al2 the aligned one (second).
2248    */
 
2249  1 toggle @Test(groups = "Functional")
2250    public void testAlignAsSameSequences()
2251    {
2252  1 SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa");
2253  1 SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA");
2254  1 AlignmentI al1 = new Alignment(new SequenceI[] { dna1, dna2 });
2255  1 ((Alignment) al1).createDatasetAlignment();
2256   
        // dna3, dna4 are copies sharing the dataset sequences of dna1, dna2,
        // then given gapped (aligned) sequence strings
2257  1 SequenceI dna3 = new Sequence(dna1);
2258  1 SequenceI dna4 = new Sequence(dna2);
2259  1 assertSame(dna3.getDatasetSequence(), dna1.getDatasetSequence());
2260  1 assertSame(dna4.getDatasetSequence(), dna2.getDatasetSequence());
2261  1 String seq1 = "-cc-GG-GT-TT--aaa";
2262  1 dna3.setSequence(seq1);
2263  1 String seq2 = "C--C-Cgg--gtt-tAA-A-";
2264  1 dna4.setSequence(seq2);
2265  1 AlignmentI al2 = new Alignment(new SequenceI[] { dna3, dna4 });
2266  1 ((Alignment) al2).createDatasetAlignment();
2267   
2268    /*
2269    * alignment removes gapped columns (two internal, two trailing)
2270    */
2271  1 assertTrue(AlignmentUtils.alignAsSameSequences(al1, al2));
2272  1 String aligned1 = "-cc-GG-GTTT-aaa";
2273  1 assertEquals(aligned1, al1.getSequenceAt(0).getSequenceAsString());
2274  1 String aligned2 = "C--C-Cgg-gtttAAA";
2275  1 assertEquals(aligned2, al1.getSequenceAt(1).getSequenceAsString());
2276   
2277    /*
2278    * add another sequence to 'aligned' (al2) - should still succeed, since
2279    * unaligned sequences still share a dataset with aligned sequences
2280    */
2281  1 SequenceI dna5 = new Sequence("dna5", "CCCgggtttAAA");
2282  1 dna5.createDatasetSequence();
2283  1 al2.addSequence(dna5);
2284  1 assertTrue(AlignmentUtils.alignAsSameSequences(al1, al2));
2285  1 assertEquals(aligned1, al1.getSequenceAt(0).getSequenceAsString());
2286  1 assertEquals(aligned2, al1.getSequenceAt(1).getSequenceAsString());
2287   
2288    /*
2289    * add another sequence to 'unaligned' (al1) - should fail, since now not
2290    * all unaligned sequences share a dataset with aligned sequences
2291    */
2292  1 SequenceI dna6 = new Sequence("dna6", "CCCgggtttAAA");
2293  1 dna6.createDatasetSequence();
2294  1 al1.addSequence(dna6);
2295    // JAL-2110 JBP Comment: what's the use case for this behaviour ?
2296  1 assertFalse(AlignmentUtils.alignAsSameSequences(al1, al2));
2297    }
2298   
 
/**
 * Test for alignAsSameSequences where more than one sequence in each
 * alignment derives from the same sequence: as1/uas1 are full-length
 * derivations of dna1 and as2/uas2 are subsequences of it, while as3/uas3
 * derive from dna2.
 */
2299  1 toggle @Test(groups = "Functional")
2300    public void testAlignAsSameSequencesMultipleSubSeq()
2301    {
2302  1 SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa");
2303  1 SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA");
2304  1 SequenceI as1 = dna1.deriveSequence(); // cccGGGTTTaaa/1-12
2305  1 SequenceI as2 = dna1.deriveSequence().getSubSequence(3, 7); // GGGT/4-7
2306  1 SequenceI as3 = dna2.deriveSequence(); // CCCgggtttAAA/1-12
        // insert a run of five gaps into each of the 'aligned' sequences
2307  1 as1.insertCharAt(6, 5, '-');
2308  1 assertEquals("cccGGG-----TTTaaa", as1.getSequenceAsString());
2309  1 as2.insertCharAt(6, 5, '-');
2310  1 assertEquals("GGGT-----", as2.getSequenceAsString());
2311  1 as3.insertCharAt(3, 5, '-');
2312  1 assertEquals("CCC-----gggtttAAA", as3.getSequenceAsString());
2313  1 AlignmentI aligned = new Alignment(new SequenceI[] { as1, as2, as3 });
2314   
2315    // why do we need to cast this still ?
2316  1 ((Alignment) aligned).createDatasetAlignment();
        // ungapped counterparts of as1, as2, as3, derived the same way
2317  1 SequenceI uas1 = dna1.deriveSequence();
2318  1 SequenceI uas2 = dna1.deriveSequence().getSubSequence(3, 7);
2319  1 SequenceI uas3 = dna2.deriveSequence();
2320  1 AlignmentI tobealigned = new Alignment(
2321    new SequenceI[]
2322    { uas1, uas2, uas3 });
2323  1 ((Alignment) tobealigned).createDatasetAlignment();
2324   
2325    /*
2326    * alignAs lines up dataset sequences and removes empty columns (two)
2327    */
2328  1 assertTrue(AlignmentUtils.alignAsSameSequences(tobealigned, aligned));
2329  1 assertEquals("cccGGG---TTTaaa", uas1.getSequenceAsString());
2330  1 assertEquals("GGGT", uas2.getSequenceAsString());
2331  1 assertEquals("CCC---gggtttAAA", uas3.getSequenceAsString());
2332    }
2333   
 
2334  1 toggle @Test(groups = { "Functional" })
2335    public void testTransferGeneLoci()
2336    {
2337  1 SequenceI from = new Sequence("transcript",
2338    "aaacccgggTTTAAACCCGGGtttaaacccgggttt");
2339  1 SequenceI to = new Sequence("CDS", "TTTAAACCCGGG");
2340  1 MapList map = new MapList(new int[] { 1, 12 }, new int[] { 10, 21 }, 1,
2341    1);
2342   
2343    /*
2344    * first with nothing to transfer
2345    */
2346  1 AlignmentUtils.transferGeneLoci(from, map, to);
2347  1 assertNull(to.getGeneLoci());
2348   
2349    /*
2350    * next with gene loci set on 'from' sequence
2351    */
2352  1 int[] exons = new int[] { 100, 105, 155, 164, 210, 229 };
2353  1 MapList geneMap = new MapList(new int[] { 1, 36 }, exons, 1, 1);
2354  1 from.setGeneLoci("human", "GRCh38", "7", geneMap);
2355  1 AlignmentUtils.transferGeneLoci(from, map, to);
2356   
2357  1 GeneLociI toLoci = to.getGeneLoci();
2358  1 assertNotNull(toLoci);
2359    // DBRefEntry constructor upper-cases 'source'
2360  1 assertEquals("HUMAN", toLoci.getSpeciesId());
2361  1 assertEquals("GRCh38", toLoci.getAssemblyId());
2362  1 assertEquals("7", toLoci.getChromosomeId());
2363   
2364    /*
2365    * transcript 'exons' are 1-6, 7-16, 17-36
2366    * CDS 1:12 is transcript 10-21
2367    * transcript 'CDS' is 10-16, 17-21
2368    * which is 'gene' 158-164, 210-214
2369    */
2370  1 MapList toMap = toLoci.getMapping();
2371  1 assertEquals(1, toMap.getFromRanges().size());
2372  1 assertEquals(2, toMap.getFromRanges().get(0).length);
2373  1 assertEquals(1, toMap.getFromRanges().get(0)[0]);
2374  1 assertEquals(12, toMap.getFromRanges().get(0)[1]);
2375  1 assertEquals(2, toMap.getToRanges().size());
2376  1 assertEquals(2, toMap.getToRanges().get(0).length);
2377  1 assertEquals(158, toMap.getToRanges().get(0)[0]);
2378  1 assertEquals(164, toMap.getToRanges().get(0)[1]);
2379  1 assertEquals(210, toMap.getToRanges().get(1)[0]);
2380  1 assertEquals(214, toMap.getToRanges().get(1)[1]);
2381    // or summarised as (but toString might change in future):
2382  1 assertEquals("[ [1, 12] ] 1:1 to [ [158, 164] [210, 214] ]",
2383    toMap.toString());
2384   
2385    /*
2386    * an existing value is not overridden
2387    */
2388  1 geneMap = new MapList(new int[] { 1, 36 }, new int[] { 36, 1 }, 1, 1);
2389  1 from.setGeneLoci("inhuman", "GRCh37", "6", geneMap);
2390  1 AlignmentUtils.transferGeneLoci(from, map, to);
2391  1 assertEquals("GRCh38", toLoci.getAssemblyId());
2392  1 assertEquals("7", toLoci.getChromosomeId());
2393  1 toMap = toLoci.getMapping();
2394  1 assertEquals("[ [1, 12] ] 1:1 to [ [158, 164] [210, 214] ]",
2395    toMap.toString());
2396    }
2397   
2398    /**
2399    * Tests for the method that maps nucleotide to protein based on CDS features
2400    */
 
2401  1 toggle @Test(groups = "Functional")
2402    public void testMapCdsToProtein()
2403    {
2404  1 SequenceI peptide = new Sequence("pep", "KLQ");
2405   
2406    /*
2407    * Case 1: CDS 3 times length of peptide
2408    * NB method only checks lengths match, not translation
2409    */
2410  1 SequenceI dna = new Sequence("dna", "AACGacgtCTCCT");
2411  1 dna.createDatasetSequence();
2412  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
2413  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 13, null));
2414  1 MapList ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
2415  1 assertEquals(3, ml.getFromRatio());
2416  1 assertEquals(1, ml.getToRatio());
2417  1 assertEquals("[[1, 3]]",
2418    Arrays.deepToString(ml.getToRanges().toArray()));
2419  1 assertEquals("[[1, 4], [9, 13]]",
2420    Arrays.deepToString(ml.getFromRanges().toArray()));
2421   
2422    /*
2423    * Case 2: CDS 3 times length of peptide + stop codon
2424    * (note code does not currently check trailing codon is a stop codon)
2425    */
2426  1 dna = new Sequence("dna", "AACGacgtCTCCTCCC");
2427  1 dna.createDatasetSequence();
2428  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
2429  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 16, null));
2430  1 ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
2431  1 assertEquals(3, ml.getFromRatio());
2432  1 assertEquals(1, ml.getToRatio());
2433  1 assertEquals("[[1, 3]]",
2434    Arrays.deepToString(ml.getToRanges().toArray()));
2435  1 assertEquals("[[1, 4], [9, 13]]",
2436    Arrays.deepToString(ml.getFromRanges().toArray()));
2437   
2438    /*
2439    * Case 3: CDS longer than 3 * peptide + stop codon - no mapping is made
2440    */
2441  1 dna = new Sequence("dna", "AACGacgtCTCCTTGATCA");
2442  1 dna.createDatasetSequence();
2443  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
2444  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 19, null));
2445  1 ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
2446  1 assertNull(ml);
2447   
2448    /*
2449    * Case 4: CDS shorter than 3 * peptide - no mapping is made
2450    */
2451  1 dna = new Sequence("dna", "AACGacgtCTCC");
2452  1 dna.createDatasetSequence();
2453  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
2454  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 12, null));
2455  1 ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
2456  1 assertNull(ml);
2457   
2458    /*
2459    * Case 5: CDS 3 times length of peptide + part codon - mapping is truncated
2460    */
2461  1 dna = new Sequence("dna", "AACGacgtCTCCTTG");
2462  1 dna.createDatasetSequence();
2463  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
2464  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 15, null));
2465  1 ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
2466  1 assertEquals(3, ml.getFromRatio());
2467  1 assertEquals(1, ml.getToRatio());
2468  1 assertEquals("[[1, 3]]",
2469    Arrays.deepToString(ml.getToRanges().toArray()));
2470  1 assertEquals("[[1, 4], [9, 13]]",
2471    Arrays.deepToString(ml.getFromRanges().toArray()));
2472   
2473    /*
2474    * Case 6: incomplete start codon corresponding to X in peptide
2475    */
2476  1 dna = new Sequence("dna", "ACGacgtCTCCTTGG");
2477  1 dna.createDatasetSequence();
2478  1 SequenceFeature sf = new SequenceFeature("CDS", "", 1, 3, null);
2479  1 sf.setPhase("2"); // skip 2 positions (AC) to start of next codon (GCT)
2480  1 dna.addSequenceFeature(sf);
2481  1 dna.addSequenceFeature(new SequenceFeature("CDS", "", 8, 15, null));
2482  1 peptide = new Sequence("pep", "XLQ");
2483  1 ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
2484  1 assertEquals("[[2, 3]]",
2485    Arrays.deepToString(ml.getToRanges().toArray()));
2486  1 assertEquals("[[3, 3], [8, 12]]",
2487    Arrays.deepToString(ml.getFromRanges().toArray()));
2488    }
2489   
2490    /**
2491    * Tests for the method that locates the CDS sequence that has a mapping to
2492    * the given protein. That is, given a transcript-to-peptide mapping, find the
2493    * cds-to-peptide mapping that relates to both, and return the CDS sequence.
2494    */
 
2495  1 toggle @Test(groups = "Functional")
2496    public void testFindCdsForProtein()
2497    {
2498  1 List<AlignedCodonFrame> mappings = new ArrayList<>();
2499  1 AlignedCodonFrame acf1 = new AlignedCodonFrame();
2500  1 mappings.add(acf1);
2501   
2502  1 SequenceI dna1 = new Sequence("dna1", "cgatATcgGCTATCTATGacg");
2503  1 dna1.createDatasetSequence();
2504   
2505    // NB we currently exclude STOP codon from CDS sequences
2506    // the test would need to change if this changes in future
2507  1 SequenceI cds1 = new Sequence("cds1", "ATGCTATCT");
2508  1 cds1.createDatasetSequence();
2509   
2510  1 SequenceI pep1 = new Sequence("pep1", "MLS");
2511  1 pep1.createDatasetSequence();
2512  1 List<AlignedCodonFrame> seqMappings = new ArrayList<>();
2513  1 MapList mapList = new MapList(new int[] { 5, 6, 9, 15 },
2514    new int[]
2515    { 1, 3 }, 3, 1);
2516  1 Mapping dnaToPeptide = new Mapping(pep1.getDatasetSequence(), mapList);
2517   
2518    // add dna to peptide mapping
2519  1 seqMappings.add(acf1);
2520  1 acf1.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(),
2521    mapList);
2522   
2523    /*
2524    * first case - no dna-to-CDS mapping exists - search fails
2525    */
2526  1 SequenceI seq = AlignmentUtils.findCdsForProtein(mappings, dna1,
2527    seqMappings, dnaToPeptide);
2528  1 assertNull(seq);
2529   
2530    /*
2531    * second case - CDS-to-peptide mapping exists but no dna-to-CDS
2532    * - search fails
2533    */
2534    // todo this test fails if the mapping is added to acf1, not acf2
2535    // need to tidy up use of lists of mappings in AlignedCodonFrame
2536  1 AlignedCodonFrame acf2 = new AlignedCodonFrame();
2537  1 mappings.add(acf2);
2538  1 MapList cdsToPeptideMapping = new MapList(new int[] { 1, 9 },
2539    new int[]
2540    { 1, 3 }, 3, 1);
2541  1 acf2.addMap(cds1.getDatasetSequence(), pep1.getDatasetSequence(),
2542    cdsToPeptideMapping);
2543  1 assertNull(AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
2544    dnaToPeptide));
2545   
2546    /*
2547    * third case - add dna-to-CDS mapping - CDS is now found!
2548    */
2549  1 MapList dnaToCdsMapping = new MapList(new int[] { 5, 6, 9, 15 },
2550    new int[]
2551    { 1, 9 }, 1, 1);
2552  1 acf1.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(),
2553    dnaToCdsMapping);
2554  1 seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
2555    dnaToPeptide);
2556  1 assertSame(seq, cds1.getDatasetSequence());
2557    }
2558   
2559    /**
2560    * Tests for the method that locates the CDS sequence that has a mapping to
2561    * the given protein. That is, given a transcript-to-peptide mapping, find the
2562    * cds-to-peptide mapping that relates to both, and return the CDS sequence.
2563    * This test is for the case where transcript and CDS are the same length.
2564    */
 
2565  1 toggle @Test(groups = "Functional")
2566    public void testFindCdsForProtein_noUTR()
2567    {
2568  1 List<AlignedCodonFrame> mappings = new ArrayList<>();
2569  1 AlignedCodonFrame acf1 = new AlignedCodonFrame();
2570  1 mappings.add(acf1);
2571   
2572  1 SequenceI dna1 = new Sequence("dna1", "ATGCTATCTTAA");
2573  1 dna1.createDatasetSequence();
2574   
2575    // NB we currently exclude STOP codon from CDS sequences
2576    // the test would need to change if this changes in future
2577  1 SequenceI cds1 = new Sequence("cds1", "ATGCTATCT");
2578  1 cds1.createDatasetSequence();
2579   
2580  1 SequenceI pep1 = new Sequence("pep1", "MLS");
2581  1 pep1.createDatasetSequence();
2582  1 List<AlignedCodonFrame> seqMappings = new ArrayList<>();
2583  1 MapList mapList = new MapList(new int[] { 1, 9 }, new int[] { 1, 3 }, 3,
2584    1);
2585  1 Mapping dnaToPeptide = new Mapping(pep1.getDatasetSequence(), mapList);
2586   
2587    // add dna to peptide mapping
2588  1 seqMappings.add(acf1);
2589  1 acf1.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(),
2590    mapList);
2591   
2592    /*
2593    * first case - transcript lacks CDS features - it appears to be
2594    * the CDS sequence and is returned
2595    */
2596  1 SequenceI seq = AlignmentUtils.findCdsForProtein(mappings, dna1,
2597    seqMappings, dnaToPeptide);
2598  1 assertSame(seq, dna1.getDatasetSequence());
2599   
2600    /*
2601    * second case - transcript has CDS feature - this means it is
2602    * not returned as a match for CDS (CDS sequences don't have CDS features)
2603    */
2604  1 dna1.addSequenceFeature(
2605    new SequenceFeature(SequenceOntologyI.CDS, "cds", 1, 12, null));
2606  1 seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
2607    dnaToPeptide);
2608  1 assertNull(seq);
2609   
2610    /*
2611    * third case - CDS-to-peptide mapping exists but no dna-to-CDS
2612    * - search fails
2613    */
2614    // todo this test fails if the mapping is added to acf1, not acf2
2615    // need to tidy up use of lists of mappings in AlignedCodonFrame
2616  1 AlignedCodonFrame acf2 = new AlignedCodonFrame();
2617  1 mappings.add(acf2);
2618  1 MapList cdsToPeptideMapping = new MapList(new int[] { 1, 9 },
2619    new int[]
2620    { 1, 3 }, 3, 1);
2621  1 acf2.addMap(cds1.getDatasetSequence(), pep1.getDatasetSequence(),
2622    cdsToPeptideMapping);
2623  1 assertNull(AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
2624    dnaToPeptide));
2625   
2626    /*
2627    * fourth case - add dna-to-CDS mapping - CDS is now found!
2628    */
2629  1 MapList dnaToCdsMapping = new MapList(new int[] { 1, 9 },
2630    new int[]
2631    { 1, 9 }, 1, 1);
2632  1 acf1.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(),
2633    dnaToCdsMapping);
2634  1 seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings,
2635    dnaToPeptide);
2636  1 assertSame(seq, cds1.getDatasetSequence());
2637    }
2638   
 
2639  1 toggle @Test(groups = "Functional")
2640    public void testAddReferenceAnnotations()
2641    {
2642  1 SequenceI longseq = new Sequence("longA", "ASDASDASDASDAASDASDASDASDA");
2643  1 Annotation[] aa = new Annotation[longseq.getLength()];
2644   
2645  27 for (int p = 0; p < aa.length; p++)
2646    {
2647  26 aa[p] = new Annotation("P", "pos " + (p + 1), (char) 0,
2648    (float) p + 1);
2649    }
2650  1 AlignmentAnnotation refAnnot = new AlignmentAnnotation("LongSeqAnnot",
2651    "Annotations", aa);
2652  1 refAnnot.setCalcId("Test");
2653  1 longseq.addAlignmentAnnotation(refAnnot);
2654  1 verifyExpectedSequenceAnnotation(refAnnot);
2655   
2656  1 Alignment ourAl = new Alignment(
2657    new SequenceI[]
2658    { longseq.getSubSequence(5, 10),
2659    longseq.getSubSequence(7, 12) });
2660  1 ourAl.createDatasetAlignment();
2661   
2662    // transfer annotation
2663  1 SortedMap<String, String> tipEntries = new TreeMap<>();
2664  1 Map<SequenceI, List<AlignmentAnnotation>> candidates = new LinkedHashMap<>();
2665   
2666  1 AlignmentUtils.findAddableReferenceAnnotations(ourAl.getSequences(),
2667    tipEntries, candidates, ourAl);
2668  1 AlignmentUtils.addReferenceAnnotations(candidates, ourAl, null);
2669   
2670  1 assertNotNull(ourAl.getAlignmentAnnotation());
2671  1 assertEquals(ourAl.getAlignmentAnnotation().length, 2);
2672   
2673  1 for (AlignmentAnnotation alan : ourAl.getAlignmentAnnotation())
2674    {
2675  2 verifyExpectedSequenceAnnotation(alan);
2676    }
2677    // Everything above works for 2.11.3 and 2.11.2.x.
2678    // now simulate copy/paste to new alignment
2679  1 SequenceI[] newSeqAl = new SequenceI[2];
2680    // copy sequences but no annotation
2681  1 newSeqAl[0] = new Sequence(ourAl.getSequenceAt(0),
2682    ourAl.getSequenceAt(0).getAnnotation());
2683  1 newSeqAl[1] = new Sequence(ourAl.getSequenceAt(1),
2684    ourAl.getSequenceAt(1).getAnnotation());
2685   
2686  1 Alignment newAl = new Alignment(newSeqAl);
2687    // delete annotation
2688  1 for (SequenceI sq : newAl.getSequences())
2689    {
2690  2 sq.setAlignmentAnnotation(new AlignmentAnnotation[0]);
2691    }
2692    // JAL-4182 scenario test
2693  1 SequenceGroup sg = new SequenceGroup(Arrays.asList(newSeqAl));
2694  1 sg.setStartRes(0);
2695  1 sg.setEndRes(newAl.getWidth());
2696  1 AlignmentUtils.addReferenceAnnotationTo(newAl, newSeqAl[0],
2697    newSeqAl[0].getDatasetSequence().getAnnotation()[0], sg);
2698  1 AlignmentUtils.addReferenceAnnotationTo(newAl, newSeqAl[1],
2699    newSeqAl[1].getDatasetSequence().getAnnotation()[0], sg);
2700  1 for (AlignmentAnnotation alan : newAl.getAlignmentAnnotation())
2701    {
2702  2 verifyExpectedSequenceAnnotation(alan);
2703    }
2704    }
2705   
2706    /**
2707    * helper - tests annotation is mapped to position it was originally created
2708    * for
2709    *
2710    * @param alan
2711    */
 
2712  5 toggle private void verifyExpectedSequenceAnnotation(AlignmentAnnotation alan)
2713    {
2714  51 for (int c = 0; c < alan.annotations.length; c++)
2715    {
2716  46 Annotation a = alan.annotations[c];
2717  46 if (a != null)
2718    {
2719  46 assertEquals("Misaligned annotation at " + c,
2720    (float) alan.sequenceRef.findPosition(c), a.value);
2721    }
2722    else
2723    {
2724  0 assertTrue("Unexpected Null at position " + c,
2725    c >= alan.sequenceRef.getLength()
2726    || Comparison.isGap(alan.sequenceRef.getCharAt(c)));
2727    }
2728    }
2729    }
2730   
 
2731  1 toggle @Test(groups = "Functional")
2732    public void testAddReferenceContactMap()
2733    {
2734  1 SequenceI sq = new Sequence("a", "SSSQ");
2735  1 ContactMatrixI cm = new SeqDistanceContactMatrix(4);
2736  1 AlignmentAnnotation cm_aan = sq.addContactList(cm);
2737  1 cm_aan.description = cm_aan.description + " cm1";
2738  1 SequenceI dssq = sq.createDatasetSequence();
2739   
2740    // remove annotation on our non-dataset sequence
2741  1 sq.removeAlignmentAnnotation(sq.getAnnotation()[0]);
2742    // test transfer
2743  1 Alignment al = new Alignment(new SequenceI[] { sq });
2744  1 SortedMap<String, String> tipEntries = new TreeMap<>();
2745  1 Map<SequenceI, List<AlignmentAnnotation>> candidates = new LinkedHashMap<>();
2746   
2747  1 AlignmentUtils.findAddableReferenceAnnotations(al.getSequences(),
2748    tipEntries, candidates, al);
2749  1 AlignmentUtils.addReferenceAnnotations(candidates, al, null);
2750  1 assertTrue("No contact map annotation transferred",
2751    al.getAlignmentAnnotation() != null
2752    && al.getAlignmentAnnotation().length == 1);
2753  1 AlignmentAnnotation alan = al.findAnnotations(sq, null, cm_aan.label)
2754    .iterator().next();
2755  1 ContactMatrixI t_cm = al.getContactMatrixFor(alan);
2756  1 assertNotNull("No contact map for the transferred annotation row.",
2757    t_cm);
2758  1 assertTrue(t_cm instanceof SeqDistanceContactMatrix);
2759  1 assertTrue(((SeqDistanceContactMatrix) t_cm).hasReferenceSeq());
2760   
2761  1 ContactListI cl = al.getContactListFor(alan, 1);
2762  1 assertNotNull(
2763    "No contact matrix recovered after reference annotation transfer",
2764    cl);
2765    // semantics of sequence associated contact list is slightly tricky - column
2766    // 3 in alignment should have data
2767  1 cl = al.getContactListFor(alan, 3);
2768  1 assertNotNull(
2769    "Contact matrix should have data for last position in sequence",
2770    cl);
2771   
2772  1 ContactMatrixI cm2 = new SeqDistanceContactMatrix(4);
2773  1 dssq.addContactList(cm2);
2774  1 tipEntries = new TreeMap<>();
2775  1 candidates = new LinkedHashMap<>();
2776   
2777  1 AlignmentUtils.findAddableReferenceAnnotations(al.getSequences(),
2778    tipEntries, candidates, al);
2779  1 AlignmentUtils.addReferenceAnnotations(candidates, al, null);
2780  1 assertTrue("Expected two contact map annotation transferred",
2781    al.getAlignmentAnnotation() != null
2782    && al.getAlignmentAnnotation().length == 2);
2783   
2784    }
2785   
 
2786  5 toggle @Test(
2787    groups = "Functional",
2788    dataProvider = "SecondaryStructureAnnotations")
2789    public void testSecondaryStructurePresentAndSources(
2790    AlignmentAnnotation[] annotations, boolean expectedSSPresent,
2791    ArrayList<String> expectedSSSources)
2792    {
2793  5 Assert.assertEquals(expectedSSPresent,
2794    AlignmentUtils.isSecondaryStructurePresent(annotations));
2795    }
2796   
 
2797  1 toggle @DataProvider(name = "SecondaryStructureAnnotations")
2798    public static Object[][] provideSecondaryStructureAnnotations()
2799    {
2800  1 AlignmentAnnotation ann1 = new AlignmentAnnotation(
2801    "Secondary Structure", "Secondary Structure",
2802    new Annotation[] {});
2803  1 AlignmentAnnotation ann2 = new AlignmentAnnotation("jnetpred",
2804    "jnetpred", new Annotation[] {});
2805  1 AlignmentAnnotation ann3 = new AlignmentAnnotation("Temp", "Temp",
2806    new Annotation[] {});
2807  1 AlignmentAnnotation ann4 = new AlignmentAnnotation("Temp", "Temp",
2808    new Annotation[] {});
2809   
2810  1 List<String> ssSources1 = new ArrayList<>(
2811    Arrays.asList("3D Structures"));
2812  1 List<String> ssSources2 = new ArrayList<>(Arrays.asList("JPred"));
2813  1 List<String> ssSources3 = new ArrayList<>(
2814    Arrays.asList("3D Structures", "JPred"));
2815  1 List<String> ssSources4 = new ArrayList<>();
2816   
2817  1 return new Object[][] {
2818    { new AlignmentAnnotation[]
2819    { ann1, ann3, ann4 }, true, ssSources1 },
2820    { new AlignmentAnnotation[]
2821    { ann2, ann3, ann4 }, true, ssSources2 },
2822    { new AlignmentAnnotation[]
2823    { ann3, ann4 }, false, ssSources4 },
2824    { new AlignmentAnnotation[] {}, false, ssSources4 },
2825    { new AlignmentAnnotation[]
2826    { ann1, ann2, ann3, ann4 }, true, ssSources3 } };
2827    }
2828   
 
2829  4 toggle @Test(dataProvider = "SecondaryStructureAnnotationColours", groups = { "Functional" })
2830    public void testSecondaryStructureAnnotationColour(char symbol,
2831    Color expectedColor)
2832    {
2833  4 Color actualColor = AlignmentUtils
2834    .getSecondaryStructureAnnotationColour(symbol);
2835  4 Assert.assertEquals(actualColor, expectedColor);
2836    }
2837   
 
2838  1 toggle @DataProvider(name = "SecondaryStructureAnnotationColours")
2839    public static Object[][] provideSecondaryStructureAnnotationColours()
2840    {
2841  1 return new Object[][] { { 'C', Color.gray }, { 'E', Color.green },
2842    { 'H', Color.red },
2843    { '-', Color.white } };
2844    }
2845   
 
2846  4 toggle @Test(dataProvider = "SSAnnotationPresence", groups = { "Functional" })
2847    public void testIsSSAnnotationPresent(
2848    Map<SequenceI, List<AlignmentAnnotation>> annotations,
2849    boolean expectedPresence)
2850    {
2851  4 boolean actualPresence = AlignmentUtils
2852    .isSSAnnotationPresent(annotations);
2853  4 Assert.assertEquals(actualPresence, expectedPresence);
2854    }
2855   
 
2856  1 toggle @DataProvider(name = "SSAnnotationPresence")
2857    public static Object[][] provideSSAnnotationPresence()
2858    {
2859  1 Map<SequenceI, List<AlignmentAnnotation>> annotations1 = new HashMap<>();
2860  1 SequenceI seq1 = new Sequence("Seq1", "ASD---ASD---ASD", 37, 45);
2861  1 List<AlignmentAnnotation> annotationsList1 = new ArrayList<>();
2862  1 annotationsList1.add(new AlignmentAnnotation("Secondary Structure",
2863    "Secondary Structure", new Annotation[] {}));
2864  1 annotations1.put(seq1, annotationsList1); // Annotation present secondary
2865    // structure for seq1
2866   
2867  1 Map<SequenceI, List<AlignmentAnnotation>> annotations2 = new HashMap<>();
2868  1 SequenceI seq2 = new Sequence("Seq2", "ASD---ASD------", 37, 42);
2869  1 List<AlignmentAnnotation> annotationsList2 = new ArrayList<>();
2870  1 annotationsList2.add(new AlignmentAnnotation("Other Annotation",
2871    "Other Annotation", new Annotation[] {}));
2872  1 annotations2.put(seq2, annotationsList2); // Annotation not related to any
2873    // of secondary structure for seq2
2874   
2875  1 Map<SequenceI, List<AlignmentAnnotation>> annotations3 = new HashMap<>();
2876    // Empty annotation map
2877   
2878  1 Map<SequenceI, List<AlignmentAnnotation>> annotations4 = new HashMap<>();
2879  1 SequenceI seq4 = new Sequence("Seq4", "ASD---ASD---AS-", 37, 44);
2880  1 List<AlignmentAnnotation> annotationsList4 = new ArrayList<>();
2881  1 annotationsList4.add(new AlignmentAnnotation("jnetpred", "jnetpred",
2882    new Annotation[] {}));
2883  1 annotations4.put(seq4, annotationsList4); // Annotation present from JPred
2884    // for seq4
2885   
2886  1 return new Object[][] { { annotations1, true }, // Annotations present
2887    // secondary structure
2888    // present
2889    { annotations2, false }, // No annotations related to any of the
2890    // secondary structure present
2891    { annotations3, false }, // Empty annotation map
2892    { annotations4, true }, // Annotations present from JPred secondary
2893    // structure present
2894    };
2895    }
2896   
2897   
 
2898  1 toggle @Test(groups = "Functional")
2899    public void testGetAlignmentAnnotationForSource()
2900    {
2901   
2902  1 SequenceI seq = new Sequence("testSeq", "ACDEFGHIKLMNPQRSTVWY");
2903   
2904  1 AlignmentAnnotation annot1 =
2905    new AlignmentAnnotation("Secondary Structure",
2906    "Secondary Structure for 4zhpA", new Annotation[] {}); //PDB
2907  1 annot1.setProperty(Constants.SS_PROVIDER_PROPERTY, "PDB");
2908  1 AlignmentAnnotation annot2 =
2909    new AlignmentAnnotation("Secondary Structure",
2910    "Secondary Structure for 5zhpA", new Annotation[] {}); //PDB
2911  1 annot2.setProperty(Constants.SS_PROVIDER_PROPERTY, "PDB");
2912  1 AlignmentAnnotation annot3 = new AlignmentAnnotation("Secondary Structure",
2913    "Secondary Structure for p09911_54-147__3a7wzn.1.p3502557454997462030P",
2914    new Annotation[] {}); //Swiss model
2915  1 annot3.setProperty(Constants.SS_PROVIDER_PROPERTY, "SWISS-MODEL");
2916  1 AlignmentAnnotation annot4 = new AlignmentAnnotation("Secondary Structure",
2917    "Secondary Structure for af-q43517-f1A", new Annotation[] {}); //Alphafold
2918  1 annot4.setProperty(Constants.SS_PROVIDER_PROPERTY, "AlphaFold DB");
2919  1 AlignmentAnnotation annot5 = new AlignmentAnnotation("Secondary Structure",
2920    "Secondary Structure for af-q43517-f1A", new Annotation[] {}); //Alphafold
2921  1 annot5.setProperty(Constants.SS_PROVIDER_PROPERTY, "AlphaFold DB");
2922   
2923  1 seq.addAlignmentAnnotation(annot1);
2924  1 seq.addAlignmentAnnotation(annot2);
2925  1 seq.addAlignmentAnnotation(annot3);
2926  1 seq.addAlignmentAnnotation(annot4);
2927  1 seq.addAlignmentAnnotation(annot5);
2928   
2929  1 List<AlignmentAnnotation> all = AlignmentUtils.getAlignmentAnnotationForSource(
2930    seq, Constants.SS_ALL_PROVIDERS);
2931  1 assertTrue("Expected non-null result for SS_ALL_PROVIDERS",
2932    all != null);
2933  1 Assert.assertEquals(all.size(), 5, "Expected all annotations");
2934   
2935  1 List<AlignmentAnnotation> pdb = AlignmentUtils.getAlignmentAnnotationForSource(
2936    seq, "PDB");
2937  1 assertTrue("Expected non-null result for PDB",
2938    pdb != null);
2939  1 Assert.assertEquals(pdb.size(), 2, "Expected 2 annotations");
2940   
2941  1 List<AlignmentAnnotation> swiss = AlignmentUtils.getAlignmentAnnotationForSource(
2942    seq, "SWISS-MODEL");
2943  1 assertTrue("Expected non-null result for SWISS-MODEL",
2944    swiss != null);
2945  1 Assert.assertEquals(swiss.size(), 1, "Expected 1 annotation");
2946   
2947  1 List<AlignmentAnnotation> alphafold = AlignmentUtils.getAlignmentAnnotationForSource(
2948    seq, "AlphaFold DB");
2949  1 assertTrue("Expected non-null result for AlphaFold DB",
2950    alphafold != null);
2951  1 Assert.assertEquals(alphafold.size(), 2, "Expected 2 annotations");
2952    }
2953   
2954   
 
2955  1 toggle @Test(groups = "Functional")
2956    public void testa3mToMSA_byfile() throws Exception
2957    {
2958  1 String queryFile="examples/testdata/query"; // "examples/uniref50.a3m/a3m.fa
2959  1 SequenceI[] origseq = new FastaFile(queryFile+".a3m.fa", DataSourceType.FILE).getSeqsAsArray();
2960   
2961  1 SequenceI[] a3mseq = new FastaFile(queryFile+".a3m", DataSourceType.FILE).getSeqsAsArray();
2962  1 ShiftList maxinserts = new ShiftList();
2963    // from reformat.pl
2964    // patch:
2965    // missing [0,1] at beginning
2966  1 String expectedShifts = "[16,3],[34,4],[38,23],[76,2],[86,1],[92,1],[108,21],[130,3],[132,27],[140,4],[148,3],[176,1],[178,1],[184,1],[188,1],[198,1],[200,1],[202,10],[210,5],[212,3],[224,10],[228,1],[234,1],[238,2],[242,1],[244,6],[246,1],[250,19],[254,2],[256,3],[264,19],[270,1],[272,3],[294,4],[300,1],[306,2],[328,2],[330,15],[332,5],[336,1],[338,5],[340,2],[346,6],[352,2],[364,1],[366,4],[376,1],[378,7],[380,2],[382,1],[384,2],[428,1],[430,1],[472,6],[476,4],[478,2],[480,1],[484,8],[498,5],[502,3],[506,10],[508,1],[510,2],[512,4],[516,3],[524,9],[536,1],[540,7],[542,10],[544,2],[546,1],[552,4],[560,4],[572,14],[582,1],[590,1],[612,2],[620,2],[652,1],[658,1],[686,2],[688,2],[744,3],[746,1],[748,8],[754,5],[760,2],[766,2],[800,1],[810,2],[814,10],[816,15],[820,1],[830,5]";
2967  1 AlignmentUtils.computeMaxShifts(a3mseq,maxinserts);
2968   
2969  1 boolean comma=false;
2970  1 String obsinserts="";
2971  1 for (int[] il:maxinserts.getShifts()) {
2972  94 if (comma)
2973    {
2974  93 obsinserts+=",";
2975    }
2976  94 comma=true;
2977  94 obsinserts+="["+il[0]+","+il[1]+"]";
2978    }
2979  1 assertEquals(expectedShifts,obsinserts);
2980   
2981    // System.out.println("");
2982    // for (int i=0; i<origseq.length;i++)
2983    // {
2984    // System.out.println("or: "+origseq[i].getSequenceAsString());
2985    // System.out.println("xf: "+a3mseq[i].getSequenceAsString());
2986    // }
2987  1 AlignmentUtils.a3mToMSA(a3mseq);
2988  1 String exp="",match="";
2989  60 for (int i=0; i<origseq.length;i++)
2990    {
2991   
2992    // System.out.println(origseq[i].getName()+"\n"
2993    // + origseq[i].getSequenceAsString());
2994    // System.out.println(a3mseq[i].getSequenceAsString());
2995  59 if (!origseq[i].getSequenceAsString().equals(a3mseq[i].getSequenceAsString())) {
2996  0 exp+=origseq[i].getSequenceAsString()+" "+origseq[i].getName()+"\n";
2997  0 match+=a3mseq[i].getSequenceAsString()+" "+a3mseq[i].getName()+"\n";;
2998  0 System.out.println(i+"or: "+origseq[i].getSequenceAsString());
2999  0 System.out.println(i+"xf: "+a3mseq[i].getSequenceAsString());
3000    }
3001    }
3002  1 assertEquals(exp,match);
3003    }
 
3004  1 toggle @Test(groups = "Functional")
3005    public void testA3mInsertShifts() throws Exception
3006    {
3007  1 ShiftList a3mInserts = new ShiftList();
3008  1 a3mInserts.addShift(16,3);
3009  1 a3mInserts.addShift(38,23);
3010  1 SequenceI sq = new Sequence("F1RVZ5_PIG","--PAGGQCtgiWHLLTRPLRP--QG");
3011  1 String expSeq = "--PAGGQCtgiWHLLTRPLRP-------------------------QG";
3012  1 AlignmentUtils.insertShifts(new SequenceI[] {sq}, a3mInserts);
3013  1 assertEquals(expSeq, sq.getSequenceAsString());
3014    }
3015   
 
3016  1 toggle @Test(groups = "Functional")
3017    public void testa3mToMSA()
3018    {
3019  1 String ins1="aaaSSmmm..TTaa";
3020  1 String nonI="AASSTTVVWWXXYY";
3021  1 Sequence seq = new Sequence("a3mS1",ins1);
3022  1 Sequence nonI_seq = new Sequence("a3mS2",nonI);
3023  1 String exp_nonI="---AA---SSTT--VVWWXXYY";
3024   
3025  1 ArrayList<int[]> expInserts = new ArrayList();
3026  1 expInserts.add(new int [] { 0,3});
3027  1 expInserts.add(new int [] { 4,3});
3028  1 expInserts.add(new int [] { 12,2});
3029   
3030  1 ShiftList obsInserts = new ShiftList();
3031  1 AlignmentUtils.computeMaxShifts(new Sequence[] { seq,nonI_seq}, obsInserts);
3032  1 assertEquals(expInserts.size(),obsInserts.getShifts().size());
3033  4 for (int p=0;p<expInserts.size(); p++)
3034    {
3035  3 int[] obser=obsInserts.getShifts().get(p);
3036  3 int[] exped=expInserts.get(p);
3037  3 Assert.assertEquals(obser[0],exped[0]," for "+p);
3038  3 Assert.assertEquals(obser[1],exped[1]," for "+p);
3039    }
3040   
3041  1 AlignmentUtils.a3mToMSA(new Sequence[] {seq,nonI_seq});
3042  1 assertEquals(ins1,seq.getSequenceAsString());
3043  1 assertEquals(exp_nonI,nonI_seq.getSequenceAsString());
3044   
3045    // Reset and Reverse
3046  1 seq = new Sequence("a3mS1",ins1);
3047  1 nonI_seq = new Sequence("a3mS2",nonI);
3048   
3049  1 AlignmentUtils.a3mToMSA(new Sequence[] {nonI_seq, seq});
3050  1 assertEquals(ins1,seq.getSequenceAsString());
3051  1 assertEquals(exp_nonI,nonI_seq.getSequenceAsString());
3052    }
3053   
3054    }