Clover icon

Coverage Report

  1. Project Clover database Thu Aug 13 2020 12:04:21 BST
  2. Package jalview.analysis

File CrossRefTest.java

 

Code metrics

2
274
20
1
752
432
21
0.08
13.7
20
1.05

Classes

Class Line # Actions
CrossRefTest 55 274 21
0.5709459 57.1%
 

Contributing tests

This file is covered by 8 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.analysis;
22   
23    import static org.testng.AssertJUnit.assertEquals;
24    import static org.testng.AssertJUnit.assertFalse;
25    import static org.testng.AssertJUnit.assertNotNull;
26    import static org.testng.AssertJUnit.assertNotSame;
27    import static org.testng.AssertJUnit.assertNull;
28    import static org.testng.AssertJUnit.assertSame;
29    import static org.testng.AssertJUnit.assertTrue;
30   
31    import jalview.datamodel.AlignedCodonFrame;
32    import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
33    import jalview.datamodel.Alignment;
34    import jalview.datamodel.AlignmentI;
35    import jalview.datamodel.DBRefEntry;
36    import jalview.datamodel.Mapping;
37    import jalview.datamodel.Sequence;
38    import jalview.datamodel.SequenceFeature;
39    import jalview.datamodel.SequenceI;
40    import jalview.gui.JvOptionPane;
41    import jalview.util.DBRefUtils;
42    import jalview.util.MapList;
43    import jalview.ws.SequenceFetcher;
44    import jalview.ws.SequenceFetcherFactory;
45    import jalview.ws.params.InvalidArgumentException;
46   
47    import java.util.ArrayList;
48    import java.util.Arrays;
49    import java.util.List;
50   
51    import org.testng.annotations.AfterClass;
52    import org.testng.annotations.BeforeClass;
53    import org.testng.annotations.Test;
54   
 
55    public class CrossRefTest
56    {
57   
 
58  1 toggle @BeforeClass(alwaysRun = true)
59    public void setUpJvOptionPane()
60    {
61  1 JvOptionPane.setInteractiveMode(false);
62  1 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
63    }
64   
 
65  1 toggle @Test(groups = { "Functional" })
66    public void testFindXDbRefs()
67    {
68  1 DBRefEntry ref1 = new DBRefEntry("UNIPROT", "1", "A123");
69  1 DBRefEntry ref2 = new DBRefEntry("UNIPROTKB/TREMBL", "1", "A123");
70  1 DBRefEntry ref3 = new DBRefEntry("pdb", "1", "A123");
71  1 DBRefEntry ref4 = new DBRefEntry("EMBLCDSPROTEIN", "1", "A123");
72  1 DBRefEntry ref5 = new DBRefEntry("embl", "1", "A123");
73  1 DBRefEntry ref6 = new DBRefEntry("emblCDS", "1", "A123");
74  1 DBRefEntry ref7 = new DBRefEntry("GeneDB", "1", "A123");
75  1 DBRefEntry ref8 = new DBRefEntry("PFAM", "1", "A123");
76    // ENSEMBL is a source of either dna or protein sequence data
77  1 DBRefEntry ref9 = new DBRefEntry("ENSEMBL", "1", "A123");
78  1 List<DBRefEntry> refs = Arrays.asList(new DBRefEntry[] { ref1, ref2, ref3, ref4, ref5,
79    ref6, ref7, ref8, ref9 });
80   
81    /*
82    * Just the DNA refs:
83    */
84  1 List<DBRefEntry> found = DBRefUtils.selectDbRefs(true, refs);
85  1 assertEquals(4, found.size());
86  1 assertSame(ref5, found.get(0));
87  1 assertSame(ref6, found.get(1));
88  1 assertSame(ref7, found.get(2));
89  1 assertSame(ref9, found.get(3));
90   
91    /*
92    * Just the protein refs:
93    */
94  1 found = DBRefUtils.selectDbRefs(false, refs);
95  1 assertEquals(4, found.size());
96  1 assertSame(ref1, found.get(0));
97  1 assertSame(ref2, found.get(1));
98  1 assertSame(ref4, found.get(2));
99  1 assertSame(ref9, found.get(3));
100    }
101   
102    /**
103    * Test the method that finds a sequence's "product" xref source databases,
104    * which may be direct (dbrefs on the sequence), or indirect (dbrefs on
105    * sequences which share a dbref with the sequence)
106    */
 
107  1 toggle @Test(groups = { "Functional" }, enabled = true)
108    public void testFindXrefSourcesForSequence_proteinToDna()
109    {
110  1 SequenceI seq = new Sequence("Seq1", "MGKYQARLSS");
111  1 List<String> sources = new ArrayList<>();
112  1 AlignmentI al = new Alignment(new SequenceI[] {});
113   
114    /*
115    * first with no dbrefs to search
116    */
117  1 sources = new CrossRef(new SequenceI[] { seq }, al)
118    .findXrefSourcesForSequences(false);
119  1 assertTrue(sources.isEmpty());
120   
121    /*
122    * add some dbrefs to sequence
123    */
124    // protein db is not a candidate for findXrefSources
125  1 seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
126    // dna coding databases are
127  1 seq.addDBRef(new DBRefEntry("EMBL", "0", "E2345"));
128    // a second EMBL xref should not result in a duplicate
129  1 seq.addDBRef(new DBRefEntry("EMBL", "0", "E2346"));
130  1 seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347"));
131  1 seq.addDBRef(new DBRefEntry("GENEDB", "0", "E2348"));
132  1 seq.addDBRef(new DBRefEntry("ENSEMBL", "0", "E2349"));
133  1 seq.addDBRef(new DBRefEntry("ENSEMBLGENOMES", "0", "E2350"));
134  1 sources = new CrossRef(new SequenceI[] { seq }, al)
135    .findXrefSourcesForSequences(false);
136    // method is patched to remove EMBL from the sources to match
137  1 assertEquals(4, sources.size());
138  1 assertEquals("[EMBLCDS, GENEDB, ENSEMBL, ENSEMBLGENOMES]",
139    sources.toString());
140   
141    /*
142    * add a sequence to the alignment which has a dbref to UNIPROT|A1234
143    * and others to dna coding databases
144    */
145  1 sources.clear();
146  1 seq.setDBRefs(null);
147  1 seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
148  1 seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347"));
149  1 SequenceI seq2 = new Sequence("Seq2", "MGKYQARLSS");
150  1 seq2.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
151  1 seq2.addDBRef(new DBRefEntry("EMBL", "0", "E2345"));
152  1 seq2.addDBRef(new DBRefEntry("GENEDB", "0", "E2348"));
153    // TODO include ENSEMBLGENOMES in DBRefSource.DNACODINGDBS ?
154  1 al.addSequence(seq2);
155  1 sources = new CrossRef(new SequenceI[] { seq, seq2 }, al)
156    .findXrefSourcesForSequences(false);
157    // method removed EMBL from sources to match
158  1 assertEquals(2, sources.size());
159  1 assertEquals("[EMBLCDS, GENEDB]", sources.toString());
160    }
161   
162    /**
163    * Test for finding 'product' sequences for the case where only an indirect
164    * xref is found - not on the nucleotide sequence but on a peptide sequence in
165    * the alignment with which it shares a nucleotide dbref
166    */
 
167  1 toggle @Test(groups = { "Functional" }, enabled = true)
168    public void testFindXrefSequences_indirectDbrefToProtein()
169    {
170    /*
171    * Alignment setup:
172    * - nucleotide dbref EMBL|AF039662
173    * - peptide dbrefs EMBL|AF039662, UNIPROT|Q9ZTS2
174    */
175  1 SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
176  1 emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
177  1 SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS");
178  1 uniprotSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
179  1 uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
180   
181    /*
182    * Find UNIPROT xrefs for nucleotide
183    * - it has no UNIPROT dbref of its own
184    * - but peptide with matching nucleotide dbref does, so is returned
185    */
186  1 AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq });
187  1 Alignment xrefs = new CrossRef(new SequenceI[] { emblSeq }, al)
188    .findXrefSequences("UNIPROT", true);
189  1 assertEquals(1, xrefs.getHeight());
190  1 assertSame(uniprotSeq, xrefs.getSequenceAt(0));
191    }
192   
193    /**
194    * Test for finding 'product' sequences for the case where only an indirect
195    * xref is found - not on the peptide sequence but on a nucleotide sequence in
196    * the alignment with which it shares a protein dbref
197    */
 
198  1 toggle @Test(groups = { "Functional" }, enabled = true)
199    public void testFindXrefSequences_indirectDbrefToNucleotide()
200    {
201    /*
202    * Alignment setup:
203    * - peptide dbref UNIPROT|Q9ZTS2
204    * - nucleotide dbref EMBL|AF039662, UNIPROT|Q9ZTS2
205    */
206  1 SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS");
207  1 uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
208  1 SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
209  1 emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
210  1 emblSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
211   
212    /*
213    * find EMBL xrefs for peptide sequence - it has no direct
214    * dbrefs, but the 'corresponding' nucleotide sequence does, so is returned
215    */
216    /*
217    * Find EMBL xrefs for peptide
218    * - it has no EMBL dbref of its own
219    * - but nucleotide with matching peptide dbref does, so is returned
220    */
221  1 AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq });
222  1 Alignment xrefs = new CrossRef(new SequenceI[] { uniprotSeq }, al)
223    .findXrefSequences("EMBL", false);
224  1 assertEquals(1, xrefs.getHeight());
225  1 assertSame(emblSeq, xrefs.getSequenceAt(0));
226    }
227   
228    /**
229    * Test for finding 'product' sequences for the case where the selected
230    * sequence has no dbref to the desired source, and there are no indirect
231    * references via another sequence in the alignment
232    */
 
233  1 toggle @Test(groups = { "Functional" })
234    public void testFindXrefSequences_noDbrefs()
235    {
236    /*
237    * two nucleotide sequences, one with UNIPROT dbref
238    */
239  1 SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
240  1 dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
241  1 SequenceI dna2 = new Sequence("AJ307031", "AAACCCTTT");
242   
243    /*
244    * find UNIPROT xrefs for nucleotide sequence - it has no direct
245    * dbrefs, and the other sequence (which has a UNIPROT dbref) is not
246    * equatable to it, so no results found
247    */
248  1 AlignmentI al = new Alignment(new SequenceI[] { dna1, dna2 });
249  1 Alignment xrefs = new CrossRef(new SequenceI[] { dna2 }, al)
250    .findXrefSequences("UNIPROT", true);
251  1 assertNull(xrefs);
252    }
253   
254    /**
255    * Tests for the method that searches an alignment (with one sequence
256    * excluded) for protein/nucleotide sequences with a given cross-reference
257    */
 
258  1 toggle @Test(groups = { "Functional" }, enabled = true)
259    public void testSearchDataset()
260    {
261    /*
262    * nucleotide sequence with UNIPROT AND EMBL dbref
263    * peptide sequence with UNIPROT dbref
264    */
265  1 SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
266  1 Mapping map = new Mapping(new Sequence("pep2", "MLAVSRG"), new MapList(
267    new int[] { 1, 21 }, new int[] { 1, 7 }, 3, 1));
268  1 DBRefEntry dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map);
269  1 dna1.addDBRef(dbref);
270  1 dna1.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
271  1 SequenceI pep1 = new Sequence("Q9ZTS2", "MLAVSRGQ");
272  1 dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
273  1 pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
274  1 AlignmentI al = new Alignment(new SequenceI[] { dna1, pep1 });
275   
276  1 List<SequenceI> result = new ArrayList<>();
277   
278    /*
279    * first search for a dbref nowhere on the alignment:
280    */
281  1 dbref = new DBRefEntry("UNIPROT", "0", "P30419");
282  1 CrossRef testee = new CrossRef(al.getSequencesArray(), al);
283  1 AlignedCodonFrame acf = new AlignedCodonFrame();
284  1 boolean found = testee.searchDataset(true, dna1, dbref, result, acf,
285    true, DBRefUtils.SEARCH_MODE_FULL);
286  1 assertFalse(found);
287  1 assertTrue(result.isEmpty());
288  1 assertTrue(acf.isEmpty());
289   
290    /*
291    * search for a protein sequence with dbref UNIPROT:Q9ZTS2
292    */
293  1 acf = new AlignedCodonFrame();
294  1 dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
295  1 found = testee.searchDataset(!dna1.isProtein(), dna1, dbref, result,
296    acf, false, DBRefUtils.SEARCH_MODE_FULL); // search dataset with a protein xref from a dna
297    // sequence to locate the protein product
298  1 assertTrue(found);
299  1 assertEquals(1, result.size());
300  1 assertSame(pep1, result.get(0));
301  1 assertTrue(acf.isEmpty());
302   
303    /*
304    * search for a nucleotide sequence with dbref UNIPROT:Q9ZTS2
305    */
306  1 result.clear();
307  1 acf = new AlignedCodonFrame();
308  1 dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
309  1 found = testee.searchDataset(!pep1.isProtein(), pep1, dbref, result,
310    acf, false, DBRefUtils.SEARCH_MODE_FULL); // search dataset with a protein's direct dbref to
311    // locate dna sequences with matching xref
312  1 assertTrue(found);
313  1 assertEquals(1, result.size());
314  1 assertSame(dna1, result.get(0));
315    // should now have a mapping from dna to pep1
316  1 List<SequenceToSequenceMapping> mappings = acf.getMappings();
317  1 assertEquals(1, mappings.size());
318  1 SequenceToSequenceMapping mapping = mappings.get(0);
319  1 assertSame(dna1, mapping.getFromSeq());
320  1 assertSame(pep1, mapping.getMapping().getTo());
321  1 MapList mapList = mapping.getMapping().getMap();
322  1 assertEquals(1, mapList.getToRatio());
323  1 assertEquals(3, mapList.getFromRatio());
324  1 assertEquals(1, mapList.getFromRanges().size());
325  1 assertEquals(1, mapList.getFromRanges().get(0)[0]);
326  1 assertEquals(21, mapList.getFromRanges().get(0)[1]);
327  1 assertEquals(1, mapList.getToRanges().size());
328  1 assertEquals(1, mapList.getToRanges().get(0)[0]);
329  1 assertEquals(7, mapList.getToRanges().get(0)[1]);
330    }
331   
332    /**
333    * Test for finding 'product' sequences for the case where the selected
334    * sequence has a dbref with a mapping to a sequence. This represents the case
335    * where either
336    * <ul>
337    * <li>a fetched sequence is already decorated with its cross-reference (e.g.
338    * EMBL + translation), or</li>
339    * <li>Get Cross-References has been done once resulting in instantiated
340    * cross-reference mappings</li>
341    * </ul>
342    */
 
343  1 toggle @Test(groups = { "Functional" })
344    public void testFindXrefSequences_fromDbRefMap()
345    {
346    /*
347    * scenario: nucleotide sequence AF039662
348    * with dbref + mapping to Q9ZTS2 and P30419
349    * which themselves each have a dbref and feature
350    */
351  1 SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
352  1 SequenceI pep1 = new Sequence("Q9ZTS2", "MALFQRSV");
353  1 SequenceI pep2 = new Sequence("P30419", "MTRRSQIF");
354  1 dna1.createDatasetSequence();
355  1 pep1.createDatasetSequence();
356  1 pep2.createDatasetSequence();
357   
358  1 pep1.getDatasetSequence().addDBRef(
359    new DBRefEntry("Pfam", "0", "PF00111"));
360  1 pep1.addSequenceFeature(new SequenceFeature("type", "desc", 12, 14, 1f,
361    "group"));
362  1 pep2.getDatasetSequence().addDBRef(new DBRefEntry("PDB", "0", "3JTK"));
363  1 pep2.addSequenceFeature(new SequenceFeature("type2", "desc2", 13, 15,
364    12f, "group2"));
365   
366  1 MapList mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 },
367    3, 1);
368  1 Mapping map = new Mapping(pep1, mapList);
369  1 DBRefEntry dbRef1 = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map);
370  1 dna1.getDatasetSequence().addDBRef(dbRef1);
371  1 mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, 3, 1);
372  1 map = new Mapping(pep2, mapList);
373  1 DBRefEntry dbRef2 = new DBRefEntry("UNIPROT", "0", "P30419", map);
374  1 dna1.getDatasetSequence().addDBRef(dbRef2);
375   
376    /*
377    * find UNIPROT xrefs for nucleotide sequence - it should pick up
378    * mapped sequences
379    */
380  1 AlignmentI al = new Alignment(new SequenceI[] { dna1 });
381  1 Alignment xrefs = new CrossRef(new SequenceI[] { dna1 }, al)
382    .findXrefSequences("UNIPROT", true);
383  1 assertEquals(2, xrefs.getHeight());
384   
385    /*
386    * cross-refs alignment holds copies of the mapped sequences
387    * including copies of their dbrefs and features
388    */
389  1 checkCopySequence(pep1, xrefs.getSequenceAt(0));
390  1 checkCopySequence(pep2, xrefs.getSequenceAt(1));
391    }
392   
393    /**
394    * Helper method that verifies that 'copy' has the same name, start, end,
395    * sequence and dataset sequence object as 'original' (but is not the same
396    * object)
397    *
398    * @param copy
399    * @param original
400    */
 
401  2 toggle private void checkCopySequence(SequenceI copy, SequenceI original)
402    {
403  2 assertNotSame(copy, original);
404  2 assertSame(copy.getDatasetSequence(), original.getDatasetSequence());
405  2 assertEquals(copy.getName(), original.getName());
406  2 assertEquals(copy.getStart(), original.getStart());
407  2 assertEquals(copy.getEnd(), original.getEnd());
408  2 assertEquals(copy.getSequenceAsString(), original.getSequenceAsString());
409    }
410   
411    /**
412    * Test for finding 'product' sequences for the case where the selected
413    * sequence has a dbref with no mapping, triggering a fetch from database
414    */
 
415  0 toggle @Test(groups = { "Functional_Failing" })
416    public void testFindXrefSequences_withFetch()
417    {
418  0 SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
419  0 dna1.addDBRef(new DBRefEntry("UNIPROT", "ENA:0", "Q9ZTS2"));
420  0 dna1.addDBRef(new DBRefEntry("UNIPROT", "ENA:0", "P30419"));
421  0 dna1.addDBRef(new DBRefEntry("UNIPROT", "ENA:0", "P00314"));
422  0 final SequenceI pep1 = new Sequence("Q9ZTS2", "MYQLIRSSW");
423  0 pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
424   
425  0 final SequenceI pep2 = new Sequence("P00314", "MRKLLAASG");
426  0 pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "P00314"));
427   
428    /*
429    * argument false suppresses adding DAS sources
430    * todo: define an interface type SequenceFetcherI and mock that
431    */
432  0 SequenceFetcher mockFetcher = new SequenceFetcher()
433    {
 
434  0 toggle @Override
435    public boolean isFetchable(String source)
436    {
437  0 return true;
438    }
439   
 
440  0 toggle @Override
441    public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
442    {
443  0 return new SequenceI[] { pep1, pep2 };
444    }
445    };
446  0 SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
447   
448    /*
449    * find UNIPROT xrefs for nucleotide sequence
450    */
451  0 AlignmentI al = new Alignment(new SequenceI[] { dna1 });
452  0 Alignment xrefs = new CrossRef(new SequenceI[] { dna1 }, al)
453    .findXrefSequences("UNIPROT", true);
454  0 assertEquals(2, xrefs.getHeight());
455  0 assertSame(pep1, xrefs.getSequenceAt(0));
456  0 assertSame(pep2, xrefs.getSequenceAt(1));
457    }
458   
 
459  1 toggle @AfterClass(alwaysRun = true)
460    public void tearDown()
461    {
462  1 SequenceFetcherFactory.setSequenceFetcher(null);
463    }
464   
465    /**
466    * Test for finding 'product' sequences for the case where both gene and
467    * transcript sequences have dbrefs to Uniprot.
468    */
 
469  0 toggle @Test(groups = { "Functional_Failing" })
470    public void testFindXrefSequences_forGeneAndTranscripts()
471    {
472    /*
473    * 'gene' sequence
474    */
475  0 SequenceI gene = new Sequence("ENSG00000157764", "CGCCTCCCTTCCCC");
476  0 gene.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056"));
477  0 gene.addDBRef(new DBRefEntry("UNIPROT", "0", "H7C5K3"));
478   
479    /*
480    * 'transcript' with CDS feature (supports mapping to protein)
481    */
482  0 SequenceI braf001 = new Sequence("ENST00000288602", "taagATGGCGGCGCTGa");
483  0 braf001.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056"));
484  0 braf001.addSequenceFeature(new SequenceFeature("CDS", "", 5, 16, 0f,
485    null));
486   
487    /*
488    * 'spliced transcript' with CDS ranges
489    */
490  0 SequenceI braf002 = new Sequence("ENST00000497784", "gCAGGCtaTCTGTTCaa");
491  0 braf002.addDBRef(new DBRefEntry("UNIPROT", "ENSEMBL|0", "H7C5K3"));
492  0 braf002.addSequenceFeature(new SequenceFeature("CDS", "", 2, 6, 0f,
493    null));
494  0 braf002.addSequenceFeature(new SequenceFeature("CDS", "", 9, 15, 0f,
495    null));
496   
497    /*
498    * TODO code is fragile - use of SequenceIdMatcher depends on fetched
499    * sequences having a name starting Source|Accession
500    * which happens to be true for Uniprot,PDB,EMBL but not Pfam,Rfam,Ensembl
501    */
502  0 final SequenceI pep1 = new Sequence("UNIPROT|P15056", "MAAL");
503  0 pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056"));
504  0 final SequenceI pep2 = new Sequence("UNIPROT|H7C5K3", "QALF");
505  0 pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "H7C5K3"));
506    /*
507    * argument false suppresses adding DAS sources
508    * todo: define an interface type SequenceFetcherI and mock that
509    */
510  0 SequenceFetcher mockFetcher = new SequenceFetcher()
511    {
 
512  0 toggle @Override
513    public boolean isFetchable(String source)
514    {
515  0 return true;
516    }
517   
 
518  0 toggle @Override
519    public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
520    {
521  0 return new SequenceI[] { pep1, pep2 };
522    }
523    };
524  0 SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
525   
526    /*
527    * find UNIPROT xrefs for gene and transcripts
528    * verify that
529    * - the two proteins are retrieved but not duplicated
530    * - mappings are built from transcript (CDS) to proteins
531    * - no mappings from gene to proteins
532    */
533  0 SequenceI[] seqs = new SequenceI[] { gene, braf001, braf002 };
534  0 AlignmentI al = new Alignment(seqs);
535  0 Alignment xrefs = new CrossRef(seqs, al).findXrefSequences("UNIPROT",
536    true);
537  0 assertEquals(2, xrefs.getHeight());
538  0 assertSame(pep1, xrefs.getSequenceAt(0));
539  0 assertSame(pep2, xrefs.getSequenceAt(1));
540    }
541   
542    /**
543    * <pre>
544    * Test that emulates this (real but simplified) case:
545    * Alignment: DBrefs
546    * UNIPROT|P0CE19 EMBL|J03321, EMBL|X06707, EMBL|M19487
547    * UNIPROT|P0CE20 EMBL|J03321, EMBL|X06707, EMBL|X07547
548    * Find cross-references for EMBL. These are mocked here as
549    * EMBL|J03321 with mappings to P0CE18, P0CE19, P0CE20
550    * EMBL|X06707 with mappings to P0CE17, P0CE19, P0CE20
551    * EMBL|M19487 with mappings to P0CE19, Q46432
552    * EMBL|X07547 with mappings to P0CE20, B0BCM4
553    * EMBL sequences are first 'fetched' (mocked here) for P0CE19.
554    * The 3 EMBL sequences are added to the alignment dataset.
555    * Their dbrefs to Uniprot products P0CE19 and P0CE20 should be matched in the
556    * alignment dataset and updated to reference the original Uniprot sequences.
557    * For the second Uniprot sequence, the J03321 and X06707 xrefs should be
558    * resolved from the dataset, and only the X07547 dbref fetched.
559    * So the end state to verify is:
560    * - 4 cross-ref sequences returned: J03321, X06707, M19487, X07547
561    * - P0CE19/20 dbrefs to EMBL sequences now have mappings
562    * - J03321 dbrefs to P0CE19/20 mapped to original Uniprot sequences
563    * - X06707 dbrefs to P0CE19/20 mapped to original Uniprot sequences
564    * </pre>
565    */
 
566  0 toggle @Test(groups = { "Functional_Failing" })
567    public void testFindXrefSequences_uniprotEmblManyToMany()
568    {
569    /*
570    * Uniprot sequences, both with xrefs to EMBL|J03321
571    * and EMBL|X06707
572    */
573  0 SequenceI p0ce19 = new Sequence("UNIPROT|P0CE19", "KPFG");
574  0 p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "J03321"));
575  0 p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "X06707"));
576  0 p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "M19487"));
577  0 SequenceI p0ce20 = new Sequence("UNIPROT|P0CE20", "PFGK");
578  0 p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "J03321"));
579  0 p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "X06707"));
580  0 p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "X07547"));
581   
582    /*
583    * EMBL sequences to be 'fetched', complete with dbrefs and mappings
584    * to their protein products (CDS location and translations are provided
585    * in EMBL XML); these should be matched to, and replaced with,
586    * the corresponding uniprot sequences after fetching
587    */
588   
589    /*
590    * J03321 with mappings to P0CE19 and P0CE20
591    */
592  0 final SequenceI j03321 = new Sequence("EMBL|J03321", "AAACCCTTTGGGAAAA");
593  0 DBRefEntry dbref1 = new DBRefEntry("UNIPROT", "0", "P0CE19");
594  0 MapList mapList = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 },
595    3, 1);
596  0 Mapping map = new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"),
597    mapList);
598    // add a dbref to the mapped to sequence - should get copied to p0ce19
599  0 map.getTo().addDBRef(new DBRefEntry("PIR", "0", "S01875"));
600  0 dbref1.setMap(map);
601  0 j03321.addDBRef(dbref1);
602  0 DBRefEntry dbref2 = new DBRefEntry("UNIPROT", "0", "P0CE20");
603  0 mapList = new MapList(new int[] { 4, 15 }, new int[] { 2, 5 }, 3, 1);
604  0 dbref2.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"),
605    new MapList(mapList)));
606  0 j03321.addDBRef(dbref2);
607   
608    /*
609    * X06707 with mappings to P0CE19 and P0CE20
610    */
611  0 final SequenceI x06707 = new Sequence("EMBL|X06707", "atgAAACCCTTTGGG");
612  0 DBRefEntry dbref3 = new DBRefEntry("UNIPROT", "0", "P0CE19");
613  0 MapList map2 = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 }, 3,
614    1);
615  0 dbref3.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"), map2));
616  0 x06707.addDBRef(dbref3);
617  0 DBRefEntry dbref4 = new DBRefEntry("UNIPROT", "0", "P0CE20");
618  0 MapList map3 = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 }, 3,
619    1);
620  0 dbref4.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"), map3));
621  0 x06707.addDBRef(dbref4);
622   
623    /*
624    * M19487 with mapping to P0CE19 and Q46432
625    */
626  0 final SequenceI m19487 = new Sequence("EMBL|M19487", "AAACCCTTTGGG");
627  0 DBRefEntry dbref5 = new DBRefEntry("UNIPROT", "0", "P0CE19");
628  0 dbref5.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"),
629    new MapList(mapList)));
630  0 m19487.addDBRef(dbref5);
631  0 DBRefEntry dbref6 = new DBRefEntry("UNIPROT", "0", "Q46432");
632  0 dbref6.setMap(new Mapping(new Sequence("UNIPROT|Q46432", "KPFG"),
633    new MapList(mapList)));
634  0 m19487.addDBRef(dbref6);
635   
636    /*
637    * X07547 with mapping to P0CE20 and B0BCM4
638    */
639  0 final SequenceI x07547 = new Sequence("EMBL|X07547", "cccAAACCCTTTGGG");
640  0 DBRefEntry dbref7 = new DBRefEntry("UNIPROT", "0", "P0CE20");
641  0 dbref7.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"),
642    new MapList(map2)));
643  0 x07547.addDBRef(dbref7);
644  0 DBRefEntry dbref8 = new DBRefEntry("UNIPROT", "0", "B0BCM4");
645  0 dbref8.setMap(new Mapping(new Sequence("UNIPROT|B0BCM4", "KPFG"),
646    new MapList(map2)));
647  0 x07547.addDBRef(dbref8);
648   
649    /*
650    * mock sequence fetcher to 'return' the EMBL sequences
651    * TODO: Mockito would allow .thenReturn().thenReturn() here,
652    * and also capture and verification of the parameters
653    * passed in calls to getSequences() - important to verify that
654    * duplicate sequence fetches are not requested
655    */
656  0 SequenceFetcher mockFetcher = new SequenceFetcher()
657    {
658    int call = 0;
659   
 
660  0 toggle @Override
661    public boolean isFetchable(String source)
662    {
663  0 return true;
664    }
665   
 
666  0 toggle @Override
667    public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
668    {
669  0 call++;
670  0 if (call == 1)
671    {
672  0 assertEquals("Expected 3 embl seqs in first fetch", 3,
673    refs.size());
674  0 return new SequenceI[] { j03321, x06707, m19487 };
675    }
676    else
677    {
678  0 assertEquals("Expected 1 embl seq in second fetch", 1,
679    refs.size());
680  0 return new SequenceI[] { x07547 };
681    }
682    }
683    };
684  0 SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
685   
686    /*
687    * find EMBL xrefs for Uniprot seqs and verify that
688    * - the EMBL xref'd sequences are retrieved without duplicates
689    * - mappings are added to the Uniprot dbrefs
690    * - mappings in the EMBL-to-Uniprot dbrefs are updated to the
691    * alignment sequences
692    * - dbrefs on the EMBL sequences are added to the original dbrefs
693    */
694  0 SequenceI[] seqs = new SequenceI[] { p0ce19, p0ce20 };
695  0 AlignmentI al = new Alignment(seqs);
696  0 Alignment xrefs = new CrossRef(seqs, al).findXrefSequences("EMBL",
697    false);
698   
699    /*
700    * verify retrieved sequences
701    */
702  0 assertNotNull(xrefs);
703  0 assertEquals(4, xrefs.getHeight());
704  0 assertSame(j03321, xrefs.getSequenceAt(0));
705  0 assertSame(x06707, xrefs.getSequenceAt(1));
706  0 assertSame(m19487, xrefs.getSequenceAt(2));
707  0 assertSame(x07547, xrefs.getSequenceAt(3));
708   
709    /*
710    * verify mappings added to Uniprot-to-EMBL dbrefs
711    */
712  0 Mapping mapping = p0ce19.getDBRefs().get(0).getMap();
713  0 assertSame(j03321, mapping.getTo());
714  0 mapping = p0ce19.getDBRefs().get(1).getMap();
715  0 assertSame(x06707, mapping.getTo());
716  0 mapping = p0ce20.getDBRefs().get(0).getMap();
717  0 assertSame(j03321, mapping.getTo());
718  0 mapping = p0ce20.getDBRefs().get(1).getMap();
719  0 assertSame(x06707, mapping.getTo());
720   
721    /*
722    * verify dbrefs on EMBL are mapped to alignment seqs
723    */
724   
725  0 assertSame(p0ce19, j03321.getDBRefs().get(0).getMap().getTo());
726  0 assertSame(p0ce20, j03321.getDBRefs().get(1).getMap().getTo());
727  0 assertSame(p0ce19, x06707.getDBRefs().get(0).getMap().getTo());
728  0 assertSame(p0ce20, x06707.getDBRefs().get(1).getMap().getTo());
729   
730    /*
731    * verify new dbref on EMBL dbref mapping is copied to the
732    * original Uniprot sequence
733    */
734  0 assertEquals(4, p0ce19.getDBRefs().size());
735  0 assertEquals("PIR", p0ce19.getDBRefs().get(3).getSource());
736  0 assertEquals("S01875", p0ce19.getDBRefs().get(3).getAccessionId());
737    }
738   
 
739  1 toggle @Test(groups = "Functional")
740    public void testSameSequence()
741    {
742  1 assertTrue(CrossRef.sameSequence(null, null));
743  1 SequenceI seq1 = new Sequence("seq1", "ABCDEF");
744  1 assertFalse(CrossRef.sameSequence(seq1, null));
745  1 assertFalse(CrossRef.sameSequence(null, seq1));
746  1 assertTrue(CrossRef.sameSequence(seq1, new Sequence("seq2", "ABCDEF")));
747  1 assertTrue(CrossRef.sameSequence(seq1, new Sequence("seq2", "abcdef")));
748  1 assertFalse(CrossRef
749    .sameSequence(seq1, new Sequence("seq2", "ABCDE-F")));
750  1 assertFalse(CrossRef.sameSequence(seq1, new Sequence("seq2", "BCDEF")));
751    }
752    }