Clover icon

jalviewX

  1. Project Clover database Wed Oct 31 2018 15:13:58 GMT
  2. Package jalview.analysis

File CrossRefTest.java

 
 

Code metrics

2
274
20
1
749
430
21
0.08
13.7
20
1.05

Classes

Class Line # Actions
CrossRefTest 53 274 21 129
0.5641892 56.4%
 

Contributing tests

This file is covered by 11 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.analysis;
22   
23    import static org.testng.AssertJUnit.assertEquals;
24    import static org.testng.AssertJUnit.assertFalse;
25    import static org.testng.AssertJUnit.assertNotNull;
26    import static org.testng.AssertJUnit.assertNotSame;
27    import static org.testng.AssertJUnit.assertNull;
28    import static org.testng.AssertJUnit.assertSame;
29    import static org.testng.AssertJUnit.assertTrue;
30   
31    import jalview.datamodel.AlignedCodonFrame;
32    import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
33    import jalview.datamodel.Alignment;
34    import jalview.datamodel.AlignmentI;
35    import jalview.datamodel.DBRefEntry;
36    import jalview.datamodel.Mapping;
37    import jalview.datamodel.Sequence;
38    import jalview.datamodel.SequenceFeature;
39    import jalview.datamodel.SequenceI;
40    import jalview.gui.JvOptionPane;
41    import jalview.util.DBRefUtils;
42    import jalview.util.MapList;
43    import jalview.ws.SequenceFetcher;
44    import jalview.ws.SequenceFetcherFactory;
45   
46    import java.util.ArrayList;
47    import java.util.List;
48   
49    import org.testng.annotations.AfterClass;
50    import org.testng.annotations.BeforeClass;
51    import org.testng.annotations.Test;
52   
 
53    public class CrossRefTest
54    {
55   
 
/**
 * One-off class setup: switches JvOptionPane out of interactive mode and
 * makes its mock response CANCEL_OPTION. Presumably this stops any dialog
 * raised by the code under test from blocking the test run - confirm against
 * JvOptionPane.
 */
56  1 toggle @BeforeClass(alwaysRun = true)
57    public void setUpJvOptionPane()
58    {
59  1 JvOptionPane.setInteractiveMode(false);
60  1 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
61    }
62   
 
63  1 toggle @Test(groups = { "Functional" })
64    public void testFindXDbRefs()
65    {
66  1 DBRefEntry ref1 = new DBRefEntry("UNIPROT", "1", "A123");
67  1 DBRefEntry ref2 = new DBRefEntry("UNIPROTKB/TREMBL", "1", "A123");
68  1 DBRefEntry ref3 = new DBRefEntry("pdb", "1", "A123");
69  1 DBRefEntry ref4 = new DBRefEntry("EMBLCDSPROTEIN", "1", "A123");
70  1 DBRefEntry ref5 = new DBRefEntry("embl", "1", "A123");
71  1 DBRefEntry ref6 = new DBRefEntry("emblCDS", "1", "A123");
72  1 DBRefEntry ref7 = new DBRefEntry("GeneDB", "1", "A123");
73  1 DBRefEntry ref8 = new DBRefEntry("PFAM", "1", "A123");
74    // ENSEMBL is a source of either dna or protein sequence data
75  1 DBRefEntry ref9 = new DBRefEntry("ENSEMBL", "1", "A123");
76  1 DBRefEntry[] refs = new DBRefEntry[] { ref1, ref2, ref3, ref4, ref5,
77    ref6, ref7, ref8, ref9 };
78   
79    /*
80    * Just the DNA refs:
81    */
82  1 DBRefEntry[] found = DBRefUtils.selectDbRefs(true, refs);
83  1 assertEquals(4, found.length);
84  1 assertSame(ref5, found[0]);
85  1 assertSame(ref6, found[1]);
86  1 assertSame(ref7, found[2]);
87  1 assertSame(ref9, found[3]);
88   
89    /*
90    * Just the protein refs:
91    */
92  1 found = DBRefUtils.selectDbRefs(false, refs);
93  1 assertEquals(4, found.length);
94  1 assertSame(ref1, found[0]);
95  1 assertSame(ref2, found[1]);
96  1 assertSame(ref4, found[2]);
97  1 assertSame(ref9, found[3]);
98    }
99   
100    /**
101    * Test the method that finds a sequence's "product" xref source databases,
102    * which may be direct (dbrefs on the sequence), or indirect (dbrefs on
103    * sequences which share a dbref with the sequence)
104    */
 
105  1 toggle @Test(groups = { "Functional" }, enabled = true)
106    public void testFindXrefSourcesForSequence_proteinToDna()
107    {
108  1 SequenceI seq = new Sequence("Seq1", "MGKYQARLSS");
109  1 List<String> sources = new ArrayList<>();
110  1 AlignmentI al = new Alignment(new SequenceI[] {});
111   
112    /*
113    * first with no dbrefs to search
114    */
115  1 sources = new CrossRef(new SequenceI[] { seq }, al)
116    .findXrefSourcesForSequences(false);
117  1 assertTrue(sources.isEmpty());
118   
119    /*
120    * add some dbrefs to sequence
121    */
122    // protein db is not a candidate for findXrefSources
123  1 seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
124    // dna coding databatases are
125  1 seq.addDBRef(new DBRefEntry("EMBL", "0", "E2345"));
126    // a second EMBL xref should not result in a duplicate
127  1 seq.addDBRef(new DBRefEntry("EMBL", "0", "E2346"));
128  1 seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347"));
129  1 seq.addDBRef(new DBRefEntry("GENEDB", "0", "E2348"));
130  1 seq.addDBRef(new DBRefEntry("ENSEMBL", "0", "E2349"));
131  1 seq.addDBRef(new DBRefEntry("ENSEMBLGENOMES", "0", "E2350"));
132  1 sources = new CrossRef(new SequenceI[] { seq }, al)
133    .findXrefSourcesForSequences(false);
134    // method is patched to remove EMBL from the sources to match
135  1 assertEquals(4, sources.size());
136  1 assertEquals("[EMBLCDS, GENEDB, ENSEMBL, ENSEMBLGENOMES]",
137    sources.toString());
138   
139    /*
140    * add a sequence to the alignment which has a dbref to UNIPROT|A1234
141    * and others to dna coding databases
142    */
143  1 sources.clear();
144  1 seq.setDBRefs(null);
145  1 seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
146  1 seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347"));
147  1 SequenceI seq2 = new Sequence("Seq2", "MGKYQARLSS");
148  1 seq2.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
149  1 seq2.addDBRef(new DBRefEntry("EMBL", "0", "E2345"));
150  1 seq2.addDBRef(new DBRefEntry("GENEDB", "0", "E2348"));
151    // TODO include ENSEMBLGENOMES in DBRefSource.DNACODINGDBS ?
152  1 al.addSequence(seq2);
153  1 sources = new CrossRef(new SequenceI[] { seq, seq2 }, al)
154    .findXrefSourcesForSequences(false);
155    // method removed EMBL from sources to match
156  1 assertEquals(2, sources.size());
157  1 assertEquals("[EMBLCDS, GENEDB]", sources.toString());
158    }
159   
160    /**
161    * Test for finding 'product' sequences for the case where only an indirect
162    * xref is found - not on the nucleotide sequence but on a peptide sequence in
163    * the alignment with which it shares a nucleotide dbref
164    */
 
165  1 toggle @Test(groups = { "Functional" }, enabled = true)
166    public void testFindXrefSequences_indirectDbrefToProtein()
167    {
168    /*
169    * Alignment setup:
170    * - nucleotide dbref EMBL|AF039662
171    * - peptide dbrefs EMBL|AF039662, UNIPROT|Q9ZTS2
172    */
173  1 SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
174  1 emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
175  1 SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS");
176  1 uniprotSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
177  1 uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
178   
179    /*
180    * Find UNIPROT xrefs for nucleotide
181    * - it has no UNIPROT dbref of its own
182    * - but peptide with matching nucleotide dbref does, so is returned
183    */
184  1 AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq });
185  1 Alignment xrefs = new CrossRef(new SequenceI[] { emblSeq }, al)
186    .findXrefSequences("UNIPROT", true);
187  1 assertEquals(1, xrefs.getHeight());
188  1 assertSame(uniprotSeq, xrefs.getSequenceAt(0));
189    }
190   
191    /**
192    * Test for finding 'product' sequences for the case where only an indirect
193    * xref is found - not on the peptide sequence but on a nucleotide sequence in
194    * the alignment with which it shares a protein dbref
195    */
 
196  1 toggle @Test(groups = { "Functional" }, enabled = true)
197    public void testFindXrefSequences_indirectDbrefToNucleotide()
198    {
199    /*
200    * Alignment setup:
201    * - peptide dbref UNIPROT|Q9ZTS2
202    * - nucleotide dbref EMBL|AF039662, UNIPROT|Q9ZTS2
203    */
204  1 SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS");
205  1 uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
206  1 SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
207  1 emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
208  1 emblSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
209   
210    /*
211    * find EMBL xrefs for peptide sequence - it has no direct
212    * dbrefs, but the 'corresponding' nucleotide sequence does, so is returned
213    */
214    /*
215    * Find EMBL xrefs for peptide
216    * - it has no EMBL dbref of its own
217    * - but nucleotide with matching peptide dbref does, so is returned
218    */
219  1 AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq });
220  1 Alignment xrefs = new CrossRef(new SequenceI[] { uniprotSeq }, al)
221    .findXrefSequences("EMBL", false);
222  1 assertEquals(1, xrefs.getHeight());
223  1 assertSame(emblSeq, xrefs.getSequenceAt(0));
224    }
225   
226    /**
227    * Test for finding 'product' sequences for the case where the selected
228    * sequence has no dbref to the desired source, and there are no indirect
229    * references via another sequence in the alignment
230    */
 
231  1 toggle @Test(groups = { "Functional" })
232    public void testFindXrefSequences_noDbrefs()
233    {
234    /*
235    * two nucleotide sequences, one with UNIPROT dbref
236    */
237  1 SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
238  1 dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
239  1 SequenceI dna2 = new Sequence("AJ307031", "AAACCCTTT");
240   
241    /*
242    * find UNIPROT xrefs for peptide sequence - it has no direct
243    * dbrefs, and the other sequence (which has a UNIPROT dbref) is not
244    * equatable to it, so no results found
245    */
246  1 AlignmentI al = new Alignment(new SequenceI[] { dna1, dna2 });
247  1 Alignment xrefs = new CrossRef(new SequenceI[] { dna2 }, al)
248    .findXrefSequences("UNIPROT", true);
249  1 assertNull(xrefs);
250    }
251   
252    /**
253    * Tests for the method that searches an alignment (with one sequence
254    * excluded) for protein/nucleotide sequences with a given cross-reference
255    */
 
256  1 toggle @Test(groups = { "Functional" }, enabled = true)
257    public void testSearchDataset()
258    {
259    /*
260    * nucleotide sequence with UNIPROT AND EMBL dbref
261    * peptide sequence with UNIPROT dbref
262    */
263  1 SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
264  1 Mapping map = new Mapping(new Sequence("pep2", "MLAVSRG"), new MapList(
265    new int[] { 1, 21 }, new int[] { 1, 7 }, 3, 1));
266  1 DBRefEntry dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map);
267  1 dna1.addDBRef(dbref);
268  1 dna1.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
269  1 SequenceI pep1 = new Sequence("Q9ZTS2", "MLAVSRGQ");
270  1 dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
271  1 pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
272  1 AlignmentI al = new Alignment(new SequenceI[] { dna1, pep1 });
273   
274  1 List<SequenceI> result = new ArrayList<>();
275   
276    /*
277    * first search for a dbref nowhere on the alignment:
278    */
279  1 dbref = new DBRefEntry("UNIPROT", "0", "P30419");
280  1 CrossRef testee = new CrossRef(al.getSequencesArray(), al);
281  1 AlignedCodonFrame acf = new AlignedCodonFrame();
282  1 boolean found = testee.searchDataset(true, dna1, dbref, result, acf,
283    true);
284  1 assertFalse(found);
285  1 assertTrue(result.isEmpty());
286  1 assertTrue(acf.isEmpty());
287   
288    /*
289    * search for a protein sequence with dbref UNIPROT:Q9ZTS2
290    */
291  1 acf = new AlignedCodonFrame();
292  1 dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
293  1 found = testee.searchDataset(!dna1.isProtein(), dna1, dbref, result,
294    acf, false); // search dataset with a protein xref from a dna
295    // sequence to locate the protein product
296  1 assertTrue(found);
297  1 assertEquals(1, result.size());
298  1 assertSame(pep1, result.get(0));
299  1 assertTrue(acf.isEmpty());
300   
301    /*
302    * search for a nucleotide sequence with dbref UNIPROT:Q9ZTS2
303    */
304  1 result.clear();
305  1 acf = new AlignedCodonFrame();
306  1 dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
307  1 found = testee.searchDataset(!pep1.isProtein(), pep1, dbref, result,
308    acf, false); // search dataset with a protein's direct dbref to
309    // locate dna sequences with matching xref
310  1 assertTrue(found);
311  1 assertEquals(1, result.size());
312  1 assertSame(dna1, result.get(0));
313    // should now have a mapping from dna to pep1
314  1 List<SequenceToSequenceMapping> mappings = acf.getMappings();
315  1 assertEquals(1, mappings.size());
316  1 SequenceToSequenceMapping mapping = mappings.get(0);
317  1 assertSame(dna1, mapping.getFromSeq());
318  1 assertSame(pep1, mapping.getMapping().getTo());
319  1 MapList mapList = mapping.getMapping().getMap();
320  1 assertEquals(1, mapList.getToRatio());
321  1 assertEquals(3, mapList.getFromRatio());
322  1 assertEquals(1, mapList.getFromRanges().size());
323  1 assertEquals(1, mapList.getFromRanges().get(0)[0]);
324  1 assertEquals(21, mapList.getFromRanges().get(0)[1]);
325  1 assertEquals(1, mapList.getToRanges().size());
326  1 assertEquals(1, mapList.getToRanges().get(0)[0]);
327  1 assertEquals(7, mapList.getToRanges().get(0)[1]);
328    }
329   
330    /**
331    * Test for finding 'product' sequences for the case where the selected
332    * sequence has a dbref with a mapping to a sequence. This represents the case
333    * where either
334    * <ul>
335    * <li>a fetched sequence is already decorated with its cross-reference (e.g.
336    * EMBL + translation), or</li>
337    * <li>Get Cross-References has been done once resulting in instantiated
338    * cross-reference mappings</li>
339    * </ul>
340    */
 
341  1 toggle @Test(groups = { "Functional" })
342    public void testFindXrefSequences_fromDbRefMap()
343    {
344    /*
345    * scenario: nucleotide sequence AF039662
346    * with dbref + mapping to Q9ZTS2 and P30419
347    * which themselves each have a dbref and feature
348    */
349  1 SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
350  1 SequenceI pep1 = new Sequence("Q9ZTS2", "MALFQRSV");
351  1 SequenceI pep2 = new Sequence("P30419", "MTRRSQIF");
352  1 dna1.createDatasetSequence();
353  1 pep1.createDatasetSequence();
354  1 pep2.createDatasetSequence();
355   
356  1 pep1.getDatasetSequence().addDBRef(
357    new DBRefEntry("Pfam", "0", "PF00111"));
358  1 pep1.addSequenceFeature(new SequenceFeature("type", "desc", 12, 14, 1f,
359    "group"));
360  1 pep2.getDatasetSequence().addDBRef(new DBRefEntry("PDB", "0", "3JTK"));
361  1 pep2.addSequenceFeature(new SequenceFeature("type2", "desc2", 13, 15,
362    12f, "group2"));
363   
364  1 MapList mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 },
365    3, 1);
366  1 Mapping map = new Mapping(pep1, mapList);
367  1 DBRefEntry dbRef1 = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map);
368  1 dna1.getDatasetSequence().addDBRef(dbRef1);
369  1 mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, 3, 1);
370  1 map = new Mapping(pep2, mapList);
371  1 DBRefEntry dbRef2 = new DBRefEntry("UNIPROT", "0", "P30419", map);
372  1 dna1.getDatasetSequence().addDBRef(dbRef2);
373   
374    /*
375    * find UNIPROT xrefs for nucleotide sequence - it should pick up
376    * mapped sequences
377    */
378  1 AlignmentI al = new Alignment(new SequenceI[] { dna1 });
379  1 Alignment xrefs = new CrossRef(new SequenceI[] { dna1 }, al)
380    .findXrefSequences("UNIPROT", true);
381  1 assertEquals(2, xrefs.getHeight());
382   
383    /*
384    * cross-refs alignment holds copies of the mapped sequences
385    * including copies of their dbrefs and features
386    */
387  1 checkCopySequence(pep1, xrefs.getSequenceAt(0));
388  1 checkCopySequence(pep2, xrefs.getSequenceAt(1));
389    }
390   
391    /**
392    * Helper method that verifies that 'copy' has the same name, start, end,
393    * sequence and dataset sequence object as 'original' (but is not the same
394    * object)
395    *
396    * @param copy
397    * @param original
398    */
 
399  2 toggle private void checkCopySequence(SequenceI copy, SequenceI original)
400    {
401  2 assertNotSame(copy, original);
402  2 assertSame(copy.getDatasetSequence(), original.getDatasetSequence());
403  2 assertEquals(copy.getName(), original.getName());
404  2 assertEquals(copy.getStart(), original.getStart());
405  2 assertEquals(copy.getEnd(), original.getEnd());
406  2 assertEquals(copy.getSequenceAsString(), original.getSequenceAsString());
407    }
408   
409    /**
410    * Test for finding 'product' sequences for the case where the selected
411    * sequence has a dbref with no mapping, triggering a fetch from database
412    */
 
413  0 toggle @Test(groups = { "Functional" })
414    public void testFindXrefSequences_withFetch()
415    {
416  0 SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
417  0 dna1.addDBRef(new DBRefEntry("UNIPROT", "ENA:0", "Q9ZTS2"));
418  0 dna1.addDBRef(new DBRefEntry("UNIPROT", "ENA:0", "P30419"));
419  0 dna1.addDBRef(new DBRefEntry("UNIPROT", "ENA:0", "P00314"));
420  0 final SequenceI pep1 = new Sequence("Q9ZTS2", "MYQLIRSSW");
421  0 pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
422   
423  0 final SequenceI pep2 = new Sequence("P00314", "MRKLLAASG");
424  0 pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "P00314"));
425   
426    /*
427    * argument false suppresses adding DAS sources
428    * todo: define an interface type SequenceFetcherI and mock that
429    */
430  0 SequenceFetcher mockFetcher = new SequenceFetcher()
431    {
 
432  0 toggle @Override
433    public boolean isFetchable(String source)
434    {
435  0 return true;
436    }
437   
 
438  0 toggle @Override
439    public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
440    {
441  0 return new SequenceI[] { pep1, pep2 };
442    }
443    };
444  0 SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
445   
446    /*
447    * find UNIPROT xrefs for nucleotide sequence
448    */
449  0 AlignmentI al = new Alignment(new SequenceI[] { dna1 });
450  0 Alignment xrefs = new CrossRef(new SequenceI[] { dna1 }, al)
451    .findXrefSequences("UNIPROT", true);
452  0 Test failure here assertEquals(2, xrefs.getHeight());
453  0 assertSame(pep1, xrefs.getSequenceAt(0));
454  0 assertSame(pep2, xrefs.getSequenceAt(1));
455    }
456   
 
457  0 toggle @AfterClass
458    public void tearDown()
459    {
460  0 SequenceFetcherFactory.setSequenceFetcher(null);
461    }
462   
463    /**
464    * Test for finding 'product' sequences for the case where both gene and
465    * transcript sequences have dbrefs to Uniprot.
466    */
 
467  0 toggle @Test(groups = { "Functional" })
468    public void testFindXrefSequences_forGeneAndTranscripts()
469    {
470    /*
471    * 'gene' sequence
472    */
473  0 SequenceI gene = new Sequence("ENSG00000157764", "CGCCTCCCTTCCCC");
474  0 gene.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056"));
475  0 gene.addDBRef(new DBRefEntry("UNIPROT", "0", "H7C5K3"));
476   
477    /*
478    * 'transcript' with CDS feature (supports mapping to protein)
479    */
480  0 SequenceI braf001 = new Sequence("ENST00000288602", "taagATGGCGGCGCTGa");
481  0 braf001.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056"));
482  0 braf001.addSequenceFeature(new SequenceFeature("CDS", "", 5, 16, 0f,
483    null));
484   
485    /*
486    * 'spliced transcript' with CDS ranges
487    */
488  0 SequenceI braf002 = new Sequence("ENST00000497784", "gCAGGCtaTCTGTTCaa");
489  0 braf002.addDBRef(new DBRefEntry("UNIPROT", "ENSEMBL|0", "H7C5K3"));
490  0 braf002.addSequenceFeature(new SequenceFeature("CDS", "", 2, 6, 0f,
491    null));
492  0 braf002.addSequenceFeature(new SequenceFeature("CDS", "", 9, 15, 0f,
493    null));
494   
495    /*
496    * TODO code is fragile - use of SequenceIdMatcher depends on fetched
497    * sequences having a name starting Source|Accession
498    * which happens to be true for Uniprot,PDB,EMBL but not Pfam,Rfam,Ensembl
499    */
500  0 final SequenceI pep1 = new Sequence("UNIPROT|P15056", "MAAL");
501  0 pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056"));
502  0 final SequenceI pep2 = new Sequence("UNIPROT|H7C5K3", "QALF");
503  0 pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "H7C5K3"));
504    /*
505    * argument false suppresses adding DAS sources
506    * todo: define an interface type SequenceFetcherI and mock that
507    */
508  0 SequenceFetcher mockFetcher = new SequenceFetcher()
509    {
 
510  0 toggle @Override
511    public boolean isFetchable(String source)
512    {
513  0 return true;
514    }
515   
 
516  0 toggle @Override
517    public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
518    {
519  0 return new SequenceI[] { pep1, pep2 };
520    }
521    };
522  0 SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
523   
524    /*
525    * find UNIPROT xrefs for gene and transcripts
526    * verify that
527    * - the two proteins are retrieved but not duplicated
528    * - mappings are built from transcript (CDS) to proteins
529    * - no mappings from gene to proteins
530    */
531  0 SequenceI[] seqs = new SequenceI[] { gene, braf001, braf002 };
532  0 AlignmentI al = new Alignment(seqs);
533  0 Alignment xrefs = new CrossRef(seqs, al).findXrefSequences("UNIPROT",
534    true);
535  0 Test failure here assertEquals(2, xrefs.getHeight());
536  0 assertSame(pep1, xrefs.getSequenceAt(0));
537  0 assertSame(pep2, xrefs.getSequenceAt(1));
538    }
539   
540    /**
541    * <pre>
542    * Test that emulates this (real but simplified) case:
543    * Alignment: DBrefs
544    * UNIPROT|P0CE19 EMBL|J03321, EMBL|X06707, EMBL|M19487
545    * UNIPROT|P0CE20 EMBL|J03321, EMBL|X06707, EMBL|X07547
546    * Find cross-references for EMBL. These are mocked here as
547    * EMBL|J03321 with mappings to P0CE18, P0CE19, P0CE20
548    * EMBL|X06707 with mappings to P0CE17, P0CE19, P0CE20
549    * EMBL|M19487 with mappings to P0CE19, Q46432
550    * EMBL|X07547 with mappings to P0CE20, B0BCM4
551    * EMBL sequences are first 'fetched' (mocked here) for P0CE19.
552    * The 3 EMBL sequences are added to the alignment dataset.
553    * Their dbrefs to Uniprot products P0CE19 and P0CE20 should be matched in the
554    * alignment dataset and updated to reference the original Uniprot sequences.
555    * For the second Uniprot sequence, the J03321 and X06707 xrefs should be
556    * resolved from the dataset, and only the X07547 dbref fetched.
557    * So the end state to verify is:
558    * - 4 cross-ref sequences returned: J03321, X06707, M19487, X07547
559    * - P0CE19/20 dbrefs to EMBL sequences now have mappings
560    * - J03321 dbrefs to P0CE19/20 mapped to original Uniprot sequences
561    * - X06707 dbrefs to P0CE19/20 mapped to original Uniprot sequences
562    * </pre>
563    */
 
564  0 toggle @Test(groups = { "Functional" })
565    public void testFindXrefSequences_uniprotEmblManyToMany()
566    {
567    /*
568    * Uniprot sequences, both with xrefs to EMBL|J03321
569    * and EMBL|X07547
570    */
571  0 SequenceI p0ce19 = new Sequence("UNIPROT|P0CE19", "KPFG");
572  0 p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "J03321"));
573  0 p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "X06707"));
574  0 p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "M19487"));
575  0 SequenceI p0ce20 = new Sequence("UNIPROT|P0CE20", "PFGK");
576  0 p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "J03321"));
577  0 p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "X06707"));
578  0 p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "X07547"));
579   
580    /*
581    * EMBL sequences to be 'fetched', complete with dbrefs and mappings
582    * to their protein products (CDS location and translations are provided
583    * in EMBL XML); these should be matched to, and replaced with,
584    * the corresponding uniprot sequences after fetching
585    */
586   
587    /*
588    * J03321 with mappings to P0CE19 and P0CE20
589    */
590  0 final SequenceI j03321 = new Sequence("EMBL|J03321", "AAACCCTTTGGGAAAA");
591  0 DBRefEntry dbref1 = new DBRefEntry("UNIPROT", "0", "P0CE19");
592  0 MapList mapList = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 },
593    3, 1);
594  0 Mapping map = new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"),
595    mapList);
596    // add a dbref to the mapped to sequence - should get copied to p0ce19
597  0 map.getTo().addDBRef(new DBRefEntry("PIR", "0", "S01875"));
598  0 dbref1.setMap(map);
599  0 j03321.addDBRef(dbref1);
600  0 DBRefEntry dbref2 = new DBRefEntry("UNIPROT", "0", "P0CE20");
601  0 mapList = new MapList(new int[] { 4, 15 }, new int[] { 2, 5 }, 3, 1);
602  0 dbref2.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"),
603    new MapList(mapList)));
604  0 j03321.addDBRef(dbref2);
605   
606    /*
607    * X06707 with mappings to P0CE19 and P0CE20
608    */
609  0 final SequenceI x06707 = new Sequence("EMBL|X06707", "atgAAACCCTTTGGG");
610  0 DBRefEntry dbref3 = new DBRefEntry("UNIPROT", "0", "P0CE19");
611  0 MapList map2 = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 }, 3,
612    1);
613  0 dbref3.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"), map2));
614  0 x06707.addDBRef(dbref3);
615  0 DBRefEntry dbref4 = new DBRefEntry("UNIPROT", "0", "P0CE20");
616  0 MapList map3 = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 }, 3,
617    1);
618  0 dbref4.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"), map3));
619  0 x06707.addDBRef(dbref4);
620   
621    /*
622    * M19487 with mapping to P0CE19 and Q46432
623    */
624  0 final SequenceI m19487 = new Sequence("EMBL|M19487", "AAACCCTTTGGG");
625  0 DBRefEntry dbref5 = new DBRefEntry("UNIPROT", "0", "P0CE19");
626  0 dbref5.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"),
627    new MapList(mapList)));
628  0 m19487.addDBRef(dbref5);
629  0 DBRefEntry dbref6 = new DBRefEntry("UNIPROT", "0", "Q46432");
630  0 dbref6.setMap(new Mapping(new Sequence("UNIPROT|Q46432", "KPFG"),
631    new MapList(mapList)));
632  0 m19487.addDBRef(dbref6);
633   
634    /*
635    * X07547 with mapping to P0CE20 and B0BCM4
636    */
637  0 final SequenceI x07547 = new Sequence("EMBL|X07547", "cccAAACCCTTTGGG");
638  0 DBRefEntry dbref7 = new DBRefEntry("UNIPROT", "0", "P0CE20");
639  0 dbref7.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"),
640    new MapList(map2)));
641  0 x07547.addDBRef(dbref7);
642  0 DBRefEntry dbref8 = new DBRefEntry("UNIPROT", "0", "B0BCM4");
643  0 dbref8.setMap(new Mapping(new Sequence("UNIPROT|B0BCM4", "KPFG"),
644    new MapList(map2)));
645  0 x07547.addDBRef(dbref8);
646   
647    /*
648    * mock sequence fetcher to 'return' the EMBL sequences
649    * TODO: Mockito would allow .thenReturn().thenReturn() here,
650    * and also capture and verification of the parameters
651    * passed in calls to getSequences() - important to verify that
652    * duplicate sequence fetches are not requested
653    */
654  0 SequenceFetcher mockFetcher = new SequenceFetcher()
655    {
656    int call = 0;
657   
 
658  0 toggle @Override
659    public boolean isFetchable(String source)
660    {
661  0 return true;
662    }
663   
 
664  0 toggle @Override
665    public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
666    {
667  0 call++;
668  0 if (call == 1)
669    {
670  0 assertEquals("Expected 3 embl seqs in first fetch", 3,
671    refs.size());
672  0 return new SequenceI[] { j03321, x06707, m19487 };
673    }
674    else
675    {
676  0 assertEquals("Expected 1 embl seq in second fetch", 1,
677    refs.size());
678  0 return new SequenceI[] { x07547 };
679    }
680    }
681    };
682  0 SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
683   
684    /*
685    * find EMBL xrefs for Uniprot seqs and verify that
686    * - the EMBL xref'd sequences are retrieved without duplicates
687    * - mappings are added to the Uniprot dbrefs
688    * - mappings in the EMBL-to-Uniprot dbrefs are updated to the
689    * alignment sequences
690    * - dbrefs on the EMBL sequences are added to the original dbrefs
691    */
692  0 SequenceI[] seqs = new SequenceI[] { p0ce19, p0ce20 };
693  0 AlignmentI al = new Alignment(seqs);
694  0 Alignment xrefs = new CrossRef(seqs, al).findXrefSequences("EMBL",
695    false);
696   
697    /*
698    * verify retrieved sequences
699    */
700  0 assertNotNull(xrefs);
701  0 assertEquals(4, xrefs.getHeight());
702  0 assertSame(j03321, xrefs.getSequenceAt(0));
703  0 assertSame(x06707, xrefs.getSequenceAt(1));
704  0 assertSame(m19487, xrefs.getSequenceAt(2));
705  0 assertSame(x07547, xrefs.getSequenceAt(3));
706   
707    /*
708    * verify mappings added to Uniprot-to-EMBL dbrefs
709    */
710  0 Mapping mapping = p0ce19.getDBRefs()[0].getMap();
711  0 Test failure here assertSame(j03321, mapping.getTo());
712  0 mapping = p0ce19.getDBRefs()[1].getMap();
713  0 assertSame(x06707, mapping.getTo());
714  0 mapping = p0ce20.getDBRefs()[0].getMap();
715  0 assertSame(j03321, mapping.getTo());
716  0 mapping = p0ce20.getDBRefs()[1].getMap();
717  0 assertSame(x06707, mapping.getTo());
718   
719    /*
720    * verify dbrefs on EMBL are mapped to alignment seqs
721    */
722  0 assertSame(p0ce19, j03321.getDBRefs()[0].getMap().getTo());
723  0 assertSame(p0ce20, j03321.getDBRefs()[1].getMap().getTo());
724  0 assertSame(p0ce19, x06707.getDBRefs()[0].getMap().getTo());
725  0 assertSame(p0ce20, x06707.getDBRefs()[1].getMap().getTo());
726   
727    /*
728    * verify new dbref on EMBL dbref mapping is copied to the
729    * original Uniprot sequence
730    */
731  0 assertEquals(4, p0ce19.getDBRefs().length);
732  0 assertEquals("PIR", p0ce19.getDBRefs()[3].getSource());
733  0 assertEquals("S01875", p0ce19.getDBRefs()[3].getAccessionId());
734    }
735   
 
736  1 toggle @Test(groups = "Functional")
737    public void testSameSequence()
738    {
739  1 assertTrue(CrossRef.sameSequence(null, null));
740  1 SequenceI seq1 = new Sequence("seq1", "ABCDEF");
741  1 assertFalse(CrossRef.sameSequence(seq1, null));
742  1 assertFalse(CrossRef.sameSequence(null, seq1));
743  1 assertTrue(CrossRef.sameSequence(seq1, new Sequence("seq2", "ABCDEF")));
744  1 assertTrue(CrossRef.sameSequence(seq1, new Sequence("seq2", "abcdef")));
745  1 assertFalse(CrossRef
746    .sameSequence(seq1, new Sequence("seq2", "ABCDE-F")));
747  1 assertFalse(CrossRef.sameSequence(seq1, new Sequence("seq2", "BCDEF")));
748    }
749    }