Clover icon

Coverage Report

  1. Project Clover database Wed Nov 13 2024 16:21:17 GMT
  2. Package jalview.analysis

File CrossRefTest.java

 

Code metrics

2
276
21
1
777
447
22
0.08
13.14
21
1.05

Classes

Class Line # Actions
CrossRefTest 56 276 22
0.57525086 57.5%
 

Contributing tests

This file is covered by 8 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.analysis;
22   
23    import static org.testng.AssertJUnit.assertEquals;
24    import static org.testng.AssertJUnit.assertFalse;
25    import static org.testng.AssertJUnit.assertNotNull;
26    import static org.testng.AssertJUnit.assertNotSame;
27    import static org.testng.AssertJUnit.assertNull;
28    import static org.testng.AssertJUnit.assertSame;
29    import static org.testng.AssertJUnit.assertTrue;
30   
31    import java.util.ArrayList;
32    import java.util.Arrays;
33    import java.util.List;
34   
35    import org.testng.annotations.AfterClass;
36    import org.testng.annotations.BeforeClass;
37    import org.testng.annotations.BeforeMethod;
38    import org.testng.annotations.Test;
39   
40    import jalview.bin.Cache;
41    import jalview.datamodel.AlignedCodonFrame;
42    import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
43    import jalview.datamodel.Alignment;
44    import jalview.datamodel.AlignmentI;
45    import jalview.datamodel.DBRefEntry;
46    import jalview.datamodel.Mapping;
47    import jalview.datamodel.Sequence;
48    import jalview.datamodel.SequenceFeature;
49    import jalview.datamodel.SequenceI;
50    import jalview.gui.JvOptionPane;
51    import jalview.util.DBRefUtils;
52    import jalview.util.MapList;
53    import jalview.ws.SequenceFetcher;
54    import jalview.ws.SequenceFetcherFactory;
55   
 
56    public class CrossRefTest
57    {
58   
 
59  1 toggle @BeforeClass(alwaysRun = true)
60    public void setUpJvOptionPane()
61    {
62  1 JvOptionPane.setInteractiveMode(false);
63  1 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
64    }
65   
 
66  8 toggle @BeforeMethod(alwaysRun = true)
67    public void loadProperties()
68    {
69  8 Cache.loadProperties("test/jalview/util/comparisonTestProps.jvprops");
70    }
71   
 
72  1 toggle @Test(groups = { "Functional" })
73    public void testFindXDbRefs()
74    {
75  1 DBRefEntry ref1 = new DBRefEntry("UNIPROT", "1", "A123");
76  1 DBRefEntry ref2 = new DBRefEntry("UNIPROTKB/TREMBL", "1", "A123");
77  1 DBRefEntry ref3 = new DBRefEntry("pdb", "1", "A123");
78  1 DBRefEntry ref4 = new DBRefEntry("EMBLCDSPROTEIN", "1", "A123");
79  1 DBRefEntry ref5 = new DBRefEntry("embl", "1", "A123");
80  1 DBRefEntry ref6 = new DBRefEntry("emblCDS", "1", "A123");
81  1 DBRefEntry ref7 = new DBRefEntry("GeneDB", "1", "A123");
82  1 DBRefEntry ref8 = new DBRefEntry("PFAM", "1", "A123");
83    // ENSEMBL is a source of either dna or protein sequence data
84  1 DBRefEntry ref9 = new DBRefEntry("ENSEMBL", "1", "A123");
85  1 List<DBRefEntry> refs = Arrays
86    .asList(new DBRefEntry[]
87    { ref1, ref2, ref3, ref4, ref5, ref6, ref7, ref8, ref9 });
88   
89    /*
90    * Just the DNA refs:
91    */
92  1 List<DBRefEntry> found = DBRefUtils.selectDbRefs(true, refs);
93  1 assertEquals(4, found.size());
94  1 assertSame(ref5, found.get(0));
95  1 assertSame(ref6, found.get(1));
96  1 assertSame(ref7, found.get(2));
97  1 assertSame(ref9, found.get(3));
98   
99    /*
100    * Just the protein refs:
101    */
102  1 found = DBRefUtils.selectDbRefs(false, refs);
103  1 assertEquals(4, found.size());
104  1 assertSame(ref1, found.get(0));
105  1 assertSame(ref2, found.get(1));
106  1 assertSame(ref4, found.get(2));
107  1 assertSame(ref9, found.get(3));
108    }
109   
110    /**
111    * Test the method that finds a sequence's "product" xref source databases,
112    * which may be direct (dbrefs on the sequence), or indirect (dbrefs on
113    * sequences which share a dbref with the sequence)
114    */
 
115  1 toggle @Test(groups = { "Functional" }, enabled = true)
116    public void testFindXrefSourcesForSequence_proteinToDna()
117    {
118  1 SequenceI seq = new Sequence("Seq1", "MGKYQARLSS");
119  1 List<String> sources = new ArrayList<>();
120  1 AlignmentI al = new Alignment(new SequenceI[] {});
121   
122    /*
123    * first with no dbrefs to search
124    */
125  1 sources = new CrossRef(new SequenceI[] { seq }, al)
126    .findXrefSourcesForSequences(false);
127  1 assertTrue(sources.isEmpty());
128   
129    /*
130    * add some dbrefs to sequence
131    */
132    // protein db is not a candidate for findXrefSources
133  1 seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
134    // dna coding databases are
135  1 seq.addDBRef(new DBRefEntry("EMBL", "0", "E2345"));
136    // a second EMBL xref should not result in a duplicate
137  1 seq.addDBRef(new DBRefEntry("EMBL", "0", "E2346"));
138  1 seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347"));
139  1 seq.addDBRef(new DBRefEntry("GENEDB", "0", "E2348"));
140  1 seq.addDBRef(new DBRefEntry("ENSEMBL", "0", "E2349"));
141  1 seq.addDBRef(new DBRefEntry("ENSEMBLGENOMES", "0", "E2350"));
142  1 sources = new CrossRef(new SequenceI[] { seq }, al)
143    .findXrefSourcesForSequences(false);
144    // method is patched to remove EMBL from the sources to match
145  1 assertEquals(4, sources.size());
146  1 assertEquals("[EMBLCDS, GENEDB, ENSEMBL, ENSEMBLGENOMES]",
147    sources.toString());
148   
149    /*
150    * add a sequence to the alignment which has a dbref to UNIPROT|A1234
151    * and others to dna coding databases
152    */
153  1 sources.clear();
154  1 seq.setDBRefs(null);
155  1 seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
156  1 seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347"));
157  1 SequenceI seq2 = new Sequence("Seq2", "MGKYQARLSS");
158  1 seq2.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234"));
159  1 seq2.addDBRef(new DBRefEntry("EMBL", "0", "E2345"));
160  1 seq2.addDBRef(new DBRefEntry("GENEDB", "0", "E2348"));
161    // TODO include ENSEMBLGENOMES in DBRefSource.DNACODINGDBS ?
162  1 al.addSequence(seq2);
163  1 sources = new CrossRef(new SequenceI[] { seq, seq2 }, al)
164    .findXrefSourcesForSequences(false);
165    // method removed EMBL from sources to match
166  1 assertEquals(2, sources.size());
167  1 assertEquals("[EMBLCDS, GENEDB]", sources.toString());
168    }
169   
170    /**
171    * Test for finding 'product' sequences for the case where only an indirect
172    * xref is found - not on the nucleotide sequence but on a peptide sequence in
173    * the alignment with which it shares a nucleotide dbref
174    */
 
175  1 toggle @Test(groups = { "Functional" }, enabled = true)
176    public void testFindXrefSequences_indirectDbrefToProtein()
177    {
178    /*
179    * Alignment setup:
180    * - nucleotide dbref EMBL|AF039662
181    * - peptide dbrefs EMBL|AF039662, UNIPROT|Q9ZTS2
182    */
183  1 SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
184  1 emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
185  1 SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS");
186  1 uniprotSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
187  1 uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
188   
189    /*
190    * Find UNIPROT xrefs for nucleotide
191    * - it has no UNIPROT dbref of its own
192    * - but peptide with matching nucleotide dbref does, so is returned
193    */
194  1 AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq });
195  1 Alignment xrefs = new CrossRef(new SequenceI[] { emblSeq }, al)
196    .findXrefSequences("UNIPROT", true);
197  1 System.err.println("xrefs=" + xrefs);
198  1 assertEquals(1, xrefs.getHeight());
199  1 assertSame(uniprotSeq, xrefs.getSequenceAt(0));
200    }
201   
202    /**
203    * Test for finding 'product' sequences for the case where only an indirect
204    * xref is found - not on the peptide sequence but on a nucleotide sequence in
205    * the alignment with which it shares a protein dbref
206    */
 
207  1 toggle @Test(groups = { "Functional" }, enabled = true)
208    public void testFindXrefSequences_indirectDbrefToNucleotide()
209    {
210    /*
211    * Alignment setup:
212    * - peptide dbref UNIPROT|Q9ZTS2
213    * - nucleotide dbref EMBL|AF039662, UNIPROT|Q9ZTS2
214    */
215  1 SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS");
216  1 uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
217  1 SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
218  1 emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
219  1 emblSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
220   
221    /*
222    * find EMBL xrefs for peptide sequence - it has no direct
223    * dbrefs, but the 'corresponding' nucleotide sequence does, so is returned
224    */
225    /*
226    * Find EMBL xrefs for peptide
227    * - it has no EMBL dbref of its own
228    * - but nucleotide with matching peptide dbref does, so is returned
229    */
230  1 AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq });
231  1 Alignment xrefs = new CrossRef(new SequenceI[] { uniprotSeq }, al)
232    .findXrefSequences("EMBL", false);
233  1 assertEquals(1, xrefs.getHeight());
234  1 assertSame(emblSeq, xrefs.getSequenceAt(0));
235    }
236   
237    /**
238    * Test for finding 'product' sequences for the case where the selected
239    * sequence has no dbref to the desired source, and there are no indirect
240    * references via another sequence in the alignment
241    */
 
242  1 toggle @Test(groups = { "Functional" })
243    public void testFindXrefSequences_noDbrefs()
244    {
245    /*
246    * two nucleotide sequences, one with UNIPROT dbref
247    */
248  1 SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
249  1 dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
250  1 SequenceI dna2 = new Sequence("AJ307031", "AAACCCTTT");
251   
252    /*
253    * find UNIPROT xrefs for nucleotide sequence - it has no direct
254    * dbrefs, and the other sequence (which has a UNIPROT dbref) is not
255    * equatable to it, so no results found
256    */
257  1 AlignmentI al = new Alignment(new SequenceI[] { dna1, dna2 });
258  1 Alignment xrefs = new CrossRef(new SequenceI[] { dna2 }, al)
259    .findXrefSequences("UNIPROT", true);
260  1 assertNull(xrefs);
261    }
262   
263    /**
264    * Tests for the method that searches an alignment (with one sequence
265    * excluded) for protein/nucleotide sequences with a given cross-reference
266    */
 
267  1 toggle @Test(groups = { "Functional" }, enabled = true)
268    public void testSearchDataset()
269    {
270    /*
271    * nucleotide sequence with UNIPROT AND EMBL dbref
272    * peptide sequence with UNIPROT dbref
273    */
274  1 SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
275  1 Mapping map = new Mapping(new Sequence("pep2", "MLAVSRG"),
276    new MapList(new int[]
277    { 1, 21 }, new int[] { 1, 7 }, 3, 1));
278  1 DBRefEntry dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map);
279  1 dna1.addDBRef(dbref);
280  1 dna1.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
281  1 SequenceI pep1 = new Sequence("Q9ZTS2", "MLAVSRGQ");
282  1 dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
283  1 pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
284  1 AlignmentI al = new Alignment(new SequenceI[] { dna1, pep1 });
285   
286  1 List<SequenceI> result = new ArrayList<>();
287   
288    /*
289    * first search for a dbref nowhere on the alignment:
290    */
291  1 dbref = new DBRefEntry("UNIPROT", "0", "P30419");
292  1 CrossRef testee = new CrossRef(al.getSequencesArray(), al);
293  1 AlignedCodonFrame acf = new AlignedCodonFrame();
294  1 boolean found = testee.searchDataset(true, dna1, dbref, result, acf,
295    true, DBRefUtils.SEARCH_MODE_FULL);
296  1 assertFalse(found);
297  1 assertTrue(result.isEmpty());
298  1 assertTrue(acf.isEmpty());
299   
300    /*
301    * search for a protein sequence with dbref UNIPROT:Q9ZTS2
302    */
303  1 acf = new AlignedCodonFrame();
304  1 dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
305  1 found = testee.searchDataset(!dna1.isProtein(), dna1, dbref, result,
306    acf, false, DBRefUtils.SEARCH_MODE_FULL); // search dataset with a
307    // protein xref from a dna
308    // sequence to locate the protein product
309  1 assertTrue(found);
310  1 assertEquals(1, result.size());
311  1 assertSame(pep1, result.get(0));
312  1 assertTrue(acf.isEmpty());
313   
314    /*
315    * search for a nucleotide sequence with dbref UNIPROT:Q9ZTS2
316    */
317  1 result.clear();
318  1 acf = new AlignedCodonFrame();
319  1 dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
320  1 found = testee.searchDataset(!pep1.isProtein(), pep1, dbref, result,
321    acf, false, DBRefUtils.SEARCH_MODE_FULL); // search dataset with a
322    // protein's direct dbref
323    // to
324    // locate dna sequences with matching xref
325  1 assertTrue(found);
326  1 assertEquals(1, result.size());
327  1 assertSame(dna1, result.get(0));
328    // should now have a mapping from dna to pep1
329  1 List<SequenceToSequenceMapping> mappings = acf.getMappings();
330  1 assertEquals(1, mappings.size());
331  1 SequenceToSequenceMapping mapping = mappings.get(0);
332  1 assertSame(dna1, mapping.getFromSeq());
333  1 assertSame(pep1, mapping.getMapping().getTo());
334  1 MapList mapList = mapping.getMapping().getMap();
335  1 assertEquals(1, mapList.getToRatio());
336  1 assertEquals(3, mapList.getFromRatio());
337  1 assertEquals(1, mapList.getFromRanges().size());
338  1 assertEquals(1, mapList.getFromRanges().get(0)[0]);
339  1 assertEquals(21, mapList.getFromRanges().get(0)[1]);
340  1 assertEquals(1, mapList.getToRanges().size());
341  1 assertEquals(1, mapList.getToRanges().get(0)[0]);
342  1 assertEquals(7, mapList.getToRanges().get(0)[1]);
343    }
344   
345    /**
346    * Test for finding 'product' sequences for the case where the selected
347    * sequence has a dbref with a mapping to a sequence. This represents the case
348    * where either
349    * <ul>
350    * <li>a fetched sequence is already decorated with its cross-reference (e.g.
351    * EMBL + translation), or</li>
352    * <li>Get Cross-References has been done once resulting in instantiated
353    * cross-reference mappings</li>
354    * </ul>
355    */
 
356  1 toggle @Test(groups = { "Functional" })
357    public void testFindXrefSequences_fromDbRefMap()
358    {
359    /*
360    * scenario: nucleotide sequence AF039662
361    * with dbref + mapping to Q9ZTS2 and P30419
362    * which themselves each have a dbref and feature
363    */
364  1 SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
365  1 SequenceI pep1 = new Sequence("Q9ZTS2", "MALFQRSV");
366  1 SequenceI pep2 = new Sequence("P30419", "MTRRSQIF");
367  1 dna1.createDatasetSequence();
368  1 pep1.createDatasetSequence();
369  1 pep2.createDatasetSequence();
370   
371  1 pep1.getDatasetSequence()
372    .addDBRef(new DBRefEntry("Pfam", "0", "PF00111"));
373  1 pep1.addSequenceFeature(
374    new SequenceFeature("type", "desc", 12, 14, 1f, "group"));
375  1 pep2.getDatasetSequence().addDBRef(new DBRefEntry("PDB", "0", "3JTK"));
376  1 pep2.addSequenceFeature(
377    new SequenceFeature("type2", "desc2", 13, 15, 12f, "group2"));
378   
379  1 MapList mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 },
380    3, 1);
381  1 Mapping map = new Mapping(pep1, mapList);
382  1 DBRefEntry dbRef1 = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map);
383  1 dna1.getDatasetSequence().addDBRef(dbRef1);
384  1 mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, 3, 1);
385  1 map = new Mapping(pep2, mapList);
386  1 DBRefEntry dbRef2 = new DBRefEntry("UNIPROT", "0", "P30419", map);
387  1 dna1.getDatasetSequence().addDBRef(dbRef2);
388   
389    /*
390    * find UNIPROT xrefs for nucleotide sequence - it should pick up
391    * mapped sequences
392    */
393  1 AlignmentI al = new Alignment(new SequenceI[] { dna1 });
394  1 Alignment xrefs = new CrossRef(new SequenceI[] { dna1 }, al)
395    .findXrefSequences("UNIPROT", true);
396  1 assertEquals(2, xrefs.getHeight());
397   
398    /*
399    * cross-refs alignment holds copies of the mapped sequences
400    * including copies of their dbrefs and features
401    */
402  1 checkCopySequence(pep1, xrefs.getSequenceAt(0));
403  1 checkCopySequence(pep2, xrefs.getSequenceAt(1));
404    }
405   
406    /**
407    * Helper method that verifies that 'copy' has the same name, start, end,
408    * sequence and dataset sequence object as 'original' (but is not the same
409    * object)
410    *
411    * @param copy
412    * @param original
413    */
 
414  2 toggle private void checkCopySequence(SequenceI copy, SequenceI original)
415    {
416  2 assertNotSame(copy, original);
417  2 assertSame(copy.getDatasetSequence(), original.getDatasetSequence());
418  2 assertEquals(copy.getName(), original.getName());
419  2 assertEquals(copy.getStart(), original.getStart());
420  2 assertEquals(copy.getEnd(), original.getEnd());
421  2 assertEquals(copy.getSequenceAsString(),
422    original.getSequenceAsString());
423    }
424   
425    /**
426    * Test for finding 'product' sequences for the case where the selected
427    * sequence has a dbref with no mapping, triggering a fetch from database
428    */
 
429  0 toggle @Test(groups = { "Functional_Failing" })
430    public void testFindXrefSequences_withFetch()
431    {
432    // JBPNote: this fails because pep1 and pep2 do not have DbRefEntrys with
433    // mappings
434    // Fix#1 would be to revise the test data so it fits with 2.11.2+ Jalview
435    // assumptions
436    // that ENA retrievals yield dbrefs with Mappings
437   
438  0 SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
439  0 dna1.addDBRef(new DBRefEntry("UNIPROT", "ENA:0", "Q9ZTS2"));
440  0 dna1.addDBRef(new DBRefEntry("UNIPROT", "ENA:0", "P30419"));
441  0 dna1.addDBRef(new DBRefEntry("UNIPROT", "ENA:0", "P00314"));
442  0 final SequenceI pep1 = new Sequence("Q9ZTS2", "MYQLIRSSW");
443  0 pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2", null, true));
444   
445  0 final SequenceI pep2 = new Sequence("P00314", "MRKLLAASG");
446  0 pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "P00314", null, true));
447   
448    /*
449    * argument false suppresses adding DAS sources
450    * todo: define an interface type SequenceFetcherI and mock that
451    */
452  0 SequenceFetcher mockFetcher = new SequenceFetcher()
453    {
 
454  0 toggle @Override
455    public boolean isFetchable(String source)
456    {
457  0 return true;
458    }
459   
 
460  0 toggle @Override
461    public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
462    {
463  0 return new SequenceI[] { pep1, pep2 };
464    }
465    };
466  0 SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
467   
468    /*
469    * find UNIPROT xrefs for nucleotide sequence
470    */
471  0 AlignmentI al = new Alignment(new SequenceI[] { dna1 });
472  0 Alignment xrefs = new CrossRef(new SequenceI[] { dna1 }, al)
473    .findXrefSequences("UNIPROT", true);
474  0 assertEquals(2, xrefs.getHeight());
475  0 assertSame(pep1, xrefs.getSequenceAt(0));
476  0 assertSame(pep2, xrefs.getSequenceAt(1));
477    }
478   
 
479  1 toggle @AfterClass(alwaysRun = true)
480    public void tearDown()
481    {
482  1 SequenceFetcherFactory.setSequenceFetcher(null);
483    }
484   
485    /**
486    * Test for finding 'product' sequences for the case where both gene and
487    * transcript sequences have dbrefs to Uniprot.
488    */
 
489  0 toggle @Test(groups = { "Functional_Failing" })
490    public void testFindXrefSequences_forGeneAndTranscripts()
491    {
492    /*
493    * 'gene' sequence
494    */
495  0 SequenceI gene = new Sequence("ENSG00000157764", "CGCCTCCCTTCCCC");
496  0 gene.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056"));
497  0 gene.addDBRef(new DBRefEntry("UNIPROT", "0", "H7C5K3"));
498   
499    /*
500    * 'transcript' with CDS feature (supports mapping to protein)
501    */
502  0 SequenceI braf001 = new Sequence("ENST00000288602",
503    "taagATGGCGGCGCTGa");
504  0 braf001.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056"));
505  0 braf001.addSequenceFeature(
506    new SequenceFeature("CDS", "", 5, 16, 0f, null));
507   
508    /*
509    * 'spliced transcript' with CDS ranges
510    */
511  0 SequenceI braf002 = new Sequence("ENST00000497784",
512    "gCAGGCtaTCTGTTCaa");
513  0 braf002.addDBRef(new DBRefEntry("UNIPROT", "ENSEMBL|0", "H7C5K3"));
514  0 braf002.addSequenceFeature(
515    new SequenceFeature("CDS", "", 2, 6, 0f, null));
516  0 braf002.addSequenceFeature(
517    new SequenceFeature("CDS", "", 9, 15, 0f, null));
518   
519    /*
520    * TODO code is fragile - use of SequenceIdMatcher depends on fetched
521    * sequences having a name starting Source|Accession
522    * which happens to be true for Uniprot,PDB,EMBL but not Pfam,Rfam,Ensembl
523    */
524  0 final SequenceI pep1 = new Sequence("UNIPROT|P15056", "MAAL");
525  0 pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056"));
526  0 final SequenceI pep2 = new Sequence("UNIPROT|H7C5K3", "QALF");
527  0 pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "H7C5K3"));
528    /*
529    * argument false suppresses adding DAS sources
530    * todo: define an interface type SequenceFetcherI and mock that
531    */
532  0 SequenceFetcher mockFetcher = new SequenceFetcher()
533    {
 
534  0 toggle @Override
535    public boolean isFetchable(String source)
536    {
537  0 return true;
538    }
539   
 
540  0 toggle @Override
541    public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
542    {
543  0 return new SequenceI[] { pep1, pep2 };
544    }
545    };
546  0 SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
547   
548    /*
549    * find UNIPROT xrefs for gene and transcripts
550    * verify that
551    * - the two proteins are retrieved but not duplicated
552    * - mappings are built from transcript (CDS) to proteins
553    * - no mappings from gene to proteins
554    */
555  0 SequenceI[] seqs = new SequenceI[] { gene, braf001, braf002 };
556  0 AlignmentI al = new Alignment(seqs);
557  0 Alignment xrefs = new CrossRef(seqs, al).findXrefSequences("UNIPROT",
558    true);
559  0 assertEquals(2, xrefs.getHeight());
560  0 assertSame(pep1, xrefs.getSequenceAt(0));
561  0 assertSame(pep2, xrefs.getSequenceAt(1));
562    }
563   
564    /**
565    * <pre>
566    * Test that emulates this (real but simplified) case:
567    * Alignment: DBrefs
568    * UNIPROT|P0CE19 EMBL|J03321, EMBL|X06707, EMBL|M19487
569    * UNIPROT|P0CE20 EMBL|J03321, EMBL|X06707, EMBL|X07547
570    * Find cross-references for EMBL. These are mocked here as
571    * EMBL|J03321 with mappings to P0CE18, P0CE19, P0CE20
572    * EMBL|X06707 with mappings to P0CE17, P0CE19, P0CE20
573    * EMBL|M19487 with mappings to P0CE19, Q46432
574    * EMBL|X07547 with mappings to P0CE20, B0BCM4
575    * EMBL sequences are first 'fetched' (mocked here) for P0CE19.
576    * The 3 EMBL sequences are added to the alignment dataset.
577    * Their dbrefs to Uniprot products P0CE19 and P0CE20 should be matched in the
578    * alignment dataset and updated to reference the original Uniprot sequences.
579    * For the second Uniprot sequence, the J03321 and X06707 xrefs should be
580    * resolved from the dataset, and only the X07547 dbref fetched.
581    * So the end state to verify is:
582    * - 4 cross-ref sequences returned: J03321, X06707, M19487, X07547
583    * - P0CE19/20 dbrefs to EMBL sequences now have mappings
584    * - J03321 dbrefs to P0CE19/20 mapped to original Uniprot sequences
585    * - X06707 dbrefs to P0CE19/20 mapped to original Uniprot sequences
586    * </pre>
587    */
 
588  0 toggle @Test(groups = { "Functional_Failing" })
589    public void testFindXrefSequences_uniprotEmblManyToMany()
590    {
591    /*
592    * Uniprot sequences, both with xrefs to EMBL|J03321
593    * and EMBL|X06707
594    */
595  0 SequenceI p0ce19 = new Sequence("UNIPROT|P0CE19", "KPFG");
596  0 p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "J03321"));
597  0 p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "X06707"));
598  0 p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "M19487"));
599  0 SequenceI p0ce20 = new Sequence("UNIPROT|P0CE20", "PFGK");
600  0 p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "J03321"));
601  0 p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "X06707"));
602  0 p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "X07547"));
603   
604    /*
605    * EMBL sequences to be 'fetched', complete with dbrefs and mappings
606    * to their protein products (CDS location and translations are provided
607    * in EMBL XML); these should be matched to, and replaced with,
608    * the corresponding uniprot sequences after fetching
609    */
610   
611    /*
612    * J03321 with mappings to P0CE19 and P0CE20
613    */
614  0 final SequenceI j03321 = new Sequence("EMBL|J03321",
615    "AAACCCTTTGGGAAAA");
616  0 DBRefEntry dbref1 = new DBRefEntry("UNIPROT", "0", "P0CE19");
617  0 MapList mapList = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 },
618    3, 1);
619  0 Mapping map = new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"),
620    mapList);
621    // add a dbref to the mapped to sequence - should get copied to p0ce19
622  0 map.getTo().addDBRef(new DBRefEntry("PIR", "0", "S01875"));
623  0 dbref1.setMap(map);
624  0 j03321.addDBRef(dbref1);
625  0 DBRefEntry dbref2 = new DBRefEntry("UNIPROT", "0", "P0CE20");
626  0 mapList = new MapList(new int[] { 4, 15 }, new int[] { 2, 5 }, 3, 1);
627  0 dbref2.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"),
628    new MapList(mapList)));
629  0 j03321.addDBRef(dbref2);
630   
631    /*
632    * X06707 with mappings to P0CE19 and P0CE20
633    */
634  0 final SequenceI x06707 = new Sequence("EMBL|X06707", "atgAAACCCTTTGGG");
635  0 DBRefEntry dbref3 = new DBRefEntry("UNIPROT", "0", "P0CE19");
636  0 MapList map2 = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 }, 3,
637    1);
638  0 dbref3.setMap(
639    new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"), map2));
640  0 x06707.addDBRef(dbref3);
641  0 DBRefEntry dbref4 = new DBRefEntry("UNIPROT", "0", "P0CE20");
642  0 MapList map3 = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 }, 3,
643    1);
644  0 dbref4.setMap(
645    new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"), map3));
646  0 x06707.addDBRef(dbref4);
647   
648    /*
649    * M19487 with mapping to P0CE19 and Q46432
650    */
651  0 final SequenceI m19487 = new Sequence("EMBL|M19487", "AAACCCTTTGGG");
652  0 DBRefEntry dbref5 = new DBRefEntry("UNIPROT", "0", "P0CE19");
653  0 dbref5.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"),
654    new MapList(mapList)));
655  0 m19487.addDBRef(dbref5);
656  0 DBRefEntry dbref6 = new DBRefEntry("UNIPROT", "0", "Q46432");
657  0 dbref6.setMap(new Mapping(new Sequence("UNIPROT|Q46432", "KPFG"),
658    new MapList(mapList)));
659  0 m19487.addDBRef(dbref6);
660   
661    /*
662    * X07547 with mapping to P0CE20 and B0BCM4
663    */
664  0 final SequenceI x07547 = new Sequence("EMBL|X07547", "cccAAACCCTTTGGG");
665  0 DBRefEntry dbref7 = new DBRefEntry("UNIPROT", "0", "P0CE20");
666  0 dbref7.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"),
667    new MapList(map2)));
668  0 x07547.addDBRef(dbref7);
669  0 DBRefEntry dbref8 = new DBRefEntry("UNIPROT", "0", "B0BCM4");
670  0 dbref8.setMap(new Mapping(new Sequence("UNIPROT|B0BCM4", "KPFG"),
671    new MapList(map2)));
672  0 x07547.addDBRef(dbref8);
673   
674    /*
675    * mock sequence fetcher to 'return' the EMBL sequences
676    * TODO: Mockito would allow .thenReturn().thenReturn() here,
677    * and also capture and verification of the parameters
678    * passed in calls to getSequences() - important to verify that
679    * duplicate sequence fetches are not requested
680    */
681  0 SequenceFetcher mockFetcher = new SequenceFetcher()
682    {
683    int call = 0;
684   
 
685  0 toggle @Override
686    public boolean isFetchable(String source)
687    {
688  0 return true;
689    }
690   
 
691  0 toggle @Override
692    public SequenceI[] getSequences(List<DBRefEntry> refs, boolean dna)
693    {
694  0 call++;
695  0 if (call == 1)
696    {
697  0 assertEquals("Expected 3 embl seqs in first fetch", 3,
698    refs.size());
699  0 return new SequenceI[] { j03321, x06707, m19487 };
700    }
701    else
702    {
703  0 assertEquals("Expected 1 embl seq in second fetch", 1,
704    refs.size());
705  0 return new SequenceI[] { x07547 };
706    }
707    }
708    };
709  0 SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
710   
711    /*
712    * find EMBL xrefs for Uniprot seqs and verify that
713    * - the EMBL xref'd sequences are retrieved without duplicates
714    * - mappings are added to the Uniprot dbrefs
715    * - mappings in the EMBL-to-Uniprot dbrefs are updated to the
716    * alignment sequences
717    * - dbrefs on the EMBL sequences are added to the original dbrefs
718    */
719  0 SequenceI[] seqs = new SequenceI[] { p0ce19, p0ce20 };
720  0 AlignmentI al = new Alignment(seqs);
721  0 Alignment xrefs = new CrossRef(seqs, al).findXrefSequences("EMBL",
722    false);
723   
724    /*
725    * verify retrieved sequences
726    */
727  0 assertNotNull(xrefs);
728  0 assertEquals(4, xrefs.getHeight());
729  0 assertSame(j03321, xrefs.getSequenceAt(0));
730  0 assertSame(x06707, xrefs.getSequenceAt(1));
731  0 assertSame(m19487, xrefs.getSequenceAt(2));
732  0 assertSame(x07547, xrefs.getSequenceAt(3));
733   
734    /*
735    * verify mappings added to Uniprot-to-EMBL dbrefs
736    */
737  0 Mapping mapping = p0ce19.getDBRefs().get(0).getMap();
738  0 assertSame(j03321, mapping.getTo());
739  0 mapping = p0ce19.getDBRefs().get(1).getMap();
740  0 assertSame(x06707, mapping.getTo());
741  0 mapping = p0ce20.getDBRefs().get(0).getMap();
742  0 assertSame(j03321, mapping.getTo());
743  0 mapping = p0ce20.getDBRefs().get(1).getMap();
744  0 assertSame(x06707, mapping.getTo());
745   
746    /*
747    * verify dbrefs on EMBL are mapped to alignment seqs
748    */
749   
750  0 assertSame(p0ce19, j03321.getDBRefs().get(0).getMap().getTo());
751  0 assertSame(p0ce20, j03321.getDBRefs().get(1).getMap().getTo());
752  0 assertSame(p0ce19, x06707.getDBRefs().get(0).getMap().getTo());
753  0 assertSame(p0ce20, x06707.getDBRefs().get(1).getMap().getTo());
754   
755    /*
756    * verify new dbref on EMBL dbref mapping is copied to the
757    * original Uniprot sequence
758    */
759  0 assertEquals(4, p0ce19.getDBRefs().size());
760  0 assertEquals("PIR", p0ce19.getDBRefs().get(3).getSource());
761  0 assertEquals("S01875", p0ce19.getDBRefs().get(3).getAccessionId());
762    }
763   
 
764  1 toggle @Test(groups = "Functional")
765    public void testSameSequence()
766    {
767  1 assertTrue(CrossRef.sameSequence(null, null));
768  1 SequenceI seq1 = new Sequence("seq1", "ABCDEF");
769  1 assertFalse(CrossRef.sameSequence(seq1, null));
770  1 assertFalse(CrossRef.sameSequence(null, seq1));
771  1 assertTrue(CrossRef.sameSequence(seq1, new Sequence("seq2", "ABCDEF")));
772  1 assertTrue(CrossRef.sameSequence(seq1, new Sequence("seq2", "abcdef")));
773  1 assertFalse(
774    CrossRef.sameSequence(seq1, new Sequence("seq2", "ABCDE-F")));
775  1 assertFalse(CrossRef.sameSequence(seq1, new Sequence("seq2", "BCDEF")));
776    }
777    }