Clover icon

jalviewX

  1. Project Clover database Wed Oct 31 2018 15:13:58 GMT
  2. Package jalview.gui

File CrossRefAction.java

 

Coverage histogram

../../img/srcFileCovDistChart0.png
56% of files have more coverage

Code metrics

68
137
9
1
531
325
53
0.39
15.22
9
5.89

Classes

Class Line # Actions
CrossRefAction 57 137 53 214
0.00%
 

Contributing tests

No tests hitting this source file were found.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.gui;
22   
23    import jalview.analysis.AlignmentUtils;
24    import jalview.analysis.CrossRef;
25    import jalview.api.AlignmentViewPanel;
26    import jalview.api.FeatureSettingsModelI;
27    import jalview.bin.Cache;
28    import jalview.datamodel.Alignment;
29    import jalview.datamodel.AlignmentI;
30    import jalview.datamodel.DBRefEntry;
31    import jalview.datamodel.DBRefSource;
32    import jalview.datamodel.GeneLociI;
33    import jalview.datamodel.SequenceI;
34    import jalview.ext.ensembl.EnsemblInfo;
35    import jalview.ext.ensembl.EnsemblMap;
36    import jalview.io.gff.SequenceOntologyI;
37    import jalview.structure.StructureSelectionManager;
38    import jalview.util.DBRefUtils;
39    import jalview.util.MapList;
40    import jalview.util.MappingUtils;
41    import jalview.util.MessageManager;
42    import jalview.ws.SequenceFetcher;
43   
44    import java.util.ArrayList;
45    import java.util.HashMap;
46    import java.util.List;
47    import java.util.Map;
48    import java.util.Set;
49   
50    /**
51    * Factory constructor and runnable for discovering and displaying
52    * cross-references for a set of aligned sequences
53    *
54    * @author jprocter
55    *
56    */
 
57    public class CrossRefAction implements Runnable
58    {
59    private AlignFrame alignFrame;
60   
61    private SequenceI[] sel;
62   
63    private final boolean _odna;
64   
65    private String source;
66   
67    List<AlignmentViewPanel> xrefViews = new ArrayList<>();
68   
 
69  0 toggle List<AlignmentViewPanel> getXrefViews()
70    {
71  0 return xrefViews;
72    }
73   
 
74  0 toggle @Override
75    public void run()
76    {
77  0 final long sttime = System.currentTimeMillis();
78  0 alignFrame.setProgressBar(MessageManager.formatMessage(
79    "status.searching_for_sequences_from", new Object[]
80    { source }), sttime);
81  0 try
82    {
83  0 AlignmentI alignment = alignFrame.getViewport().getAlignment();
84  0 AlignmentI dataset = alignment.getDataset() == null ? alignment
85    : alignment.getDataset();
86  0 boolean dna = alignment.isNucleotide();
87  0 if (_odna != dna)
88    {
89  0 System.err
90    .println("Conflict: showProducts for alignment originally "
91  0 + "thought to be " + (_odna ? "DNA" : "Protein")
92  0 + " now searching for " + (dna ? "DNA" : "Protein")
93    + " Context.");
94    }
95  0 AlignmentI xrefs = new CrossRef(sel, dataset)
96    .findXrefSequences(source, dna);
97  0 if (xrefs == null)
98    {
99  0 return;
100    }
101   
102    /*
103    * try to look up chromosomal coordinates for nucleotide
104    * sequences (if not already retrieved)
105    */
106  0 findGeneLoci(xrefs.getSequences());
107   
108    /*
109    * get display scheme (if any) to apply to features
110    */
111  0 FeatureSettingsModelI featureColourScheme = new SequenceFetcher()
112    .getFeatureColourScheme(source);
113   
114  0 AlignmentI xrefsAlignment = makeCrossReferencesAlignment(dataset,
115    xrefs);
116  0 if (!dna)
117    {
118  0 xrefsAlignment = AlignmentUtils.makeCdsAlignment(
119    xrefsAlignment.getSequencesArray(), dataset, sel);
120  0 xrefsAlignment.alignAs(alignment);
121    }
122   
123    /*
124    * If we are opening a splitframe, make a copy of this alignment (sharing the same dataset
125    * sequences). If we are DNA, drop introns and update mappings
126    */
127  0 AlignmentI copyAlignment = null;
128   
129  0 if (Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true))
130    {
131  0 copyAlignment = copyAlignmentForSplitFrame(alignment, dataset, dna,
132    xrefs, xrefsAlignment);
133  0 if (copyAlignment == null)
134    {
135  0 return; // failed
136    }
137    }
138   
139    /*
140    * build AlignFrame(s) according to available alignment data
141    */
142  0 AlignFrame newFrame = new AlignFrame(xrefsAlignment,
143    AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
144  0 if (Cache.getDefault("HIDE_INTRONS", true))
145    {
146  0 newFrame.hideFeatureColumns(SequenceOntologyI.EXON, false);
147    }
148  0 String newtitle = String.format("%s %s %s",
149  0 dna ? MessageManager.getString("label.proteins")
150    : MessageManager.getString("label.nucleotides"),
151    MessageManager.getString("label.for"), alignFrame.getTitle());
152  0 newFrame.setTitle(newtitle);
153   
154  0 if (copyAlignment == null)
155    {
156    /*
157    * split frame display is turned off in preferences file
158    */
159  0 Desktop.addInternalFrame(newFrame, newtitle,
160    AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
161  0 xrefViews.add(newFrame.alignPanel);
162  0 return; // via finally clause
163    }
164   
165  0 AlignFrame copyThis = new AlignFrame(copyAlignment,
166    AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
167  0 copyThis.setTitle(alignFrame.getTitle());
168   
169  0 boolean showSequenceFeatures = alignFrame.getViewport()
170    .isShowSequenceFeatures();
171  0 newFrame.setShowSeqFeatures(showSequenceFeatures);
172  0 copyThis.setShowSeqFeatures(showSequenceFeatures);
173  0 FeatureRenderer myFeatureStyling = alignFrame.alignPanel
174    .getSeqPanel().seqCanvas.getFeatureRenderer();
175   
176    /*
177    * copy feature rendering settings to split frame
178    */
179  0 FeatureRenderer fr1 = newFrame.alignPanel.getSeqPanel().seqCanvas
180    .getFeatureRenderer();
181  0 fr1.transferSettings(myFeatureStyling);
182  0 fr1.findAllFeatures(true);
183  0 FeatureRenderer fr2 = copyThis.alignPanel.getSeqPanel().seqCanvas
184    .getFeatureRenderer();
185  0 fr2.transferSettings(myFeatureStyling);
186  0 fr2.findAllFeatures(true);
187   
188    /*
189    * apply 'database source' feature configuration
190    * if any was found
191    */
192    // TODO is this the feature colouring for the original
193    // alignment or the fetched xrefs? either could be Ensembl
194  0 newFrame.getViewport().applyFeaturesStyle(featureColourScheme);
195  0 copyThis.getViewport().applyFeaturesStyle(featureColourScheme);
196   
197  0 SplitFrame sf = new SplitFrame(dna ? copyThis : newFrame,
198  0 dna ? newFrame : copyThis);
199  0 newFrame.setVisible(true);
200  0 copyThis.setVisible(true);
201  0 String linkedTitle = MessageManager
202    .getString("label.linked_view_title");
203  0 Desktop.addInternalFrame(sf, linkedTitle, -1, -1);
204  0 sf.adjustInitialLayout();
205   
206    // finally add the top, then bottom frame to the view list
207  0 xrefViews.add(dna ? copyThis.alignPanel : newFrame.alignPanel);
208  0 xrefViews.add(!dna ? copyThis.alignPanel : newFrame.alignPanel);
209   
210    } catch (OutOfMemoryError e)
211    {
212  0 new OOMWarning("whilst fetching crossreferences", e);
213    } catch (Throwable e)
214    {
215  0 Cache.log.error("Error when finding crossreferences", e);
216    } finally
217    {
218  0 alignFrame.setProgressBar(MessageManager.formatMessage(
219    "status.finished_searching_for_sequences_from", new Object[]
220    { source }), sttime);
221    }
222    }
223   
224    /**
225    * Tries to add chromosomal coordinates to any nucleotide sequence which does
226    * not already have them. Coordinates are retrieved from Ensembl given an
227    * Ensembl identifier, either on the sequence itself or on a peptide sequence
228    * it has a reference to.
229    *
230    * <pre>
231    * Example (human):
232    * - fetch EMBLCDS cross-references for Uniprot entry P30419
233    * - the EMBL sequences do not have xrefs to Ensembl
234    * - the Uniprot entry has xrefs to
235    * ENSP00000258960, ENSP00000468424, ENST00000258960, ENST00000592782
236    * - either of the transcript ids can be used to retrieve gene loci e.g.
237    * http://rest.ensembl.org/map/cds/ENST00000592782/1..100000
238    * Example (invertebrate):
239    * - fetch EMBLCDS cross-references for Uniprot entry Q43517 (FER1_SOLLC)
240    * - the Uniprot entry has an xref to ENSEMBLPLANTS Solyc10g044520.1.1
241    * - can retrieve gene loci with
242    * http://rest.ensemblgenomes.org/map/cds/Solyc10g044520.1.1/1..100000
243    * </pre>
244    *
245    * @param sequences
246    */
 
247  0 toggle public static void findGeneLoci(List<SequenceI> sequences)
248    {
249  0 Map<DBRefEntry, GeneLociI> retrievedLoci = new HashMap<>();
250  0 for (SequenceI seq : sequences)
251    {
252  0 findGeneLoci(seq, retrievedLoci);
253    }
254    }
255   
256    /**
257    * Tres to find chromosomal coordinates for the sequence, by searching its
258    * direct and indirect cross-references for Ensembl. If the loci have already
259    * been retrieved, just reads them out of the map of retrievedLoci; this is
260    * the case of an alternative transcript for the same protein. Otherwise calls
261    * a REST service to retrieve the loci, and if successful, adds them to the
262    * sequence and to the retrievedLoci.
263    *
264    * @param seq
265    * @param retrievedLoci
266    */
 
267  0 toggle static void findGeneLoci(SequenceI seq,
268    Map<DBRefEntry, GeneLociI> retrievedLoci)
269    {
270    /*
271    * don't replace any existing chromosomal coordinates
272    */
273  0 if (seq == null || seq.isProtein() || seq.getGeneLoci() != null
274    || seq.getDBRefs() == null)
275    {
276  0 return;
277    }
278   
279  0 Set<String> ensemblDivisions = new EnsemblInfo().getDivisions();
280   
281    /*
282    * first look for direct dbrefs from sequence to Ensembl
283    */
284  0 String[] divisionsArray = ensemblDivisions
285    .toArray(new String[ensemblDivisions.size()]);
286  0 DBRefEntry[] seqRefs = seq.getDBRefs();
287  0 DBRefEntry[] directEnsemblRefs = DBRefUtils.selectRefs(seqRefs,
288    divisionsArray);
289  0 if (directEnsemblRefs != null)
290    {
291  0 for (DBRefEntry ensemblRef : directEnsemblRefs)
292    {
293  0 if (fetchGeneLoci(seq, ensemblRef, retrievedLoci))
294    {
295  0 return;
296    }
297    }
298    }
299   
300    /*
301    * else look for indirect dbrefs from sequence to Ensembl
302    */
303  0 for (DBRefEntry dbref : seq.getDBRefs())
304    {
305  0 if (dbref.getMap() != null && dbref.getMap().getTo() != null)
306    {
307  0 DBRefEntry[] dbrefs = dbref.getMap().getTo().getDBRefs();
308  0 DBRefEntry[] indirectEnsemblRefs = DBRefUtils.selectRefs(dbrefs,
309    divisionsArray);
310  0 if (indirectEnsemblRefs != null)
311    {
312  0 for (DBRefEntry ensemblRef : indirectEnsemblRefs)
313    {
314  0 if (fetchGeneLoci(seq, ensemblRef, retrievedLoci))
315    {
316  0 return;
317    }
318    }
319    }
320    }
321    }
322    }
323   
324    /**
325    * Retrieves chromosomal coordinates for the Ensembl (or EnsemblGenomes)
326    * identifier in dbref. If successful, and the sequence length matches gene
327    * loci length, then add it to the sequence, and to the retrievedLoci map.
328    * Answers true if successful, else false.
329    *
330    * @param seq
331    * @param dbref
332    * @param retrievedLoci
333    * @return
334    */
 
335  0 toggle static boolean fetchGeneLoci(SequenceI seq, DBRefEntry dbref,
336    Map<DBRefEntry, GeneLociI> retrievedLoci)
337    {
338  0 String accession = dbref.getAccessionId();
339  0 String division = dbref.getSource();
340   
341    /*
342    * hack: ignore cross-references to Ensembl protein ids
343    * (or use map/translation perhaps?)
344    * todo: is there an equivalent in EnsemblGenomes?
345    */
346  0 if (accession.startsWith("ENSP"))
347    {
348  0 return false;
349    }
350  0 EnsemblMap mapper = new EnsemblMap();
351   
352    /*
353    * try CDS mapping first
354    */
355  0 GeneLociI geneLoci = mapper.getCdsMapping(division, accession, 1,
356    seq.getLength());
357  0 if (geneLoci != null)
358    {
359  0 MapList map = geneLoci.getMap();
360  0 int mappedFromLength = MappingUtils.getLength(map.getFromRanges());
361  0 if (mappedFromLength == seq.getLength())
362    {
363  0 seq.setGeneLoci(geneLoci.getSpeciesId(), geneLoci.getAssemblyId(),
364    geneLoci.getChromosomeId(), geneLoci.getMap());
365  0 retrievedLoci.put(dbref, geneLoci);
366  0 return true;
367    }
368    }
369   
370    /*
371    * else try CDNA mapping
372    */
373  0 geneLoci = mapper.getCdnaMapping(division, accession, 1,
374    seq.getLength());
375  0 if (geneLoci != null)
376    {
377  0 MapList map = geneLoci.getMap();
378  0 int mappedFromLength = MappingUtils.getLength(map.getFromRanges());
379  0 if (mappedFromLength == seq.getLength())
380    {
381  0 seq.setGeneLoci(geneLoci.getSpeciesId(), geneLoci.getAssemblyId(),
382    geneLoci.getChromosomeId(), geneLoci.getMap());
383  0 retrievedLoci.put(dbref, geneLoci);
384  0 return true;
385    }
386    }
387   
388  0 return false;
389    }
390   
391    /**
392    * @param alignment
393    * @param dataset
394    * @param dna
395    * @param xrefs
396    * @param xrefsAlignment
397    * @return
398    */
 
399  0 toggle protected AlignmentI copyAlignmentForSplitFrame(AlignmentI alignment,
400    AlignmentI dataset, boolean dna, AlignmentI xrefs,
401    AlignmentI xrefsAlignment)
402    {
403  0 AlignmentI copyAlignment;
404  0 boolean copyAlignmentIsAligned = false;
405  0 if (dna)
406    {
407  0 copyAlignment = AlignmentUtils.makeCdsAlignment(sel, dataset,
408    xrefsAlignment.getSequencesArray());
409  0 if (copyAlignment.getHeight() == 0)
410    {
411  0 JvOptionPane.showMessageDialog(alignFrame,
412    MessageManager.getString("label.cant_map_cds"),
413    MessageManager.getString("label.operation_failed"),
414    JvOptionPane.OK_OPTION);
415  0 System.err.println("Failed to make CDS alignment");
416  0 return null;
417    }
418   
419    /*
420    * pending getting Embl transcripts to 'align',
421    * we are only doing this for Ensembl
422    */
423    // TODO proper criteria for 'can align as cdna'
424  0 if (DBRefSource.ENSEMBL.equalsIgnoreCase(source)
425    || AlignmentUtils.looksLikeEnsembl(alignment))
426    {
427  0 copyAlignment.alignAs(alignment);
428  0 copyAlignmentIsAligned = true;
429    }
430    }
431    else
432    {
433  0 copyAlignment = AlignmentUtils.makeCopyAlignment(sel,
434    xrefs.getSequencesArray(), dataset);
435    }
436  0 copyAlignment
437    .setGapCharacter(alignFrame.viewport.getGapCharacter());
438   
439  0 StructureSelectionManager ssm = StructureSelectionManager
440    .getStructureSelectionManager(Desktop.instance);
441   
442    /*
443    * register any new mappings for sequence mouseover etc
444    * (will not duplicate any previously registered mappings)
445    */
446  0 ssm.registerMappings(dataset.getCodonFrames());
447   
448  0 if (copyAlignment.getHeight() <= 0)
449    {
450  0 System.err.println(
451    "No Sequences generated for xRef type " + source);
452  0 return null;
453    }
454   
455    /*
456    * align protein to dna
457    */
458  0 if (dna && copyAlignmentIsAligned)
459    {
460  0 xrefsAlignment.alignAs(copyAlignment);
461    }
462    else
463    {
464    /*
465    * align cdna to protein - currently only if
466    * fetching and aligning Ensembl transcripts!
467    */
468    // TODO: generalise for other sources of locus/transcript/cds data
469  0 if (dna && DBRefSource.ENSEMBL.equalsIgnoreCase(source))
470    {
471  0 copyAlignment.alignAs(xrefsAlignment);
472    }
473    }
474   
475  0 return copyAlignment;
476    }
477   
478    /**
479    * Makes an alignment containing the given sequences, and adds them to the
480    * given dataset, which is also set as the dataset for the new alignment
481    *
482    * TODO: refactor to DatasetI method
483    *
484    * @param dataset
485    * @param seqs
486    * @return
487    */
 
488  0 toggle protected AlignmentI makeCrossReferencesAlignment(AlignmentI dataset,
489    AlignmentI seqs)
490    {
491  0 SequenceI[] sprods = new SequenceI[seqs.getHeight()];
492  0 for (int s = 0; s < sprods.length; s++)
493    {
494  0 sprods[s] = (seqs.getSequenceAt(s)).deriveSequence();
495  0 if (dataset.getSequences() == null || !dataset.getSequences()
496    .contains(sprods[s].getDatasetSequence()))
497    {
498  0 dataset.addSequence(sprods[s].getDatasetSequence());
499    }
500  0 sprods[s].updatePDBIds();
501    }
502  0 Alignment al = new Alignment(sprods);
503  0 al.setDataset(dataset);
504  0 return al;
505    }
506   
/**
 * Constructor. Only stores its arguments; the search is performed by run().
 *
 * @param af
 *          the alignment window the action is launched from
 * @param seqs
 *          the sequences to find cross-references for
 * @param fromDna
 *          true if the sequences are believed to be nucleotide
 * @param dbSource
 *          name of the database to search for cross-references
 */
CrossRefAction(AlignFrame af, SequenceI[] seqs, boolean fromDna,
        String dbSource)
{
  alignFrame = af;
  source = dbSource;
  sel = seqs;
  _odna = fromDna;
}
523   
 
524  0 toggle public static CrossRefAction getHandlerFor(final SequenceI[] sel,
525    final boolean fromDna, final String source,
526    final AlignFrame alignFrame)
527    {
528  0 return new CrossRefAction(alignFrame, sel, fromDna, source);
529    }
530   
531    }