Clover icon

jalviewX

  1. Project Clover database Wed Oct 31 2018 15:13:58 GMT
  2. Package jalview.util

File DBRefUtils.java

 

Coverage histogram

../../img/srcFileCovDistChart7.png
28% of files have more coverage

Code metrics

102
153
20
2
733
449
132
0.86
7.65
10
6.6

Classes

Class Line # Actions
DBRefUtils 41 153 132 100
0.6363636 (63.6%)
DBRefUtils.DbRefComp 236 0 0 0
-1.0 -
 

Contributing tests

This file is covered by 170 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.util;
22   
23    import jalview.datamodel.DBRefEntry;
24    import jalview.datamodel.DBRefSource;
25    import jalview.datamodel.PDBEntry;
26    import jalview.datamodel.SequenceI;
27   
28    import java.util.ArrayList;
29    import java.util.Arrays;
30    import java.util.HashMap;
31    import java.util.HashSet;
32    import java.util.List;
33    import java.util.Map;
34    import java.util.Set;
35   
36    import com.stevesoft.pat.Regex;
37   
38    /**
39    * Utilities for handling DBRef objects and their collections.
40    */
 
41    public class DBRefUtils
42    {
43    /*
44    * lookup from lower-case form of a name to its canonical (standardised) form
45    */
46    private static Map<String, String> canonicalSourceNameLookup = new HashMap<String, String>();
47   
48    private static Map<String, String> dasCoordinateSystemsLookup = new HashMap<String, String>();
49   
 
50  1 toggle static
51    {
52    // TODO load these from a resource file?
53  1 canonicalSourceNameLookup.put("uniprotkb/swiss-prot",
54    DBRefSource.UNIPROT);
55  1 canonicalSourceNameLookup.put("uniprotkb/trembl", DBRefSource.UNIPROT);
56   
57    // Ensembl values for dbname in xref REST service:
58  1 canonicalSourceNameLookup.put("uniprot/sptrembl", DBRefSource.UNIPROT);
59  1 canonicalSourceNameLookup.put("uniprot/swissprot", DBRefSource.UNIPROT);
60   
61  1 canonicalSourceNameLookup.put("pdb", DBRefSource.PDB);
62  1 canonicalSourceNameLookup.put("ensembl", DBRefSource.ENSEMBL);
63    // Ensembl Gn and Tr are for Ensembl genomic and transcript IDs as served
64    // from ENA.
65  1 canonicalSourceNameLookup.put("ensembl-tr", DBRefSource.ENSEMBL);
66  1 canonicalSourceNameLookup.put("ensembl-gn", DBRefSource.ENSEMBL);
67   
68    // Make sure we have lowercase entries for all canonical string lookups
69  1 Set<String> keys = canonicalSourceNameLookup.keySet();
70  1 for (String k : keys)
71    {
72  8 canonicalSourceNameLookup.put(k.toLowerCase(),
73    canonicalSourceNameLookup.get(k));
74    }
75   
76  1 dasCoordinateSystemsLookup.put("pdbresnum", DBRefSource.PDB);
77  1 dasCoordinateSystemsLookup.put("uniprot", DBRefSource.UNIPROT);
78  1 dasCoordinateSystemsLookup.put("embl", DBRefSource.EMBL);
79    // dasCoordinateSystemsLookup.put("embl", DBRefSource.EMBLCDS);
80    }
81   
82    /**
83    * Returns those DBRefEntry objects whose source identifier (once converted to
84    * Jalview's canonical form) is in the list of sources to search for. Returns
85    * null if no matches found.
86    *
87    * @param dbrefs
88    * DBRefEntry objects to search
89    * @param sources
90    * array of sources to select
91    * @return
92    */
 
93  11167 toggle public static DBRefEntry[] selectRefs(DBRefEntry[] dbrefs,
94    String[] sources)
95    {
96  11167 if (dbrefs == null || sources == null)
97    {
98  3635 return dbrefs;
99    }
100  7532 HashSet<String> srcs = new HashSet<String>();
101  7532 for (String src : sources)
102    {
103  31583 srcs.add(src.toUpperCase());
104    }
105   
106  7532 List<DBRefEntry> res = new ArrayList<DBRefEntry>();
107  7532 for (DBRefEntry dbr : dbrefs)
108    {
109  8668 String source = getCanonicalName(dbr.getSource());
110  8668 if (srcs.contains(source.toUpperCase()))
111    {
112  3295 res.add(dbr);
113    }
114    }
115   
116  7532 if (res.size() > 0)
117    {
118  3194 DBRefEntry[] reply = new DBRefEntry[res.size()];
119  3194 return res.toArray(reply);
120    }
121  4338 return null;
122    }
123   
124    /**
125    * isDasCoordinateSystem
126    *
127    * @param string
128    * String
129    * @param dBRefEntry
130    * DBRefEntry
131    * @return boolean true if Source DBRefEntry is compatible with DAS
132    * CoordinateSystem name
133    */
134   
 
135  12 toggle public static boolean isDasCoordinateSystem(String string,
136    DBRefEntry dBRefEntry)
137    {
138  12 if (string == null || dBRefEntry == null)
139    {
140  3 return false;
141    }
142  9 String coordsys = dasCoordinateSystemsLookup.get(string.toLowerCase());
143  9 return coordsys == null ? false
144    : coordsys.equals(dBRefEntry.getSource());
145    }
146   
147    /**
148    * look up source in an internal list of database reference sources and return
149    * the canonical jalview name for the source, or the original string if it has
150    * no canonical form.
151    *
152    * @param source
153    * @return canonical jalview source (one of jalview.datamodel.DBRefSource.*)
154    * or original source
155    */
 
156  34440 toggle public static String getCanonicalName(String source)
157    {
158  34440 if (source == null)
159    {
160  1 return null;
161    }
162  34439 String canonical = canonicalSourceNameLookup.get(source.toLowerCase());
163  34439 return canonical == null ? source : canonical;
164    }
165   
166    /**
167    * Returns a (possibly empty) list of those references that match the given
168    * entry. Currently uses a comparator which matches if
169    * <ul>
170    * <li>database sources are the same</li>
171    * <li>accession ids are the same</li>
172    * <li>both have no mapping, or the mappings are the same</li>
173    * </ul>
174    *
175    * @param ref
176    * Set of references to search
177    * @param entry
178    * pattern to match
179    * @return
180    */
 
181  42 toggle public static List<DBRefEntry> searchRefs(DBRefEntry[] ref,
182    DBRefEntry entry)
183    {
184  42 return searchRefs(ref, entry,
185    matchDbAndIdAndEitherMapOrEquivalentMapList);
186    }
187   
188    /**
189    * Returns a list of those references that match the given accession id
190    * <ul>
191    * <li>database sources are the same</li>
192    * <li>accession ids are the same</li>
193    * <li>both have no mapping, or the mappings are the same</li>
194    * </ul>
195    *
196    * @param refs
197    * Set of references to search
198    * @param accId
199    * accession id to match
200    * @return
201    */
 
202  3 toggle public static List<DBRefEntry> searchRefs(DBRefEntry[] refs, String accId)
203    {
204  3 return searchRefs(refs, new DBRefEntry("", "", accId), matchId);
205    }
206   
207    /**
208    * Returns a (possibly empty) list of those references that match the given
209    * entry, according to the given comparator.
210    *
211    * @param refs
212    * an array of database references to search
213    * @param entry
214    * an entry to compare against
215    * @param comparator
216    * @return
217    */
 
218  45 toggle static List<DBRefEntry> searchRefs(DBRefEntry[] refs, DBRefEntry entry,
219    DbRefComp comparator)
220    {
221  45 List<DBRefEntry> rfs = new ArrayList<DBRefEntry>();
222  45 if (refs == null || entry == null)
223    {
224  0 return rfs;
225    }
226  78 for (int i = 0; i < refs.length; i++)
227    {
228  33 if (comparator.matches(entry, refs[i]))
229    {
230  18 rfs.add(refs[i]);
231    }
232    }
233  45 return rfs;
234    }
235   
 
236    interface DbRefComp
237    {
238    public boolean matches(DBRefEntry refa, DBRefEntry refb);
239    }
240   
241    /**
242    * match on all non-null fields in refa
243    */
244    // TODO unused - remove?
245    public static DbRefComp matchNonNullonA = new DbRefComp()
246    {
 
247  0 toggle @Override
248    public boolean matches(DBRefEntry refa, DBRefEntry refb)
249    {
250  0 if (refa.getSource() == null
251    || DBRefUtils.getCanonicalName(refb.getSource()).equals(
252    DBRefUtils.getCanonicalName(refa.getSource())))
253    {
254  0 if (refa.getVersion() == null
255    || refb.getVersion().equals(refa.getVersion()))
256    {
257  0 if (refa.getAccessionId() == null
258    || refb.getAccessionId().equals(refa.getAccessionId()))
259    {
260  0 if (refa.getMap() == null || (refb.getMap() != null
261    && refb.getMap().equals(refa.getMap())))
262    {
263  0 return true;
264    }
265    }
266    }
267    }
268  0 return false;
269    }
270    };
271   
272    /**
273    * either field is null or field matches for all of source, version, accession
274    * id and map.
275    */
276    // TODO unused - remove?
277    public static DbRefComp matchEitherNonNull = new DbRefComp()
278    {
 
279  0 toggle @Override
280    public boolean matches(DBRefEntry refa, DBRefEntry refb)
281    {
282  0 if (nullOrEqualSource(refa.getSource(), refb.getSource())
283    && nullOrEqual(refa.getVersion(), refb.getVersion())
284    && nullOrEqual(refa.getAccessionId(), refb.getAccessionId())
285    && nullOrEqual(refa.getMap(), refb.getMap()))
286    {
287  0 return true;
288    }
289  0 return false;
290    }
291    };
292   
293    /**
294    * accession ID and DB must be identical. Version is ignored. Map is either
295    * not defined or is a match (or is compatible?)
296    */
297    // TODO unused - remove?
298    public static DbRefComp matchDbAndIdAndEitherMap = new DbRefComp()
299    {
 
300  0 toggle @Override
301    public boolean matches(DBRefEntry refa, DBRefEntry refb)
302    {
303  0 if (refa.getSource() != null && refb.getSource() != null
304    && DBRefUtils.getCanonicalName(refb.getSource()).equals(
305    DBRefUtils.getCanonicalName(refa.getSource())))
306    {
307    // We dont care about version
308  0 if (refa.getAccessionId() != null && refb.getAccessionId() != null
309    // FIXME should be && not || here?
310    || refb.getAccessionId().equals(refa.getAccessionId()))
311    {
312  0 if ((refa.getMap() == null || refb.getMap() == null)
313    || (refa.getMap() != null && refb.getMap() != null
314    && refb.getMap().equals(refa.getMap())))
315    {
316  0 return true;
317    }
318    }
319    }
320  0 return false;
321    }
322    };
323   
324    /**
325    * accession ID and DB must be identical. Version is ignored. No map on either
326    * or map but no maplist on either or maplist of map on a is the complement of
327    * maplist of map on b.
328    */
329    // TODO unused - remove?
330    public static DbRefComp matchDbAndIdAndComplementaryMapList = new DbRefComp()
331    {
 
332  0 toggle @Override
333    public boolean matches(DBRefEntry refa, DBRefEntry refb)
334    {
335  0 if (refa.getSource() != null && refb.getSource() != null
336    && DBRefUtils.getCanonicalName(refb.getSource()).equals(
337    DBRefUtils.getCanonicalName(refa.getSource())))
338    {
339    // We dont care about version
340  0 if (refa.getAccessionId() != null && refb.getAccessionId() != null
341    || refb.getAccessionId().equals(refa.getAccessionId()))
342    {
343  0 if ((refa.getMap() == null && refb.getMap() == null)
344    || (refa.getMap() != null && refb.getMap() != null))
345    {
346  0 if ((refb.getMap().getMap() == null
347    && refa.getMap().getMap() == null)
348    || (refb.getMap().getMap() != null
349    && refa.getMap().getMap() != null
350    && refb.getMap().getMap().getInverse()
351    .equals(refa.getMap().getMap())))
352    {
353  0 return true;
354    }
355    }
356    }
357    }
358  0 return false;
359    }
360    };
361   
362    /**
363    * accession ID and DB must be identical. Version is ignored. No map on both
364    * or or map but no maplist on either or maplist of map on a is equivalent to
365    * the maplist of map on b.
366    */
367    // TODO unused - remove?
368    public static DbRefComp matchDbAndIdAndEquivalentMapList = new DbRefComp()
369    {
 
370  0 toggle @Override
371    public boolean matches(DBRefEntry refa, DBRefEntry refb)
372    {
373  0 if (refa.getSource() != null && refb.getSource() != null
374    && DBRefUtils.getCanonicalName(refb.getSource()).equals(
375    DBRefUtils.getCanonicalName(refa.getSource())))
376    {
377    // We dont care about version
378    // if ((refa.getVersion()==null || refb.getVersion()==null)
379    // || refb.getVersion().equals(refa.getVersion()))
380    // {
381  0 if (refa.getAccessionId() != null && refb.getAccessionId() != null
382    || refb.getAccessionId().equals(refa.getAccessionId()))
383    {
384  0 if (refa.getMap() == null && refb.getMap() == null)
385    {
386  0 return true;
387    }
388  0 if (refa.getMap() != null && refb.getMap() != null
389    && ((refb.getMap().getMap() == null
390    && refa.getMap().getMap() == null)
391    || (refb.getMap().getMap() != null
392    && refa.getMap().getMap() != null
393    && refb.getMap().getMap()
394    .equals(refa.getMap().getMap()))))
395    {
396  0 return true;
397    }
398    }
399    }
400  0 return false;
401    }
402    };
403   
404    /**
405    * accession ID and DB must be identical, or null on a. Version is ignored. No
406    * map on either or map but no maplist on either or maplist of map on a is
407    * equivalent to the maplist of map on b.
408    */
409    public static DbRefComp matchDbAndIdAndEitherMapOrEquivalentMapList = new DbRefComp()
410    {
 
411  26 toggle @Override
412    public boolean matches(DBRefEntry refa, DBRefEntry refb)
413    {
414  26 if (refa.getSource() != null && refb.getSource() != null
415    && DBRefUtils.getCanonicalName(refb.getSource()).equals(
416    DBRefUtils.getCanonicalName(refa.getSource())))
417    {
418    // We dont care about version
419   
420  15 if (refa.getAccessionId() == null
421    || refa.getAccessionId().equals(refb.getAccessionId()))
422    {
423  14 if (refa.getMap() == null || refb.getMap() == null)
424    {
425  12 return true;
426    }
427  2 if ((refa.getMap() != null && refb.getMap() != null)
428    && (refb.getMap().getMap() == null
429    && refa.getMap().getMap() == null)
430    || (refb.getMap().getMap() != null
431    && refa.getMap().getMap() != null
432    && (refb.getMap().getMap()
433    .equals(refa.getMap().getMap()))))
434    {
435  1 return true;
436    }
437    }
438    }
439  13 return false;
440    }
441    };
442   
443    /**
444    * accession ID only must be identical.
445    */
446    public static DbRefComp matchId = new DbRefComp()
447    {
 
448  7 toggle @Override
449    public boolean matches(DBRefEntry refa, DBRefEntry refb)
450    {
451  7 if (refa.getAccessionId() != null && refb.getAccessionId() != null
452    && refb.getAccessionId().equals(refa.getAccessionId()))
453    {
454  5 return true;
455    }
456  2 return false;
457    }
458    };
459   
460    /**
461    * Parses a DBRefEntry and adds it to the sequence, also a PDBEntry if the
462    * database is PDB.
463    * <p>
464    * Used by file parsers to generate DBRefs from annotation within file (eg
465    * Stockholm)
466    *
467    * @param dbname
468    * @param version
469    * @param acn
470    * @param seq
471    * where to annotate with reference
472    * @return parsed version of entry that was added to seq (if any)
473    */
 
474  1630 toggle public static DBRefEntry parseToDbRef(SequenceI seq, String dbname,
475    String version, String acn)
476    {
477  1630 DBRefEntry ref = null;
478  1630 if (dbname != null)
479    {
480  1630 String locsrc = DBRefUtils.getCanonicalName(dbname);
481  1630 if (locsrc.equals(DBRefSource.PDB))
482    {
483    /*
484    * Check for PFAM style stockhom PDB accession id citation e.g.
485    * "1WRI A; 7-80;"
486    */
487  27 Regex r = new com.stevesoft.pat.Regex(
488    "([0-9][0-9A-Za-z]{3})\\s*(.?)\\s*;\\s*([0-9]+)-([0-9]+)");
489  27 if (r.search(acn.trim()))
490    {
491  27 String pdbid = r.stringMatched(1);
492  27 String chaincode = r.stringMatched(2);
493  27 if (chaincode == null)
494    {
495  0 chaincode = " ";
496    }
497    // String mapstart = r.stringMatched(3);
498    // String mapend = r.stringMatched(4);
499  27 if (chaincode.equals(" "))
500    {
501  0 chaincode = "_";
502    }
503    // construct pdb ref.
504  27 ref = new DBRefEntry(locsrc, version, pdbid + chaincode);
505  27 PDBEntry pdbr = new PDBEntry();
506  27 pdbr.setId(pdbid);
507  27 pdbr.setType(PDBEntry.Type.PDB);
508  27 pdbr.setChainCode(chaincode);
509  27 seq.addPDBId(pdbr);
510    }
511    else
512    {
513  0 System.err.println("Malformed PDB DR line:" + acn);
514    }
515    }
516    else
517    {
518    // default:
519  1603 ref = new DBRefEntry(locsrc, version, acn);
520    }
521    }
522  1630 if (ref != null)
523    {
524  1630 seq.addDBRef(ref);
525    }
526  1630 return ref;
527    }
528   
529    /**
530    * Returns true if either object is null, or they are equal
531    *
532    * @param o1
533    * @param o2
534    * @return
535    */
 
536  0 toggle public static boolean nullOrEqual(Object o1, Object o2)
537    {
538  0 if (o1 == null || o2 == null)
539    {
540  0 return true;
541    }
542  0 return o1.equals(o2);
543    }
544   
545    /**
546    * canonicalise source string before comparing. null is always wildcard
547    *
548    * @param o1
549    * - null or source string to compare
550    * @param o2
551    * - null or source string to compare
552    * @return true if either o1 or o2 are null, or o1 equals o2 under
553    * DBRefUtils.getCanonicalName
554    * (o1).equals(DBRefUtils.getCanonicalName(o2))
555    */
 
556  0 toggle public static boolean nullOrEqualSource(String o1, String o2)
557    {
558  0 if (o1 == null || o2 == null)
559    {
560  0 return true;
561    }
562  0 return DBRefUtils.getCanonicalName(o1)
563    .equals(DBRefUtils.getCanonicalName(o2));
564    }
565   
566    /**
567    * Selects just the DNA or protein references from a set of references
568    *
569    * @param selectDna
570    * if true, select references to 'standard' DNA databases, else to
571    * 'standard' peptide databases
572    * @param refs
573    * a set of references to select from
574    * @return
575    */
 
576  10565 toggle public static DBRefEntry[] selectDbRefs(boolean selectDna,
577    DBRefEntry[] refs)
578    {
579  10565 return selectRefs(refs,
580  10565 selectDna ? DBRefSource.DNACODINGDBS : DBRefSource.PROTEINDBS);
581    // could attempt to find other cross
582    // refs here - ie PDB xrefs
583    // (not dna, not protein seq)
584    }
585   
586    /**
587    * Returns the (possibly empty) list of those supplied dbrefs which have the
588    * specified source database, with a case-insensitive match of source name
589    *
590    * @param dbRefs
591    * @param source
592    * @return
593    */
 
594  9 toggle public static List<DBRefEntry> searchRefsForSource(DBRefEntry[] dbRefs,
595    String source)
596    {
597  9 List<DBRefEntry> matches = new ArrayList<DBRefEntry>();
598  9 if (dbRefs != null && source != null)
599    {
600  7 for (DBRefEntry dbref : dbRefs)
601    {
602  15 if (source.equalsIgnoreCase(dbref.getSource()))
603    {
604  15 matches.add(dbref);
605    }
606    }
607    }
608  9 return matches;
609    }
610   
611    /**
612    * promote direct database references to primary for nucleotide or protein
613    * sequences if they have an appropriate primary ref
614    * <table>
615    * <tr>
616    * <th>Seq Type</th>
617    * <th>Primary DB</th>
618    * <th>Direct which will be promoted</th>
619    * </tr>
620    * <tr align=center>
621    * <td>peptides</td>
622    * <td>Ensembl</td>
623    * <td>Uniprot</td>
624    * </tr>
625    * <tr align=center>
626    * <td>peptides</td>
627    * <td>Ensembl</td>
628    * <td>Uniprot</td>
629    * </tr>
630    * <tr align=center>
631    * <td>dna</td>
632    * <td>Ensembl</td>
633    * <td>ENA</td>
634    * </tr>
635    * </table>
636    *
637    * @param sequence
638    */
 
639  2381 toggle public static void ensurePrimaries(SequenceI sequence)
640    {
641  2381 List<DBRefEntry> pr = sequence.getPrimaryDBRefs();
642  2381 if (pr.size() == 0)
643    {
644    // nothing to do
645  1909 return;
646    }
647  472 List<DBRefEntry> selfs = new ArrayList<DBRefEntry>();
648    {
649  472 DBRefEntry[] selfArray = selectDbRefs(!sequence.isProtein(),
650    sequence.getDBRefs());
651  472 if (selfArray == null || selfArray.length == 0)
652    {
653    // nothing to do
654  164 return;
655    }
656  308 selfs.addAll(Arrays.asList(selfArray));
657    }
658   
659    // filter non-primary refs
660  308 for (DBRefEntry p : pr)
661    {
662  708 while (selfs.contains(p))
663    {
664  353 selfs.remove(p);
665    }
666    }
667  308 List<DBRefEntry> toPromote = new ArrayList<DBRefEntry>();
668   
669  308 for (DBRefEntry p : pr)
670    {
671  355 List<String> promType = new ArrayList<String>();
672  355 if (sequence.isProtein())
673    {
674  337 switch (getCanonicalName(p.getSource()))
675    {
676  322 case DBRefSource.UNIPROT:
677    // case DBRefSource.UNIPROTKB:
678    // case DBRefSource.UP_NAME:
679    // search for and promote ensembl
680  322 promType.add(DBRefSource.ENSEMBL);
681  322 break;
682  7 case DBRefSource.ENSEMBL:
683    // search for and promote Uniprot
684  7 promType.add(DBRefSource.UNIPROT);
685  7 break;
686    }
687    }
688    else
689    {
690    // TODO: promote transcript refs
691    }
692   
693    // collate candidates and promote them
694  355 DBRefEntry[] candidates = selectRefs(selfs.toArray(new DBRefEntry[0]),
695    promType.toArray(new String[0]));
696  355 if (candidates != null)
697    {
698  2 for (DBRefEntry cand : candidates)
699    {
700  2 if (cand.hasMap())
701    {
702  2 if (cand.getMap().getTo() != null
703    && cand.getMap().getTo() != sequence)
704    {
705    // can't promote refs with mappings to other sequences
706  2 continue;
707    }
708  0 if (cand.getMap().getMap().getFromLowest() != sequence
709    .getStart()
710    && cand.getMap().getMap().getFromHighest() != sequence
711    .getEnd())
712    {
713    // can't promote refs with mappings from a region of this sequence
714    // - eg CDS
715  0 continue;
716    }
717    }
718    // and promote
719  0 cand.setVersion(p.getVersion() + " (promoted)");
720  0 selfs.remove(cand);
721  0 toPromote.add(cand);
722  0 if (!cand.isPrimaryCandidate())
723    {
724  0 System.out.println(
725    "Warning: Couldn't promote dbref " + cand.toString()
726    + " for sequence " + sequence.toString());
727    }
728    }
729    }
730    }
731    }
732   
733    }