Clover icon

Coverage Report

  1. Project Clover database Thu Aug 13 2020 12:04:21 BST
  2. Package jalview.ws.dbsources

File Uniprot.java

 

Coverage histogram

../../../img/srcFileCovDistChart8.png
20% of files have more coverage

Code metrics

52
135
17
1
521
337
53
0.39
7.94
17
3.12

Classes

Class Line # Actions
Uniprot 68 135 53
0.75490195 75.5%
 

Contributing tests

This file is covered by 102 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.ws.dbsources;
22   
23    import jalview.bin.Cache;
24    import jalview.datamodel.Alignment;
25    import jalview.datamodel.AlignmentI;
26    import jalview.datamodel.DBRefEntry;
27    import jalview.datamodel.DBRefSource;
28    import jalview.datamodel.PDBEntry;
29    import jalview.datamodel.Sequence;
30    import jalview.datamodel.SequenceFeature;
31    import jalview.datamodel.SequenceI;
32    import jalview.schemes.ResidueProperties;
33    import jalview.util.StringUtils;
34    import jalview.ws.seqfetcher.DbSourceProxyImpl;
35    import jalview.xml.binding.embl.ROOT;
36    import jalview.xml.binding.uniprot.DbReferenceType;
37    import jalview.xml.binding.uniprot.Entry;
38    import jalview.xml.binding.uniprot.FeatureType;
39    import jalview.xml.binding.uniprot.LocationType;
40    import jalview.xml.binding.uniprot.PositionType;
41    import jalview.xml.binding.uniprot.PropertyType;
42   
43    import java.io.InputStream;
44    import java.net.URL;
45    import java.net.URLConnection;
46    import java.util.ArrayList;
47    import java.util.List;
48    import java.util.Vector;
49   
50    import javax.xml.bind.JAXBContext;
51    import javax.xml.bind.JAXBElement;
52    import javax.xml.bind.JAXBException;
53    import javax.xml.stream.FactoryConfigurationError;
54    import javax.xml.stream.XMLInputFactory;
55    import javax.xml.stream.XMLStreamException;
56    import javax.xml.stream.XMLStreamReader;
57   
58    import com.stevesoft.pat.Regex;
59   
60    /**
61    * This class queries the Uniprot database for sequence data, unmarshals the
62    * returned XML, and converts it to Jalview Sequence records (including attached
63    * database references and sequence features)
64    *
65    * @author JimP
66    *
67    */
 
68    public class Uniprot extends DbSourceProxyImpl
69    {
70    private static final String DEFAULT_UNIPROT_DOMAIN = "https://www.uniprot.org";
71   
72    private static final String BAR_DELIMITER = "|";
73   
/**
 * Creates a Uniprot source proxy; all initialisation is delegated to the
 * superclass constructor.
 */
public Uniprot()
{
  super();
}
81   
 
82  0 toggle private String getDomain()
83    {
84  0 return Cache.getDefault("UNIPROT_DOMAIN", DEFAULT_UNIPROT_DOMAIN);
85    }
86   
87    /*
88    * (non-Javadoc)
89    *
90    * @see jalview.ws.DbSourceProxy#getAccessionSeparator()
91    */
 
92  0 toggle @Override
93    public String getAccessionSeparator()
94    {
95  0 return null;
96    }
97   
98    /*
99    * (non-Javadoc)
100    *
101    * @see jalview.ws.DbSourceProxy#getAccessionValidator()
102    */
 
103  0 toggle @Override
104    public Regex getAccessionValidator()
105    {
106  0 return new Regex("([A-Z]+[0-9]+[A-Z0-9]+|[A-Z0-9]+_[A-Z0-9]+)");
107    }
108   
109    /*
110    * (non-Javadoc)
111    *
112    * @see jalview.ws.DbSourceProxy#getDbSource()
113    */
 
114  1762 toggle @Override
115    public String getDbSource()
116    {
117  1762 return DBRefSource.UNIPROT;
118    }
119   
120    /*
121    * (non-Javadoc)
122    *
123    * @see jalview.ws.DbSourceProxy#getDbVersion()
124    */
 
125  1 toggle @Override
126    public String getDbVersion()
127    {
128  1 return "0"; // we really don't know what version we're on.
129    }
130   
131    /*
132    * (non-Javadoc)
133    *
134    * @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[])
135    */
 
136  0 toggle @Override
137    public AlignmentI getSequenceRecords(String queries) throws Exception
138    {
139  0 startQuery();
140  0 try
141    {
142  0 queries = queries.toUpperCase().replaceAll(
143    "(UNIPROT\\|?|UNIPROT_|UNIREF\\d+_|UNIREF\\d+\\|?)", "");
144  0 AlignmentI al = null;
145   
146  0 String downloadstring = getDomain() + "/uniprot/" + queries
147    + ".xml";
148   
149  0 URL url = new URL(downloadstring);
150  0 URLConnection urlconn = url.openConnection();
151  0 InputStream istr = urlconn.getInputStream();
152  0 List<Entry> entries = getUniprotEntries(istr);
153  0 if (entries != null)
154    {
155  0 List<SequenceI> seqs = new ArrayList<>();
156  0 for (Entry entry : entries)
157    {
158  0 seqs.add(uniprotEntryToSequence(entry));
159    }
160  0 al = new Alignment(seqs.toArray(new SequenceI[seqs.size()]));
161    }
162   
163  0 stopQuery();
164  0 return al;
165    } catch (Exception e)
166    {
167  0 throw (e);
168    } finally
169    {
170  0 stopQuery();
171    }
172    }
173   
174    /**
175    * Converts an Entry object (bound from Uniprot XML) to a Jalview Sequence
176    *
177    * @param entry
178    * @return
179    */
 
180  1 toggle SequenceI uniprotEntryToSequence(Entry entry)
181    {
182  1 String id = getUniprotEntryId(entry);
183    /*
184    * Sequence should not include any whitespace, but JAXB leaves these in
185    */
186  1 String seqString = entry.getSequence().getValue().replaceAll("\\s*",
187    "");
188   
189  1 SequenceI sequence = new Sequence(id,
190    seqString);
191  1 sequence.setDescription(getUniprotEntryDescription(entry));
192   
193    /*
194    * add a 'self' DBRefEntry for each accession
195    */
196  1 final String dbVersion = getDbVersion();
197  1 List<DBRefEntry> dbRefs = new ArrayList<>();
198  1 for (String accessionId : entry.getAccession())
199    {
200  2 DBRefEntry dbRef = new DBRefEntry(DBRefSource.UNIPROT, dbVersion,
201    accessionId);
202  2 dbRefs.add(dbRef);
203    }
204   
205    /*
206    * add a DBRefEntry for each dbReference element in the XML;
207    * also add a PDBEntry if type="PDB";
208    * also add an EMBLCDS dbref if protein sequence id is given
209    * also add an Ensembl dbref " " " " " "
210    */
211  1 Vector<PDBEntry> pdbRefs = new Vector<>();
212  1 for (DbReferenceType dbref : entry.getDbReference())
213    {
214  3 String type = dbref.getType();
215  3 DBRefEntry dbr = new DBRefEntry(type,
216    DBRefSource.UNIPROT + ":" + dbVersion, dbref.getId());
217  3 dbRefs.add(dbr);
218  3 if ("PDB".equals(type))
219    {
220  1 pdbRefs.add(new PDBEntry(dbr));
221    }
222  3 if ("EMBL".equals(type))
223    {
224    /*
225    * e.g. Uniprot accession Q9BXM7 has
226    * <dbReference type="EMBL" id="M19359">
227    * <property type="protein sequence ID" value="AAA40981.1"/>
228    * <property type="molecule type" value="Genomic_DNA"/>
229    * </dbReference>
230    */
231  1 String cdsId = getProperty(dbref.getProperty(),
232    "protein sequence ID");
233  1 if (cdsId != null && cdsId.trim().length() > 0)
234    {
235    // remove version
236  1 String[] vrs = cdsId.split("\\.");
237  1 String version = vrs.length > 1 ? vrs[1]
238    : DBRefSource.UNIPROT + ":" + dbVersion;
239  1 dbr = new DBRefEntry(DBRefSource.EMBLCDS, version, vrs[0]);
240  1 dbRefs.add(dbr);
241    }
242    }
243  3 if ("Ensembl".equals(type))
244    {
245    /*
246    * e.g. Uniprot accession Q9BXM7 has
247    * <dbReference type="Ensembl" id="ENST00000321556">
248    * <molecule id="Q9BXM7-1"/>
249    * <property type="protein sequence ID" value="ENSP00000364204"/>
250    * <property type="gene ID" value="ENSG00000158828"/>
251    * </dbReference>
252    */
253  0 String cdsId = getProperty(dbref.getProperty(),
254    "protein sequence ID");
255  0 if (cdsId != null && cdsId.trim().length() > 0)
256    {
257  0 dbr = new DBRefEntry(DBRefSource.ENSEMBL,
258    DBRefSource.UNIPROT + ":" + dbVersion, cdsId.trim());
259  0 dbRefs.add(dbr);
260    }
261    }
262    }
263   
264    /*
265    * create features; they have either begin and end, or position, in XML
266    */
267  1 sequence.setPDBId(pdbRefs);
268  1 if (entry.getFeature() != null)
269    {
270  1 for (FeatureType uf : entry.getFeature())
271    {
272  9 LocationType location = uf.getLocation();
273  9 int start = 0;
274  9 int end = 0;
275  9 if (location.getPosition() != null)
276    {
277  4 start = location.getPosition().getPosition().intValue();
278  4 end = start;
279    }
280    else
281    {
282  5 start = location.getBegin().getPosition().intValue();
283  5 end = location.getEnd().getPosition().intValue();
284    }
285  9 SequenceFeature sf = new SequenceFeature(uf.getType(),
286    getDescription(uf), start, end, "Uniprot");
287  9 sf.setStatus(uf.getStatus());
288  9 sequence.addSequenceFeature(sf);
289    }
290    }
291  1 for (DBRefEntry dbr : dbRefs)
292    {
293  6 sequence.addDBRef(dbr);
294    }
295  1 return sequence;
296    }
297   
298    /**
299    * A helper method that builds a sequence feature description
300    *
301    * @param feature
302    * @return
303    */
 
304  21 toggle static String getDescription(FeatureType feature)
305    {
306  21 String orig = feature.getOriginal();
307  21 List<String> variants = feature.getVariation();
308  21 StringBuilder sb = new StringBuilder();
309   
310    /*
311    * append variant in standard format if present
312    * e.g. p.Arg59Lys
313    * multiple variants are split over lines using <br>
314    */
315  21 boolean asHtml = false;
316  21 if (orig != null && !orig.isEmpty() && variants != null
317    && !variants.isEmpty())
318    {
319  14 int p = 0;
320  14 for (String var : variants)
321    {
322    // TODO proper HGVS nomenclature for delins structural variations
323    // http://varnomen.hgvs.org/recommendations/protein/variant/delins/
324    // for now we are pragmatic - any orig/variant sequence longer than
325    // three characters is shown with single-character notation rather than
326    // three-letter notation
327  21 sb.append("p.");
328  21 if (orig.length() < 4)
329    {
330  36 for (int c = 0, clen = orig.length(); c < clen; c++)
331    {
332  21 char origchar = orig.charAt(c);
333  21 String orig3 = ResidueProperties.aa2Triplet.get("" + origchar);
334  21 sb.append(orig3 == null ? origchar
335    : StringUtils.toSentenceCase(orig3));
336    }
337    }
338    else
339    {
340  6 sb.append(orig);
341    }
342   
343  21 LocationType location = feature.getLocation();
344  21 PositionType start = location.getPosition() == null
345    ? location.getBegin()
346    : location.getPosition();
347  21 sb.append(Integer.toString(start.getPosition().intValue()));
348   
349  21 if (var.length() < 4)
350    {
351  52 for (int c = 0, clen = var.length(); c < clen; c++)
352    {
353  34 char varchar = var.charAt(c);
354  34 String var3 = ResidueProperties.aa2Triplet.get("" + varchar);
355   
356  34 sb.append(var3 != null ? StringUtils.toSentenceCase(var3)
357    : "" + varchar);
358    }
359    }
360    else
361    {
362  3 sb.append(var);
363    }
364  21 if (++p != variants.size())
365    {
366  7 sb.append("<br/>&nbsp;&nbsp;");
367  7 asHtml = true;
368    }
369    else
370    {
371  14 sb.append(" ");
372    }
373    }
374    }
375  21 String description = feature.getDescription();
376  21 if (description != null)
377    {
378  18 sb.append(description);
379    }
380  21 if (asHtml)
381    {
382  7 sb.insert(0, "<html>");
383  7 sb.append("</html>");
384    }
385   
386  21 return sb.toString();
387    }
388   
389    /**
390    * A helper method that searches the list of properties for one with the given
391    * key, and if found returns the property value, else returns null
392    *
393    * @param properties
394    * @param key
395    * @return
396    */
 
397  5 toggle static String getProperty(List<PropertyType> properties, String key)
398    {
399  5 String value = null;
400  5 if (properties != null)
401    {
402  5 for (PropertyType prop : properties)
403    {
404  7 if (key.equals(prop.getType()))
405    {
406  5 value = prop.getValue();
407  5 break;
408    }
409    }
410    }
411  5 return value;
412    }
413   
414    /**
415    * Extracts xml element entry/protein/recommendedName/fullName
416    *
417    * @param entry
418    * @return
419    */
 
420  2 toggle static String getUniprotEntryDescription(Entry entry)
421    {
422  2 String desc = "";
423  2 if (entry.getProtein() != null
424    && entry.getProtein().getRecommendedName() != null)
425    {
426    // fullName is mandatory if recommendedName is present
427  2 desc = entry.getProtein().getRecommendedName().getFullName()
428    .getValue();
429    }
430  2 return desc;
431    }
432   
433    /**
434    * Constructs a sequence id by concatenating all entry/name elements with '|'
435    * separator
436    *
437    * @param entry
438    * @return
439    */
 
440  2 toggle static String getUniprotEntryId(Entry entry)
441    {
442  2 StringBuilder name = new StringBuilder(32);
443  2 for (String n : entry.getName())
444    {
445  4 if (name.length() > 0)
446    {
447  2 name.append(BAR_DELIMITER);
448    }
449  4 name.append(n);
450    }
451  2 return name.toString();
452    }
453   
454    /*
455    * (non-Javadoc)
456    *
457    * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
458    */
 
459  0 toggle @Override
460    public boolean isValidReference(String accession)
461    {
462    // TODO: make the following a standard validator
463  0 return (accession == null || accession.length() < 2) ? false
464    : getAccessionValidator().search(accession);
465    }
466   
467    /**
468    * return LDHA_CHICK uniprot entry
469    */
 
470  0 toggle @Override
471    public String getTestQuery()
472    {
473  0 return "P00340";
474    }
475   
 
476  1754 toggle @Override
477    public String getDbName()
478    {
479  1754 return "Uniprot"; // getDbSource();
480    }
481   
 
482  0 toggle @Override
483    public int getTier()
484    {
485  0 return 0;
486    }
487   
488    /**
489    * Reads the reply to the EBI Fetch Uniprot data query, unmarshals it to an
490    * Uniprot object, and returns the enclosed Entry objects, or null on any
491    * failure
492    *
493    * @param is
494    * @return
495    */
 
496  4 toggle public List<Entry> getUniprotEntries(InputStream is)
497    {
498  4 List<Entry> entries = null;
499  4 try
500    {
501  4 JAXBContext jc = JAXBContext
502    .newInstance("jalview.xml.binding.uniprot");
503  4 XMLStreamReader streamReader = XMLInputFactory.newInstance()
504    .createXMLStreamReader(is);
505  4 javax.xml.bind.Unmarshaller um = jc.createUnmarshaller();
506  4 JAXBElement<jalview.xml.binding.uniprot.Uniprot> uniprotElement =
507    um.unmarshal(streamReader, jalview.xml.binding.uniprot.Uniprot.class);
508  4 jalview.xml.binding.uniprot.Uniprot uniprot = uniprotElement.getValue();
509   
510  4 if (uniprot != null && !uniprot.getEntry().isEmpty())
511    {
512  4 entries = uniprot.getEntry();
513    }
514    } catch (JAXBException | XMLStreamException
515    | FactoryConfigurationError e)
516    {
517  0 e.printStackTrace();
518    }
519  4 return entries;
520    }
521    }