Clover icon

Coverage Report

  1. Project Clover database Thu Dec 4 2025 16:11:35 GMT
  2. Package jalview.ws.dbsources

File Uniprot.java

 

Coverage histogram

../../../img/srcFileCovDistChart7.png
30% of files have more coverage

Code metrics

76
168
17
1
614
420
70
0.42
9.88
17
4.12

Classes

Class Line # Actions
Uniprot 71 168 70
0.7011494 70.1%
 

Contributing tests

This file is covered by 217 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.ws.dbsources;
22   
23    import java.io.InputStream;
24    import java.net.HttpURLConnection;
25    import java.net.URL;
26    import java.util.ArrayList;
27    import java.util.List;
28    import java.util.Locale;
29    import java.util.Vector;
30   
31    import javax.xml.bind.JAXBContext;
32    import javax.xml.bind.JAXBElement;
33    import javax.xml.bind.JAXBException;
34    import javax.xml.stream.FactoryConfigurationError;
35    import javax.xml.stream.XMLInputFactory;
36    import javax.xml.stream.XMLStreamException;
37    import javax.xml.stream.XMLStreamReader;
38   
39    import com.stevesoft.pat.Regex;
40   
41    import jalview.bin.Cache;
42    import jalview.bin.Console;
43    import jalview.datamodel.Alignment;
44    import jalview.datamodel.AlignmentI;
45    import jalview.datamodel.DBRefEntry;
46    import jalview.datamodel.DBRefSource;
47    import jalview.datamodel.PDBEntry;
48    import jalview.datamodel.Sequence;
49    import jalview.datamodel.SequenceFeature;
50    import jalview.datamodel.SequenceI;
51    import jalview.schemes.ResidueProperties;
52    import jalview.util.Platform;
53    import jalview.util.HttpUtils;
54    import jalview.util.StringUtils;
55    import jalview.ws.seqfetcher.DbSourceProxyImpl;
56    import jalview.xml.binding.uniprot.DbReferenceType;
57    import jalview.xml.binding.uniprot.Entry;
58    import jalview.xml.binding.uniprot.FeatureType;
59    import jalview.xml.binding.uniprot.LocationType;
60    import jalview.xml.binding.uniprot.PositionType;
61    import jalview.xml.binding.uniprot.PropertyType;
62   
63    /**
64    * This class queries the Uniprot database for sequence data, unmarshals the
65    * returned XML, and converts it to Jalview Sequence records (including attached
66    * database references and sequence features)
67    *
68    * @author JimP
69    *
70    */
 
71    public class Uniprot extends DbSourceProxyImpl
72    {
  /**
   * Fallback value passed to Cache.getDefault when the "UNIPROT_DOMAIN"
   * setting is looked up (see getDomain)
   */
  private static final String DEFAULT_UNIPROT_DOMAIN = "https://rest.uniprot.org";

  /** Separator inserted between entry names when building a sequence id */
  private static final String BAR_DELIMITER = "|";

  /** Accession validator; created on the first call to getAccessionValidator */
  private static Regex ACCESSION_REGEX;
77   
78    /**
79    * Constructor
80    */
 
81  23 toggle public Uniprot()
82    {
83  23 super();
84    }
85   
 
86  0 toggle private String getDomain()
87    {
88  0 return Cache.getDefault("UNIPROT_DOMAIN", DEFAULT_UNIPROT_DOMAIN);
89    }
90   
91    /*
92    * (non-Javadoc)
93    *
94    * @see jalview.ws.DbSourceProxy#getAccessionSeparator()
95    */
 
96  0 toggle @Override
97    public String getAccessionSeparator()
98    {
99  0 return null;
100    }
101   
102    /*
103    * (non-Javadoc)
104    *
105    * @see jalview.ws.DbSourceProxy#getAccessionValidator()
106    */
 
107  0 toggle @Override
108    public Regex getAccessionValidator()
109    {
110  0 if (ACCESSION_REGEX == null)
111    {
112  0 ACCESSION_REGEX = Platform
113    .newRegex("([A-Z]+[0-9]+[A-Z0-9]+|[A-Z0-9]+_[A-Z0-9]+)");
114    }
115  0 return ACCESSION_REGEX;
116    }
117   
118    /*
119    * (non-Javadoc)
120    *
121    * @see jalview.ws.DbSourceProxy#getDbSource()
122    */
 
123  8140 toggle @Override
124    public String getDbSource()
125    {
126  8140 return DBRefSource.UNIPROT;
127    }
128   
129    /*
130    * (non-Javadoc)
131    *
132    * @see jalview.ws.DbSourceProxy#getDbVersion()
133    */
 
134  2 toggle @Override
135    public String getDbVersion()
136    {
137  2 return "0"; // we really don't know what version we're on.
138    }
139   
140    /*
141    * (non-Javadoc)
142    *
143    * @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[])
144    */
 
145  0 toggle @Override
146    public AlignmentI getSequenceRecords(String queries) throws Exception
147    {
148  0 startQuery();
149  0 try
150    {
151  0 queries = queries.toUpperCase(Locale.ROOT).replaceAll(
152    "(UNIPROT\\|?|UNIPROT_|UNIREF\\d+_|UNIREF\\d+\\|?)", "");
153  0 AlignmentI al = null;
154   
155  0 String downloadstring = getDomain() + "/uniprotkb/" + queries
156    + ".xml";
157   
158  0 URL url = new URL(downloadstring);
159    // TODO: JAL-4107 - Verify this behaves correctly after 2.12 ? Why the change ? (Jim can't remember!)
160  0 HttpURLConnection urlconn = (HttpURLConnection) HttpUtils
161    .openConnection(url);
162    // anything other than 200 means we don't have data
163    // TODO: JAL-3882 reuse the EnsemblRestClient's fair
164    // use/backoff logic to retry when the server tells us to go away
165  0 if (urlconn.getResponseCode() == 200)
166    {
167  0 InputStream istr = urlconn.getInputStream();
168  0 List<Entry> entries = getUniprotEntries(istr);
169  0 if (entries != null)
170    {
171  0 List<SequenceI> seqs = new ArrayList<>();
172  0 for (Entry entry : entries)
173    {
174  0 seqs.add(uniprotEntryToSequence(entry));
175    }
176  0 al = new Alignment(seqs.toArray(new SequenceI[seqs.size()]));
177    }
178    }
179   
180  0 stopQuery();
181  0 return al;
182   
183    } catch (Exception e)
184    {
185  0 throw (e);
186    } finally
187    {
188  0 stopQuery();
189    }
190    }
191   
192    /**
193    * Converts an Entry object (bound from Uniprot XML) to a Jalview Sequence
194    *
195    * @param entry
196    * @return
197    */
 
198  2 toggle SequenceI uniprotEntryToSequence(Entry entry)
199    {
200  2 String id = getUniprotEntryId(entry);
201    /*
202    * Sequence should not include any whitespace, but JAXB leaves these in
203    */
204  2 String seqString = entry.getSequence().getValue().replaceAll("\\s*",
205    "");
206   
207  2 SequenceI sequence = new Sequence(id, seqString);
208  2 sequence.setDescription(getUniprotEntryDescription(entry));
209  2 final String uniprotRecordVersion = "" + entry.getVersion();
210    /*
211    * add a 'self' DBRefEntry for each accession
212    */
213  2 final String dbVersion = getDbVersion();
214  2 List<DBRefEntry> dbRefs = new ArrayList<>();
215  2 boolean canonical = true;
216  2 for (String accessionId : entry.getAccession())
217    {
218  4 DBRefEntry dbRef = new DBRefEntry(DBRefSource.UNIPROT,
219    uniprotRecordVersion, accessionId, null, canonical);
220  4 canonical = false;
221  4 dbRefs.add(dbRef);
222    }
223   
224    /*
225    * add a DBRefEntry for each dbReference element in the XML;
226    * also add a PDBEntry if type="PDB";
227    * also add an EMBLCDS dbref if protein sequence id is given
228    * also add an Ensembl dbref " " " " " "
229    */
230  2 Vector<PDBEntry> pdbRefs = new Vector<>();
231  2 for (DbReferenceType dbref : entry.getDbReference())
232    {
233  48 String type = dbref.getType();
234  48 DBRefEntry dbr = new DBRefEntry(type,
235    DBRefSource.UNIPROT + ":" + dbVersion, dbref.getId());
236  48 dbRefs.add(dbr);
237  48 if ("PDB".equals(type))
238    {
239  1 pdbRefs.add(new PDBEntry(dbr));
240    }
241  48 if ("EMBL".equals(type))
242    {
243    /*
244    * e.g. Uniprot accession Q9BXM7 has
245    * <dbReference type="EMBL" id="M19359">
246    * <property type="protein sequence ID" value="AAA40981.1"/>
247    * <property type="molecule type" value="Genomic_DNA"/>
248    * </dbReference>
249    */
250  9 String cdsId = getProperty(dbref.getProperty(),
251    "protein sequence ID");
252  9 if (cdsId != null && cdsId.trim().length() > 0)
253    {
254    // remove version
255  9 String[] vrs = cdsId.split("\\.");
256  9 String version = vrs.length > 1 ? vrs[1]
257    : DBRefSource.UNIPROT + ":" + uniprotRecordVersion;
258  9 dbr = new DBRefEntry(DBRefSource.EMBLCDS, version, vrs[0]);
259    // TODO: process VARIANT features to allow EMBLCDS record's product to
260    // match Uniprot
261  9 dbr.setCanonical(true);
262  9 dbRefs.add(dbr);
263    }
264    }
265    // from 2.11.2.6 - probably see a conflict here
266  48 if (type != null
267    && type.toLowerCase(Locale.ROOT).startsWith("ensembl"))
268    {
269    // remove version
270  0 String[] vrs = dbref.getId().split("\\.");
271  0 String version = vrs.length > 1 ? vrs[1]
272    : DBRefSource.UNIPROT + ":" + uniprotRecordVersion;
273  0 dbr.setAccessionId(vrs[0]);
274  0 dbr.setVersion(version);
275    /*
276    * e.g. Uniprot accession Q9BXM7 has
277    * <dbReference type="Ensembl" id="ENST00000321556">
278    * <molecule id="Q9BXM7-1"/>
279    * <property type="protein sequence ID" value="ENSP00000364204"/>
280    * <property type="gene ID" value="ENSG00000158828"/>
281    * </dbReference>
282    */
283  0 String cdsId = getProperty(dbref.getProperty(),
284    "protein sequence ID");
285  0 if (cdsId != null && cdsId.trim().length() > 0)
286    {
287    // remove version
288  0 String[] cdsVrs = cdsId.split("\\.");
289  0 String cdsVersion = cdsVrs.length > 1 ? cdsVrs[1]
290    : DBRefSource.UNIPROT + ":" + uniprotRecordVersion;
291  0 dbr = new DBRefEntry(DBRefSource.ENSEMBL,
292    DBRefSource.UNIPROT + ":" + cdsVersion, cdsVrs[0]);
293  0 dbRefs.add(dbr);
294    }
295    }
296    }
297   
298    /*
299    * create features; they have either begin and end, or position, in XML
300    */
301  2 sequence.setPDBId(pdbRefs);
302  2 if (entry.getFeature() != null)
303    {
304  2 for (FeatureType uf : entry.getFeature())
305    {
306  21 LocationType location = uf.getLocation();
307  21 int start = 0;
308  21 int end = 0;
309  21 String uncertain_start = null, uncertain_end = null,
310    uncertain_pos = null;
311  21 if (location.getPosition() != null)
312    {
313  8 if (location.getPosition().getPosition() == null
314    || "unknown".equals(location.getPosition().getStatus()))
315    {
316  0 Console.warn(
317    "Ignoring single position feature with uncertain location "
318    + uf.getType() + ":" + getDescription(uf));
319  0 uncertain_pos = location.getPosition().getStatus() == null
320    ? "unknown"
321    : location.getPosition().getStatus();
322    }
323    else
324    {
325  8 start = location.getPosition().getPosition().intValue();
326  8 end = start;
327    }
328    }
329    else
330    {
331  13 if (location.getBegin().getPosition() == null)
332    {
333  1 Console.warn(
334    "Setting start position of feature with uncertain start to 1: "
335    + uf.getType() + ":" + getDescription(uf));
336  1 start = sequence.getStart();
337  1 uncertain_start = location.getBegin().getStatus();
338    }
339    else
340    {
341  12 start = location.getBegin().getPosition().intValue();
342    }
343  13 if (location.getEnd().getPosition() == null)
344    {
345  1 Console.warn(
346    "Setting start position of feature with uncertain start to 1: "
347    + uf.getType() + ":" + getDescription(uf));
348  1 end = sequence.getEnd();
349  1 uncertain_end = location.getEnd().getStatus();
350    }
351    else
352    {
353  12 end = location.getEnd().getPosition().intValue();
354    }
355    }
356  21 SequenceFeature sf = new SequenceFeature(uf.getType(),
357    getDescription(uf), start, end, "Uniprot");
358  21 sf.setStatus(uf.getStatus());
359  21 if (uncertain_end != null)
360    {
361  1 sf.setValue("end_status", uncertain_end);
362    }
363  21 if (uncertain_start != null)
364    {
365  1 sf.setValue("start_status", uncertain_start);
366    }
367  21 if (uncertain_pos != null)
368    {
369  0 sf.setValue("pos_status", uncertain_pos);
370    }
371  21 sequence.addSequenceFeature(sf);
372    }
373    }
374  2 for (DBRefEntry dbr : dbRefs)
375    {
376  61 sequence.addDBRef(dbr);
377    }
378  2 return sequence;
379    }
380   
381    /**
382    * A helper method that builds a sequence feature description
383    *
384    * @param feature
385    * @return
386    */
 
387  35 toggle static String getDescription(FeatureType feature)
388    {
389  35 String orig = feature.getOriginal();
390  35 List<String> variants = feature.getVariation();
391  35 StringBuilder sb = new StringBuilder();
392   
393    /*
394    * append variant in standard format if present
395    * e.g. p.Arg59Lys
396    * multiple variants are split over lines using <br>
397    */
398  35 boolean asHtml = false;
399  35 if (orig != null && !orig.isEmpty() && variants != null
400    && !variants.isEmpty())
401    {
402  14 int p = 0;
403  14 for (String var : variants)
404    {
405    // TODO proper HGVS nomenclature for delins structural variations
406    // http://varnomen.hgvs.org/recommendations/protein/variant/delins/
407    // for now we are pragmatic - any orig/variant sequence longer than
408    // three characters is shown with single-character notation rather than
409    // three-letter notation
410  21 sb.append("p.");
411  21 if (orig.length() < 4)
412    {
413  36 for (int c = 0, clen = orig.length(); c < clen; c++)
414    {
415  21 char origchar = orig.charAt(c);
416  21 String orig3 = ResidueProperties.aa2Triplet.get("" + origchar);
417  21 sb.append(orig3 == null ? origchar
418    : StringUtils.toSentenceCase(orig3));
419    }
420    }
421    else
422    {
423  6 sb.append(orig);
424    }
425   
426  21 LocationType location = feature.getLocation();
427  21 PositionType start = location.getPosition() == null
428    ? location.getBegin()
429    : location.getPosition();
430  21 sb.append(Integer.toString(start.getPosition().intValue()));
431   
432  21 if (var.length() < 4)
433    {
434  52 for (int c = 0, clen = var.length(); c < clen; c++)
435    {
436  34 char varchar = var.charAt(c);
437  34 String var3 = ResidueProperties.aa2Triplet.get("" + varchar);
438   
439  34 sb.append(var3 != null ? StringUtils.toSentenceCase(var3)
440    : "" + varchar);
441    }
442    }
443    else
444    {
445  3 sb.append(var);
446    }
447  21 if (++p != variants.size())
448    {
449  7 sb.append("<br/>&nbsp;&nbsp;");
450  7 asHtml = true;
451    }
452    else
453    {
454  14 sb.append(" ");
455    }
456    }
457    }
458  35 String description = feature.getDescription();
459  35 if (description != null)
460    {
461  26 sb.append(description);
462    }
463  35 if (asHtml)
464    {
465  7 sb.insert(0, "<html>");
466  7 sb.append("</html>");
467    }
468   
469  35 return sb.toString();
470    }
471   
472    /**
473    * A helper method that searches the list of properties for one with the given
474    * key, and if found returns the property value, else returns null
475    *
476    * @param properties
477    * @param key
478    * @return
479    */
 
480  13 toggle static String getProperty(List<PropertyType> properties, String key)
481    {
482  13 String value = null;
483  13 if (properties != null)
484    {
485  13 for (PropertyType prop : properties)
486    {
487  15 if (key.equals(prop.getType()))
488    {
489  13 value = prop.getValue();
490  13 break;
491    }
492    }
493    }
494  13 return value;
495    }
496   
497    /**
498    * Extracts xml element entry/protein/recommendedName/fullName
499    *
500    * @param entry
501    * @return
502    */
 
503  3 toggle static String getUniprotEntryDescription(Entry entry)
504    {
505  3 String desc = "";
506  3 if (entry.getProtein() != null
507    && entry.getProtein().getRecommendedName() != null)
508    {
509    // fullName is mandatory if recommendedName is present
510  3 desc = entry.getProtein().getRecommendedName().getFullName()
511    .getValue();
512    }
513  3 return desc;
514    }
515   
516    /**
517    * Constructs a sequence id by concatenating all entry/name elements with '|'
518    * separator
519    *
520    * @param entry
521    * @return
522    */
 
523  3 toggle static String getUniprotEntryId(Entry entry)
524    {
525  3 StringBuilder name = new StringBuilder(32);
526  3 for (String n : entry.getName())
527    {
528  5 if (name.length() > 0)
529    {
530  2 name.append(BAR_DELIMITER);
531    }
532  5 name.append(n);
533    }
534  3 return name.toString();
535    }
536   
537    /*
538    * (non-Javadoc)
539    *
540    * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
541    */
 
542  0 toggle @Override
543    public boolean isValidReference(String accession)
544    {
545    // TODO: make the following a standard validator
546  0 return (accession == null || accession.length() < 2) ? false
547    : getAccessionValidator().search(accession);
548    }
549   
550    /**
551    * return LDHA_CHICK uniprot entry
552    */
 
553  0 toggle @Override
554    public String getTestQuery()
555    {
556  0 return "P00340";
557    }
558   
 
559  8123 toggle @Override
560    public String getDbName()
561    {
562  8123 return "Uniprot"; // getDbSource();
563    }
564   
 
565  0 toggle @Override
566    public int getTier()
567    {
568  0 return 0;
569    }
570   
571    /**
572    * Reads the reply to the EBI Fetch Uniprot data query, unmarshals it to an
573    * Uniprot object, and returns the enclosed Entry objects, or null on any
574    * failure
575    *
576    * @param is
577    * @return
578    */
 
579  5 toggle public List<Entry> getUniprotEntries(InputStream is)
580    {
581  5 List<Entry> entries = null;
582  5 try
583    {
584  5 JAXBContext jc = JAXBContext
585    .newInstance("jalview.xml.binding.uniprot");
586  5 XMLStreamReader streamReader = XMLInputFactory.newInstance()
587    .createXMLStreamReader(is);
588  5 javax.xml.bind.Unmarshaller um = jc.createUnmarshaller();
589  5 JAXBElement<jalview.xml.binding.uniprot.Uniprot> uniprotElement = um
590    .unmarshal(streamReader,
591    jalview.xml.binding.uniprot.Uniprot.class);
592  5 jalview.xml.binding.uniprot.Uniprot uniprot = uniprotElement
593    .getValue();
594   
595  5 if (uniprot != null && !uniprot.getEntry().isEmpty())
596    {
597  5 entries = uniprot.getEntry();
598    }
599    } catch (JAXBException | XMLStreamException
600    | FactoryConfigurationError e)
601    {
602  0 if (e instanceof javax.xml.bind.UnmarshalException
603    && e.getCause() != null
604    && e.getCause() instanceof XMLStreamException
605    && e.getCause().getMessage().contains("[row,col]:[1,1]"))
606    {
607    // trying to parse an empty stream
608  0 return null;
609    }
610  0 e.printStackTrace();
611    }
612  5 return entries;
613    }
614    }