Clover icon

Coverage Report

  1. Project Clover database Wed Nov 13 2024 18:27:33 GMT
  2. Package jalview.ws.dbsources

File TDBeacons.java

 

Coverage histogram

../../../img/srcFileCovDistChart0.png
59% of files have more coverage

Code metrics

52
135
17
1
526
339
53
0.39
7.94
17
3.12

Classes

Class Line # Actions
TDBeacons 69 135 53
0.00%
 

Contributing tests

No tests hitting this source file were found.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.ws.dbsources;
22   
23    import java.io.InputStream;
24    import java.net.URL;
25    import java.net.URLConnection;
26    import java.util.ArrayList;
27    import java.util.List;
28    import java.util.Locale;
29    import java.util.Vector;
30   
31    import javax.xml.bind.JAXBContext;
32    import javax.xml.bind.JAXBElement;
33    import javax.xml.bind.JAXBException;
34    import javax.xml.stream.FactoryConfigurationError;
35    import javax.xml.stream.XMLInputFactory;
36    import javax.xml.stream.XMLStreamException;
37    import javax.xml.stream.XMLStreamReader;
38   
39    import com.stevesoft.pat.Regex;
40   
41    import jalview.bin.Cache;
42    import jalview.datamodel.Alignment;
43    import jalview.datamodel.AlignmentI;
44    import jalview.datamodel.DBRefEntry;
45    import jalview.datamodel.DBRefSource;
46    import jalview.datamodel.PDBEntry;
47    import jalview.datamodel.Sequence;
48    import jalview.datamodel.SequenceFeature;
49    import jalview.datamodel.SequenceI;
50    import jalview.schemes.ResidueProperties;
51    import jalview.util.HttpUtils;
52    import jalview.util.StringUtils;
53    import jalview.ws.seqfetcher.DbSourceProxyImpl;
54    import jalview.xml.binding.uniprot.DbReferenceType;
55    import jalview.xml.binding.uniprot.Entry;
56    import jalview.xml.binding.uniprot.FeatureType;
57    import jalview.xml.binding.uniprot.LocationType;
58    import jalview.xml.binding.uniprot.PositionType;
59    import jalview.xml.binding.uniprot.PropertyType;
60   
61    /**
62    * This class queries the Uniprot database for sequence data, unmarshals the
63    * returned XML, and converts it to Jalview Sequence records (including attached
64    * database references and sequence features)
65    *
66    * @author JimP
67    *
68    */
 
69    public class TDBeacons extends DbSourceProxyImpl
70    {
/* Fallback base URL handed to the "UNIPROT_DOMAIN" lookup in getDomain() */
71    private static final String DEFAULT_UNIPROT_DOMAIN = "https://www.uniprot.org";
72   
/* Separator placed between entry names when getUniprotEntryId() builds a sequence id */
73    private static final String BAR_DELIMITER = "|";
74   
/*
 * 3D-Beacons hub API endpoint. NOTE(review): in this file it is only
 * referenced from commented-out code in getDomain() - confirm whether it is
 * still needed.
 */
75    private static final String DEFAULT_THREEDBEACONS_DOMAIN = "https://wwwdev.ebi.ac.uk/pdbe/pdbe-kb/3dbeacons-hub-api/uniprot/summary/";
76   
77    /**
78    * Constructor
79    */
 
80  0 toggle public TDBeacons()
81    {
82  0 super();
83    }
84   
 
85  0 toggle private String getDomain()
86    {
87  0 return Cache.getDefault("UNIPROT_DOMAIN", DEFAULT_UNIPROT_DOMAIN);
88    // return Cache.getDefault("3DB_DOMAIN", DEFAULT_THREEDBEACONS_DOMAIN );
89    }
90   
91    /*
92    * (non-Javadoc)
93    *
94    * @see jalview.ws.DbSourceProxy#getAccessionSeparator()
95    */
 
96  0 toggle @Override
97    public String getAccessionSeparator()
98    {
99  0 return null;
100    }
101   
102    /*
103    * (non-Javadoc)
104    *
105    * @see jalview.ws.DbSourceProxy#getAccessionValidator()
106    */
 
107  0 toggle @Override
108    public Regex getAccessionValidator()
109    {
110  0 return new Regex("([A-Z]+[0-9]+[A-Z0-9]+|[A-Z0-9]+_[A-Z0-9]+)");
111    }
112   
113    /*
114    * (non-Javadoc)
115    *
116    * @see jalview.ws.DbSourceProxy#getDbSource()
117    */
 
118  0 toggle @Override
119    public String getDbSource()
120    {
121  0 return "3d-beacons";// DBRefSource.UNIPROT;
122    }
123   
124    /*
125    * (non-Javadoc)
126    *
127    * @see jalview.ws.DbSourceProxy#getDbVersion()
128    */
 
129  0 toggle @Override
130    public String getDbVersion()
131    {
132  0 return "0"; // we really don't know what version we're on.
133    }
134   
135    /*
136    * (non-Javadoc)
137    *
138    * @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[])
139    */
 
140  0 toggle @Override
141    public AlignmentI getSequenceRecords(String queries) throws Exception
142    {
143  0 startQuery();
144  0 try
145    {
146  0 queries = queries.toUpperCase(Locale.ROOT).replaceAll(
147    "(UNIPROT\\|?|UNIPROT_|UNIREF\\d+_|UNIREF\\d+\\|?)", "");
148  0 AlignmentI al = null;
149   
150  0 String downloadstring = getDomain() + "/uniprot/" + queries + ".xml";
151    // String downloadstring = getDomain() + queries + ".json";
152   
153  0 URL url = new URL(downloadstring);
154  0 URLConnection urlconn = HttpUtils.openConnection(url);
155  0 InputStream istr = urlconn.getInputStream();
156  0 List<Entry> entries = getUniprotEntries(istr);
157  0 if (entries != null)
158    {
159  0 List<SequenceI> seqs = new ArrayList<>();
160  0 for (Entry entry : entries)
161    {
162  0 seqs.add(uniprotEntryToSequence(entry));
163    }
164  0 al = new Alignment(seqs.toArray(new SequenceI[seqs.size()]));
165    }
166   
167  0 stopQuery();
168  0 return al;
169    } catch (Exception e)
170    {
171  0 throw (e);
172    } finally
173    {
174  0 stopQuery();
175    }
176    }
177   
178    /**
179    * Converts an Entry object (bound from Uniprot XML) to a Jalview Sequence
180    *
181    * @param entry
182    * @return
183    */
 
184  0 toggle SequenceI uniprotEntryToSequence(Entry entry)
185    {
186  0 String id = getUniprotEntryId(entry);
187    /*
188    * Sequence should not include any whitespace, but JAXB leaves these in
189    */
190  0 String seqString = entry.getSequence().getValue().replaceAll("\\s*",
191    "");
192   
193  0 SequenceI sequence = new Sequence(id, seqString);
194  0 sequence.setDescription(getUniprotEntryDescription(entry));
195   
196    /*
197    * add a 'self' DBRefEntry for each accession
198    */
199  0 final String dbVersion = getDbVersion();
200  0 List<DBRefEntry> dbRefs = new ArrayList<>();
201  0 for (String accessionId : entry.getAccession())
202    {
203  0 DBRefEntry dbRef = new DBRefEntry(DBRefSource.UNIPROT, dbVersion,
204    accessionId);
205  0 dbRefs.add(dbRef);
206    }
207   
208    /*
209    * add a DBRefEntry for each dbReference element in the XML;
210    * also add a PDBEntry if type="PDB";
211    * also add an EMBLCDS dbref if protein sequence id is given
212    * also add an Ensembl dbref " " " " " "
213    */
214  0 Vector<PDBEntry> pdbRefs = new Vector<>();
215  0 for (DbReferenceType dbref : entry.getDbReference())
216    {
217  0 String type = dbref.getType();
218  0 DBRefEntry dbr = new DBRefEntry(type,
219    DBRefSource.UNIPROT + ":" + dbVersion, dbref.getId());
220  0 dbRefs.add(dbr);
221  0 if ("PDB".equals(type))
222    {
223  0 pdbRefs.add(new PDBEntry(dbr));
224    }
225  0 if ("EMBL".equals(type))
226    {
227    /*
228    * e.g. Uniprot accession Q9BXM7 has
229    * <dbReference type="EMBL" id="M19359">
230    * <property type="protein sequence ID" value="AAA40981.1"/>
231    * <property type="molecule type" value="Genomic_DNA"/>
232    * </dbReference>
233    */
234  0 String cdsId = getProperty(dbref.getProperty(),
235    "protein sequence ID");
236  0 if (cdsId != null && cdsId.trim().length() > 0)
237    {
238    // remove version
239  0 String[] vrs = cdsId.split("\\.");
240  0 String version = vrs.length > 1 ? vrs[1]
241    : DBRefSource.UNIPROT + ":" + dbVersion;
242  0 dbr = new DBRefEntry(DBRefSource.EMBLCDS, version, vrs[0]);
243  0 dbRefs.add(dbr);
244    }
245    }
246  0 if ("Ensembl".equals(type))
247    {
248    /*
249    * e.g. Uniprot accession Q9BXM7 has
250    * <dbReference type="Ensembl" id="ENST00000321556">
251    * <molecule id="Q9BXM7-1"/>
252    * <property type="protein sequence ID" value="ENSP00000364204"/>
253    * <property type="gene ID" value="ENSG00000158828"/>
254    * </dbReference>
255    */
256  0 String cdsId = getProperty(dbref.getProperty(),
257    "protein sequence ID");
258  0 if (cdsId != null && cdsId.trim().length() > 0)
259    {
260  0 dbr = new DBRefEntry(DBRefSource.ENSEMBL,
261    DBRefSource.UNIPROT + ":" + dbVersion, cdsId.trim());
262  0 dbRefs.add(dbr);
263    }
264    }
265    }
266   
267    /*
268    * create features; they have either begin and end, or position, in XML
269    */
270  0 sequence.setPDBId(pdbRefs);
271  0 if (entry.getFeature() != null)
272    {
273  0 for (FeatureType uf : entry.getFeature())
274    {
275  0 LocationType location = uf.getLocation();
276  0 int start = 0;
277  0 int end = 0;
278  0 if (location.getPosition() != null)
279    {
280  0 start = location.getPosition().getPosition().intValue();
281  0 end = start;
282    }
283    else
284    {
285  0 start = location.getBegin().getPosition().intValue();
286  0 end = location.getEnd().getPosition().intValue();
287    }
288  0 SequenceFeature sf = new SequenceFeature(uf.getType(),
289    getDescription(uf), start, end, "Uniprot");
290  0 sf.setStatus(uf.getStatus());
291  0 sequence.addSequenceFeature(sf);
292    }
293    }
294  0 for (DBRefEntry dbr : dbRefs)
295    {
296  0 sequence.addDBRef(dbr);
297    }
298  0 return sequence;
299    }
300   
301    /**
302    * A helper method that builds a sequence feature description
303    *
304    * @param feature
305    * @return
306    */
 
307  0 toggle static String getDescription(FeatureType feature)
308    {
309  0 String orig = feature.getOriginal();
310  0 List<String> variants = feature.getVariation();
311  0 StringBuilder sb = new StringBuilder();
312   
313    /*
314    * append variant in standard format if present
315    * e.g. p.Arg59Lys
316    * multiple variants are split over lines using <br>
317    */
318  0 boolean asHtml = false;
319  0 if (orig != null && !orig.isEmpty() && variants != null
320    && !variants.isEmpty())
321    {
322  0 int p = 0;
323  0 for (String var : variants)
324    {
325    // TODO proper HGVS nomenclature for delins structural variations
326    // http://varnomen.hgvs.org/recommendations/protein/variant/delins/
327    // for now we are pragmatic - any orig/variant sequence longer than
328    // three characters is shown with single-character notation rather than
329    // three-letter notation
330  0 sb.append("p.");
331  0 if (orig.length() < 4)
332    {
333  0 for (int c = 0, clen = orig.length(); c < clen; c++)
334    {
335  0 char origchar = orig.charAt(c);
336  0 String orig3 = ResidueProperties.aa2Triplet.get("" + origchar);
337  0 sb.append(orig3 == null ? origchar
338    : StringUtils.toSentenceCase(orig3));
339    }
340    }
341    else
342    {
343  0 sb.append(orig);
344    }
345   
346  0 LocationType location = feature.getLocation();
347  0 PositionType start = location.getPosition() == null
348    ? location.getBegin()
349    : location.getPosition();
350  0 sb.append(Integer.toString(start.getPosition().intValue()));
351   
352  0 if (var.length() < 4)
353    {
354  0 for (int c = 0, clen = var.length(); c < clen; c++)
355    {
356  0 char varchar = var.charAt(c);
357  0 String var3 = ResidueProperties.aa2Triplet.get("" + varchar);
358   
359  0 sb.append(var3 != null ? StringUtils.toSentenceCase(var3)
360    : "" + varchar);
361    }
362    }
363    else
364    {
365  0 sb.append(var);
366    }
367  0 if (++p != variants.size())
368    {
369  0 sb.append("<br/>&nbsp;&nbsp;");
370  0 asHtml = true;
371    }
372    else
373    {
374  0 sb.append(" ");
375    }
376    }
377    }
378  0 String description = feature.getDescription();
379  0 if (description != null)
380    {
381  0 sb.append(description);
382    }
383  0 if (asHtml)
384    {
385  0 sb.insert(0, "<html>");
386  0 sb.append("</html>");
387    }
388   
389  0 return sb.toString();
390    }
391   
392    /**
393    * A helper method that searches the list of properties for one with the given
394    * key, and if found returns the property value, else returns null
395    *
396    * @param properties
397    * @param key
398    * @return
399    */
 
400  0 toggle static String getProperty(List<PropertyType> properties, String key)
401    {
402  0 String value = null;
403  0 if (properties != null)
404    {
405  0 for (PropertyType prop : properties)
406    {
407  0 if (key.equals(prop.getType()))
408    {
409  0 value = prop.getValue();
410  0 break;
411    }
412    }
413    }
414  0 return value;
415    }
416   
417    /**
418    * Extracts xml element entry/protein/recommendedName/fullName
419    *
420    * @param entry
421    * @return
422    */
 
423  0 toggle static String getUniprotEntryDescription(Entry entry)
424    {
425  0 String desc = "";
426  0 if (entry.getProtein() != null
427    && entry.getProtein().getRecommendedName() != null)
428    {
429    // fullName is mandatory if recommendedName is present
430  0 desc = entry.getProtein().getRecommendedName().getFullName()
431    .getValue();
432    }
433  0 return desc;
434    }
435   
436    /**
437    * Constructs a sequence id by concatenating all entry/name elements with '|'
438    * separator
439    *
440    * @param entry
441    * @return
442    */
 
443  0 toggle static String getUniprotEntryId(Entry entry)
444    {
445  0 StringBuilder name = new StringBuilder(32);
446  0 for (String n : entry.getName())
447    {
448  0 if (name.length() > 0)
449    {
450  0 name.append(BAR_DELIMITER);
451    }
452  0 name.append(n);
453    }
454  0 return name.toString();
455    }
456   
457    /*
458    * (non-Javadoc)
459    *
460    * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
461    */
 
462  0 toggle @Override
463    public boolean isValidReference(String accession)
464    {
465    // TODO: make the following a standard validator
466  0 return (accession == null || accession.length() < 2) ? false
467    : getAccessionValidator().search(accession);
468    }
469   
470    /**
471    * return LDHA_CHICK uniprot entry
472    */
 
473  0 toggle @Override
474    public String getTestQuery()
475    {
476  0 return "P00340";
477    }
478   
 
479  0 toggle @Override
480    public String getDbName()
481    {
482  0 return "Uniprot"; // getDbSource();
483    }
484   
 
485  0 toggle @Override
486    public int getTier()
487    {
488  0 return 0;
489    }
490   
491    /**
492    * Reads the reply to the EBI Fetch Uniprot data query, unmarshals it to an
493    * Uniprot object, and returns the enclosed Entry objects, or null on any
494    * failure
495    *
496    * @param is
497    * @return
498    */
 
499  0 toggle public List<Entry> getUniprotEntries(InputStream is)
500    {
501  0 List<Entry> entries = null;
502  0 try
503    {
504  0 JAXBContext jc = JAXBContext
505    .newInstance("jalview.xml.binding.uniprot");
506  0 XMLStreamReader streamReader = XMLInputFactory.newInstance()
507    .createXMLStreamReader(is);
508  0 javax.xml.bind.Unmarshaller um = jc.createUnmarshaller();
509  0 JAXBElement<jalview.xml.binding.uniprot.Uniprot> uniprotElement = um
510    .unmarshal(streamReader,
511    jalview.xml.binding.uniprot.Uniprot.class);
512  0 jalview.xml.binding.uniprot.Uniprot uniprot = uniprotElement
513    .getValue();
514   
515  0 if (uniprot != null && !uniprot.getEntry().isEmpty())
516    {
517  0 entries = uniprot.getEntry();
518    }
519    } catch (JAXBException | XMLStreamException
520    | FactoryConfigurationError e)
521    {
522  0 e.printStackTrace();
523    }
524  0 return entries;
525    }
526    }