Clover icon

jalviewX

  1. Project Clover database Wed Oct 31 2018 15:13:58 GMT
  2. Package jalview.ws.dbsources

File Uniprot.java

 

Coverage histogram

../../../img/srcFileCovDistChart7.png
28% of files have more coverage

Code metrics

28
95
15
1
371
236
35
0.37
6.33
15
2.33

Classes

Class Line # Actions
Uniprot 54 95 35 50
0.6376812 63.8%
 

Contributing tests

This file is covered by 82 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.ws.dbsources;
22   
23    import jalview.bin.Cache;
24    import jalview.datamodel.Alignment;
25    import jalview.datamodel.AlignmentI;
26    import jalview.datamodel.DBRefEntry;
27    import jalview.datamodel.DBRefSource;
28    import jalview.datamodel.PDBEntry;
29    import jalview.datamodel.Sequence;
30    import jalview.datamodel.SequenceFeature;
31    import jalview.datamodel.SequenceI;
32    import jalview.datamodel.xdb.uniprot.UniprotEntry;
33    import jalview.datamodel.xdb.uniprot.UniprotFeature;
34    import jalview.datamodel.xdb.uniprot.UniprotFile;
35    import jalview.ws.seqfetcher.DbSourceProxyImpl;
36   
37    import java.io.InputStream;
38    import java.io.InputStreamReader;
39    import java.io.Reader;
40    import java.net.URL;
41    import java.net.URLConnection;
42    import java.util.ArrayList;
43    import java.util.Vector;
44   
45    import org.exolab.castor.mapping.Mapping;
46    import org.exolab.castor.xml.Unmarshaller;
47   
48    import com.stevesoft.pat.Regex;
49   
50    /**
51    * @author JimP
52    *
53    */
 
54    public class Uniprot extends DbSourceProxyImpl
55    {
56    private static final String DEFAULT_UNIPROT_DOMAIN = "https://www.uniprot.org";
57   
58    private static final String BAR_DELIMITER = "|";
59   
60    /*
61    * Castor mapping loaded from uniprot_mapping.xml
62    */
63    private static Mapping map;
64   
65    /**
66    * Constructor
67    */
 
68  11 toggle public Uniprot()
69    {
70  11 super();
71    }
72   
 
73  0 toggle private String getDomain()
74    {
75  0 return Cache.getDefault("UNIPROT_DOMAIN", DEFAULT_UNIPROT_DOMAIN);
76    }
77   
78    /*
79    * (non-Javadoc)
80    *
81    * @see jalview.ws.DbSourceProxy#getAccessionSeparator()
82    */
 
83  0 toggle @Override
84    public String getAccessionSeparator()
85    {
86  0 return null;
87    }
88   
89    /*
90    * (non-Javadoc)
91    *
92    * @see jalview.ws.DbSourceProxy#getAccessionValidator()
93    */
 
94  0 toggle @Override
95    public Regex getAccessionValidator()
96    {
97  0 return new Regex("([A-Z]+[0-9]+[A-Z0-9]+|[A-Z0-9]+_[A-Z0-9]+)");
98    }
99   
100    /*
101    * (non-Javadoc)
102    *
103    * @see jalview.ws.DbSourceProxy#getDbSource()
104    */
 
105  1114 toggle @Override
106    public String getDbSource()
107    {
108  1114 return DBRefSource.UNIPROT;
109    }
110   
111    /*
112    * (non-Javadoc)
113    *
114    * @see jalview.ws.DbSourceProxy#getDbVersion()
115    */
 
116  1 toggle @Override
117    public String getDbVersion()
118    {
119  1 return "0"; // we really don't know what version we're on.
120    }
121   
122    /**
123    * Reads a file containing the reply to the EBI Fetch Uniprot data query,
124    * unmarshals it to a UniprotFile object, and returns the list of UniprotEntry
125    * data models (mapped from &lt;entry&gt; elements)
126    *
127    * @param fileReader
128    * @return
129    */
 
130  4 toggle public Vector<UniprotEntry> getUniprotEntries(Reader fileReader)
131    {
132  4 UniprotFile uni = new UniprotFile();
133  4 try
134    {
135  4 if (map == null)
136    {
137    // 1. Load the mapping information from the file
138  1 map = new Mapping(uni.getClass().getClassLoader());
139  1 URL url = getClass().getResource("/uniprot_mapping.xml");
140  1 map.loadMapping(url);
141    }
142   
143    // 2. Unmarshal the data
144  4 Unmarshaller unmar = new Unmarshaller(uni);
145  4 unmar.setIgnoreExtraElements(true);
146  4 unmar.setMapping(map);
147  4 if (fileReader != null)
148    {
149  4 uni = (UniprotFile) unmar.unmarshal(fileReader);
150    }
151    } catch (Exception e)
152    {
153  0 System.out.println("Error getUniprotEntries() " + e);
154    }
155   
156  4 return uni.getUniprotEntries();
157    }
158   
159    /*
160    * (non-Javadoc)
161    *
162    * @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[])
163    */
 
164  0 toggle @Override
165    public AlignmentI getSequenceRecords(String queries) throws Exception
166    {
167  0 startQuery();
168  0 try
169    {
170  0 queries = queries.toUpperCase().replaceAll(
171    "(UNIPROT\\|?|UNIPROT_|UNIREF\\d+_|UNIREF\\d+\\|?)", "");
172  0 AlignmentI al = null;
173   
174  0 String downloadstring = getDomain() + "/uniprot/" + queries
175    + ".xml";
176  0 URL url = null;
177  0 URLConnection urlconn = null;
178   
179  0 url = new URL(downloadstring);
180  0 urlconn = url.openConnection();
181  0 InputStream istr = urlconn.getInputStream();
182  0 Vector<UniprotEntry> entries = getUniprotEntries(
183    new InputStreamReader(istr, "UTF-8"));
184   
185  0 if (entries != null)
186    {
187  0 ArrayList<SequenceI> seqs = new ArrayList<>();
188  0 for (UniprotEntry entry : entries)
189    {
190  0 seqs.add(uniprotEntryToSequenceI(entry));
191    }
192  0 al = new Alignment(seqs.toArray(new SequenceI[0]));
193   
194    }
195  0 stopQuery();
196  0 return al;
197    } catch (Exception e)
198    {
199  0 throw (e);
200    } finally
201    {
202  0 stopQuery();
203    }
204    }
205   
206    /**
207    *
208    * @param entry
209    * UniprotEntry
210    * @return SequenceI instance created from the UniprotEntry instance
211    */
 
212  1 toggle public SequenceI uniprotEntryToSequenceI(UniprotEntry entry)
213    {
214  1 String id = getUniprotEntryId(entry);
215  1 SequenceI sequence = new Sequence(id,
216    entry.getUniprotSequence().getContent());
217  1 sequence.setDescription(getUniprotEntryDescription(entry));
218   
219  1 final String dbVersion = getDbVersion();
220  1 ArrayList<DBRefEntry> dbRefs = new ArrayList<>();
221  1 for (String accessionId : entry.getAccession())
222    {
223  2 DBRefEntry dbRef = new DBRefEntry(DBRefSource.UNIPROT, dbVersion,
224    accessionId);
225   
226    // mark dbRef as a primary reference for this sequence
227  2 dbRefs.add(dbRef);
228    }
229   
230  1 Vector<PDBEntry> onlyPdbEntries = new Vector<>();
231  1 for (PDBEntry pdb : entry.getDbReference())
232    {
233  3 DBRefEntry dbr = new DBRefEntry();
234  3 dbr.setSource(pdb.getType());
235  3 dbr.setAccessionId(pdb.getId());
236  3 dbr.setVersion(DBRefSource.UNIPROT + ":" + dbVersion);
237  3 dbRefs.add(dbr);
238  3 if ("PDB".equals(pdb.getType()))
239    {
240  1 onlyPdbEntries.addElement(pdb);
241    }
242  3 if ("EMBL".equals(pdb.getType()))
243    {
244    // look for a CDS reference and add it, too.
245  1 String cdsId = (String) pdb.getProperty("protein sequence ID");
246  1 if (cdsId != null && cdsId.trim().length() > 0)
247    {
248    // remove version
249  1 String[] vrs = cdsId.split("\\.");
250  1 dbr = new DBRefEntry(DBRefSource.EMBLCDS, vrs.length > 1 ? vrs[1]
251    : DBRefSource.UNIPROT + ":" + dbVersion, vrs[0]);
252  1 dbRefs.add(dbr);
253    }
254    }
255  3 if ("Ensembl".equals(pdb.getType()))
256    {
257    /*UniprotXML
258    * <dbReference type="Ensembl" id="ENST00000321556">
259    * <molecule id="Q9BXM7-1"/>
260    * <property type="protein sequence ID" value="ENSP00000364204"/>
261    * <property type="gene ID" value="ENSG00000158828"/>
262    * </dbReference>
263    */
264  0 String cdsId = (String) pdb.getProperty("protein sequence ID");
265  0 if (cdsId != null && cdsId.trim().length() > 0)
266    {
267  0 dbr = new DBRefEntry(DBRefSource.ENSEMBL,
268    DBRefSource.UNIPROT + ":" + dbVersion, cdsId.trim());
269  0 dbRefs.add(dbr);
270   
271    }
272    }
273    }
274   
275  1 sequence.setPDBId(onlyPdbEntries);
276  1 if (entry.getFeature() != null)
277    {
278  1 for (UniprotFeature uf : entry.getFeature())
279    {
280  6 SequenceFeature copy = new SequenceFeature(uf.getType(),
281    uf.getDescription(), uf.getBegin(), uf.getEnd(), "Uniprot");
282  6 copy.setStatus(uf.getStatus());
283  6 sequence.addSequenceFeature(copy);
284    }
285    }
286  1 for (DBRefEntry dbr : dbRefs)
287    {
288  6 sequence.addDBRef(dbr);
289    }
290  1 return sequence;
291    }
292   
293    /**
294    *
295    * @param entry
296    * UniportEntry
297    * @return protein name(s) delimited by a white space character
298    */
 
299  2 toggle public static String getUniprotEntryDescription(UniprotEntry entry)
300    {
301  2 StringBuilder desc = new StringBuilder(32);
302  2 if (entry.getProtein() != null && entry.getProtein().getName() != null)
303    {
304  2 boolean first = true;
305  2 for (String nm : entry.getProtein().getName())
306    {
307  4 if (!first)
308    {
309  2 desc.append(" ");
310    }
311  4 first = false;
312  4 desc.append(nm);
313    }
314    }
315  2 return desc.toString();
316    }
317   
318    /**
319    *
320    * @param entry
321    * UniprotEntry
322    * @return The accession id(s) and name(s) delimited by '|'.
323    */
 
324  2 toggle public static String getUniprotEntryId(UniprotEntry entry)
325    {
326  2 StringBuilder name = new StringBuilder(32);
327  2 for (String n : entry.getName())
328    {
329  4 if (name.length() > 0)
330    {
331  2 name.append(BAR_DELIMITER);
332    }
333  4 name.append(n);
334    }
335  2 return name.toString();
336    }
337   
338    /*
339    * (non-Javadoc)
340    *
341    * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
342    */
 
343  0 toggle @Override
344    public boolean isValidReference(String accession)
345    {
346    // TODO: make the following a standard validator
347  0 return (accession == null || accession.length() < 2) ? false
348    : getAccessionValidator().search(accession);
349    }
350   
351    /**
352    * return LDHA_CHICK uniprot entry
353    */
 
354  0 toggle @Override
355    public String getTestQuery()
356    {
357  0 return "P00340";
358    }
359   
 
360  1108 toggle @Override
361    public String getDbName()
362    {
363  1108 return "Uniprot"; // getDbSource();
364    }
365   
 
366  0 toggle @Override
367    public int getTier()
368    {
369  0 return 0;
370    }
371    }