1 |
|
|
2 |
|
|
3 |
|
|
4 |
|
|
5 |
|
|
6 |
|
|
7 |
|
|
8 |
|
|
9 |
|
|
10 |
|
|
11 |
|
|
12 |
|
|
13 |
|
|
14 |
|
|
15 |
|
|
16 |
|
|
17 |
|
|
18 |
|
|
19 |
|
|
20 |
|
|
21 |
|
package jalview.ws.dbsources; |
22 |
|
|
23 |
|
import jalview.bin.Cache; |
24 |
|
import jalview.datamodel.Alignment; |
25 |
|
import jalview.datamodel.AlignmentI; |
26 |
|
import jalview.datamodel.DBRefEntry; |
27 |
|
import jalview.datamodel.DBRefSource; |
28 |
|
import jalview.datamodel.PDBEntry; |
29 |
|
import jalview.datamodel.Sequence; |
30 |
|
import jalview.datamodel.SequenceFeature; |
31 |
|
import jalview.datamodel.SequenceI; |
32 |
|
import jalview.datamodel.xdb.uniprot.UniprotEntry; |
33 |
|
import jalview.datamodel.xdb.uniprot.UniprotFeature; |
34 |
|
import jalview.datamodel.xdb.uniprot.UniprotFile; |
35 |
|
import jalview.ws.seqfetcher.DbSourceProxyImpl; |
36 |
|
|
37 |
|
import java.io.InputStream; |
38 |
|
import java.io.InputStreamReader; |
39 |
|
import java.io.Reader; |
40 |
|
import java.net.URL; |
41 |
|
import java.net.URLConnection; |
42 |
|
import java.util.ArrayList; |
43 |
|
import java.util.Vector; |
44 |
|
|
45 |
|
import org.exolab.castor.mapping.Mapping; |
46 |
|
import org.exolab.castor.xml.Unmarshaller; |
47 |
|
|
48 |
|
import com.stevesoft.pat.Regex; |
49 |
|
|
50 |
|
|
51 |
|
@author |
52 |
|
|
53 |
|
|
|
|
| 63.8% |
Uncovered Elements: 50 (138) |
Complexity: 35 |
Complexity Density: 0.37 |
|
54 |
|
public class Uniprot extends DbSourceProxyImpl |
55 |
|
{ |
56 |
|
private static final String DEFAULT_UNIPROT_DOMAIN = "https://www.uniprot.org"; |
57 |
|
|
58 |
|
private static final String BAR_DELIMITER = "|"; |
59 |
|
|
60 |
|
|
61 |
|
|
62 |
|
|
63 |
|
private static Mapping map; |
64 |
|
|
65 |
|
|
66 |
|
|
67 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
68 |
11 |
/**
 * Creates a new Uniprot source proxy. No state is set up here beyond what
 * the superclass constructor does.
 */
public Uniprot()
{
  super();
}
72 |
|
|
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
73 |
0 |
private String getDomain()... |
74 |
|
{ |
75 |
0 |
return Cache.getDefault("UNIPROT_DOMAIN", DEFAULT_UNIPROT_DOMAIN); |
76 |
|
} |
77 |
|
|
78 |
|
|
79 |
|
|
80 |
|
|
81 |
|
@see |
82 |
|
|
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
83 |
0 |
@Override... |
84 |
|
public String getAccessionSeparator() |
85 |
|
{ |
86 |
0 |
return null; |
87 |
|
} |
88 |
|
|
89 |
|
|
90 |
|
|
91 |
|
|
92 |
|
@see |
93 |
|
|
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
94 |
0 |
@Override... |
95 |
|
public Regex getAccessionValidator() |
96 |
|
{ |
97 |
0 |
return new Regex("([A-Z]+[0-9]+[A-Z0-9]+|[A-Z0-9]+_[A-Z0-9]+)"); |
98 |
|
} |
99 |
|
|
100 |
|
|
101 |
|
|
102 |
|
|
103 |
|
@see |
104 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
105 |
1114 |
@Override... |
106 |
|
public String getDbSource() |
107 |
|
{ |
108 |
1114 |
return DBRefSource.UNIPROT; |
109 |
|
} |
110 |
|
|
111 |
|
|
112 |
|
|
113 |
|
|
114 |
|
@see |
115 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
116 |
1 |
@Override... |
117 |
|
public String getDbVersion() |
118 |
|
{ |
119 |
1 |
return "0"; |
120 |
|
} |
121 |
|
|
122 |
|
|
123 |
|
|
124 |
|
|
125 |
|
|
126 |
|
|
127 |
|
@param |
128 |
|
@return |
129 |
|
|
|
|
| 88.2% |
Uncovered Elements: 2 (17) |
Complexity: 4 |
Complexity Density: 0.31 |
|
130 |
4 |
public Vector<UniprotEntry> getUniprotEntries(Reader fileReader)... |
131 |
|
{ |
132 |
4 |
UniprotFile uni = new UniprotFile(); |
133 |
4 |
try |
134 |
|
{ |
135 |
4 |
if (map == null) |
136 |
|
{ |
137 |
|
|
138 |
1 |
map = new Mapping(uni.getClass().getClassLoader()); |
139 |
1 |
URL url = getClass().getResource("/uniprot_mapping.xml"); |
140 |
1 |
map.loadMapping(url); |
141 |
|
} |
142 |
|
|
143 |
|
|
144 |
4 |
Unmarshaller unmar = new Unmarshaller(uni); |
145 |
4 |
unmar.setIgnoreExtraElements(true); |
146 |
4 |
unmar.setMapping(map); |
147 |
4 |
if (fileReader != null) |
148 |
|
{ |
149 |
4 |
uni = (UniprotFile) unmar.unmarshal(fileReader); |
150 |
|
} |
151 |
|
} catch (Exception e) |
152 |
|
{ |
153 |
0 |
System.out.println("Error getUniprotEntries() " + e); |
154 |
|
} |
155 |
|
|
156 |
4 |
return uni.getUniprotEntries(); |
157 |
|
} |
158 |
|
|
159 |
|
|
160 |
|
|
161 |
|
|
162 |
|
@see |
163 |
|
|
|
|
| 0% |
Uncovered Elements: 22 (22) |
Complexity: 3 |
Complexity Density: 0.15 |
|
164 |
0 |
@Override... |
165 |
|
public AlignmentI getSequenceRecords(String queries) throws Exception |
166 |
|
{ |
167 |
0 |
startQuery(); |
168 |
0 |
try |
169 |
|
{ |
170 |
0 |
queries = queries.toUpperCase().replaceAll( |
171 |
|
"(UNIPROT\\|?|UNIPROT_|UNIREF\\d+_|UNIREF\\d+\\|?)", ""); |
172 |
0 |
AlignmentI al = null; |
173 |
|
|
174 |
0 |
String downloadstring = getDomain() + "/uniprot/" + queries |
175 |
|
+ ".xml"; |
176 |
0 |
URL url = null; |
177 |
0 |
URLConnection urlconn = null; |
178 |
|
|
179 |
0 |
url = new URL(downloadstring); |
180 |
0 |
urlconn = url.openConnection(); |
181 |
0 |
InputStream istr = urlconn.getInputStream(); |
182 |
0 |
Vector<UniprotEntry> entries = getUniprotEntries( |
183 |
|
new InputStreamReader(istr, "UTF-8")); |
184 |
|
|
185 |
0 |
if (entries != null) |
186 |
|
{ |
187 |
0 |
ArrayList<SequenceI> seqs = new ArrayList<>(); |
188 |
0 |
for (UniprotEntry entry : entries) |
189 |
|
{ |
190 |
0 |
seqs.add(uniprotEntryToSequenceI(entry)); |
191 |
|
} |
192 |
0 |
al = new Alignment(seqs.toArray(new SequenceI[0])); |
193 |
|
|
194 |
|
} |
195 |
0 |
stopQuery(); |
196 |
0 |
return al; |
197 |
|
} catch (Exception e) |
198 |
|
{ |
199 |
0 |
throw (e); |
200 |
|
} finally |
201 |
|
{ |
202 |
0 |
stopQuery(); |
203 |
|
} |
204 |
|
} |
205 |
|
|
206 |
|
|
207 |
|
|
208 |
|
@param |
209 |
|
|
210 |
|
@return |
211 |
|
|
|
|
| 80.4% |
Uncovered Elements: 10 (51) |
Complexity: 10 |
Complexity Density: 0.27 |
|
212 |
1 |
public SequenceI uniprotEntryToSequenceI(UniprotEntry entry)... |
213 |
|
{ |
214 |
1 |
String id = getUniprotEntryId(entry); |
215 |
1 |
SequenceI sequence = new Sequence(id, |
216 |
|
entry.getUniprotSequence().getContent()); |
217 |
1 |
sequence.setDescription(getUniprotEntryDescription(entry)); |
218 |
|
|
219 |
1 |
final String dbVersion = getDbVersion(); |
220 |
1 |
ArrayList<DBRefEntry> dbRefs = new ArrayList<>(); |
221 |
1 |
for (String accessionId : entry.getAccession()) |
222 |
|
{ |
223 |
2 |
DBRefEntry dbRef = new DBRefEntry(DBRefSource.UNIPROT, dbVersion, |
224 |
|
accessionId); |
225 |
|
|
226 |
|
|
227 |
2 |
dbRefs.add(dbRef); |
228 |
|
} |
229 |
|
|
230 |
1 |
Vector<PDBEntry> onlyPdbEntries = new Vector<>(); |
231 |
1 |
for (PDBEntry pdb : entry.getDbReference()) |
232 |
|
{ |
233 |
3 |
DBRefEntry dbr = new DBRefEntry(); |
234 |
3 |
dbr.setSource(pdb.getType()); |
235 |
3 |
dbr.setAccessionId(pdb.getId()); |
236 |
3 |
dbr.setVersion(DBRefSource.UNIPROT + ":" + dbVersion); |
237 |
3 |
dbRefs.add(dbr); |
238 |
3 |
if ("PDB".equals(pdb.getType())) |
239 |
|
{ |
240 |
1 |
onlyPdbEntries.addElement(pdb); |
241 |
|
} |
242 |
3 |
if ("EMBL".equals(pdb.getType())) |
243 |
|
{ |
244 |
|
|
245 |
1 |
String cdsId = (String) pdb.getProperty("protein sequence ID"); |
246 |
1 |
if (cdsId != null && cdsId.trim().length() > 0) |
247 |
|
{ |
248 |
|
|
249 |
1 |
String[] vrs = cdsId.split("\\."); |
250 |
1 |
dbr = new DBRefEntry(DBRefSource.EMBLCDS, vrs.length > 1 ? vrs[1] |
251 |
|
: DBRefSource.UNIPROT + ":" + dbVersion, vrs[0]); |
252 |
1 |
dbRefs.add(dbr); |
253 |
|
} |
254 |
|
} |
255 |
3 |
if ("Ensembl".equals(pdb.getType())) |
256 |
|
{ |
257 |
|
|
258 |
|
|
259 |
|
|
260 |
|
|
261 |
|
|
262 |
|
|
263 |
|
|
264 |
0 |
String cdsId = (String) pdb.getProperty("protein sequence ID"); |
265 |
0 |
if (cdsId != null && cdsId.trim().length() > 0) |
266 |
|
{ |
267 |
0 |
dbr = new DBRefEntry(DBRefSource.ENSEMBL, |
268 |
|
DBRefSource.UNIPROT + ":" + dbVersion, cdsId.trim()); |
269 |
0 |
dbRefs.add(dbr); |
270 |
|
|
271 |
|
} |
272 |
|
} |
273 |
|
} |
274 |
|
|
275 |
1 |
sequence.setPDBId(onlyPdbEntries); |
276 |
1 |
if (entry.getFeature() != null) |
277 |
|
{ |
278 |
1 |
for (UniprotFeature uf : entry.getFeature()) |
279 |
|
{ |
280 |
6 |
SequenceFeature copy = new SequenceFeature(uf.getType(), |
281 |
|
uf.getDescription(), uf.getBegin(), uf.getEnd(), "Uniprot"); |
282 |
6 |
copy.setStatus(uf.getStatus()); |
283 |
6 |
sequence.addSequenceFeature(copy); |
284 |
|
} |
285 |
|
} |
286 |
1 |
for (DBRefEntry dbr : dbRefs) |
287 |
|
{ |
288 |
6 |
sequence.addDBRef(dbr); |
289 |
|
} |
290 |
1 |
return sequence; |
291 |
|
} |
292 |
|
|
293 |
|
|
294 |
|
|
295 |
|
@param |
296 |
|
|
297 |
|
@return |
298 |
|
|
|
|
| 92.3% |
Uncovered Elements: 1 (13) |
Complexity: 4 |
Complexity Density: 0.44 |
|
299 |
2 |
public static String getUniprotEntryDescription(UniprotEntry entry)... |
300 |
|
{ |
301 |
2 |
StringBuilder desc = new StringBuilder(32); |
302 |
2 |
if (entry.getProtein() != null && entry.getProtein().getName() != null) |
303 |
|
{ |
304 |
2 |
boolean first = true; |
305 |
2 |
for (String nm : entry.getProtein().getName()) |
306 |
|
{ |
307 |
4 |
if (!first) |
308 |
|
{ |
309 |
2 |
desc.append(" "); |
310 |
|
} |
311 |
4 |
first = false; |
312 |
4 |
desc.append(nm); |
313 |
|
} |
314 |
|
} |
315 |
2 |
return desc.toString(); |
316 |
|
} |
317 |
|
|
318 |
|
|
319 |
|
|
320 |
|
@param |
321 |
|
|
322 |
|
@return |
323 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (8) |
Complexity: 2 |
Complexity Density: 0.33 |
|
324 |
2 |
public static String getUniprotEntryId(UniprotEntry entry)... |
325 |
|
{ |
326 |
2 |
StringBuilder name = new StringBuilder(32); |
327 |
2 |
for (String n : entry.getName()) |
328 |
|
{ |
329 |
4 |
if (name.length() > 0) |
330 |
|
{ |
331 |
2 |
name.append(BAR_DELIMITER); |
332 |
|
} |
333 |
4 |
name.append(n); |
334 |
|
} |
335 |
2 |
return name.toString(); |
336 |
|
} |
337 |
|
|
338 |
|
|
339 |
|
|
340 |
|
|
341 |
|
@see |
342 |
|
|
|
|
| 0% |
Uncovered Elements: 3 (3) |
Complexity: 3 |
Complexity Density: 3 |
|
343 |
0 |
@Override... |
344 |
|
public boolean isValidReference(String accession) |
345 |
|
{ |
346 |
|
|
347 |
0 |
return (accession == null || accession.length() < 2) ? false |
348 |
|
: getAccessionValidator().search(accession); |
349 |
|
} |
350 |
|
|
351 |
|
|
352 |
|
|
353 |
|
|
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
354 |
0 |
@Override... |
355 |
|
public String getTestQuery() |
356 |
|
{ |
357 |
0 |
return "P00340"; |
358 |
|
} |
359 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
360 |
1108 |
@Override... |
361 |
|
public String getDbName() |
362 |
|
{ |
363 |
1108 |
return "Uniprot"; |
364 |
|
} |
365 |
|
|
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
366 |
0 |
@Override... |
367 |
|
public int getTier() |
368 |
|
{ |
369 |
0 |
return 0; |
370 |
|
} |
371 |
|
} |