Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
EnsemblFeatures | 50 | 64 | 25 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | package jalview.ext.ensembl; | |
22 | ||
23 | import jalview.datamodel.Alignment; | |
24 | import jalview.datamodel.AlignmentI; | |
25 | import jalview.datamodel.Sequence; | |
26 | import jalview.datamodel.SequenceFeature; | |
27 | import jalview.datamodel.SequenceI; | |
28 | import jalview.io.gff.SequenceOntologyI; | |
29 | import jalview.util.JSONUtils; | |
30 | import jalview.util.Platform; | |
31 | ||
32 | import java.io.BufferedReader; | |
33 | import java.io.IOException; | |
34 | import java.net.MalformedURLException; | |
35 | import java.net.URL; | |
36 | import java.util.ArrayList; | |
37 | import java.util.Iterator; | |
38 | import java.util.List; | |
39 | import java.util.Map; | |
40 | ||
41 | import org.json.simple.parser.ParseException; | |
42 | ||
43 | /** | |
44 | * A client for fetching and processing Ensembl feature data in JSON format by | |
45 | * calling the overlap REST service | |
46 | * | |
47 | * @author gmcarstairs | |
48 | * @see http://rest.ensembl.org/documentation/info/overlap_id | |
49 | */ | |
50 | class EnsemblFeatures extends EnsemblRestClient | |
51 | { | |
52 | /* | |
53 | * The default features to retrieve from Ensembl | |
54 | * (can be overridden via the features parameter of getSequenceRecords) | |
55 | */ | |
56 | private EnsemblFeatureType[] featuresWanted = { EnsemblFeatureType.cds, | |
57 | EnsemblFeatureType.exon, EnsemblFeatureType.variation }; | |
58 | ||
59 | /** | |
60 | * Default constructor (to use rest.ensembl.org) | |
61 | */ | |
62 | 0 | public EnsemblFeatures() |
63 | { | |
64 | 0 | super(); |
65 | } | |
66 | ||
67 | /** | |
68 | * Constructor given the target domain to fetch data from | |
69 | * | |
70 | * @param d | |
71 | */ | |
72 | 0 | public EnsemblFeatures(String d) |
73 | { | |
74 | 0 | super(d); |
75 | } | |
76 | ||
77 | 0 | @Override |
78 | public String getDbName() | |
79 | { | |
80 | 0 | return "ENSEMBL (features)"; |
81 | } | |
82 | ||
83 | /** | |
84 | * Makes a query to the REST overlap endpoint for the given sequence | |
85 | * identifier. This returns an 'alignment' consisting of one 'dummy sequence' | |
86 | * (the genomic sequence for which overlap features are returned by the | |
87 | * service). This sequence will have on it sequence features which are the | |
88 | * real information of interest, such as CDS regions or sequence variations. | |
89 | */ | |
90 | 0 | @Override |
91 | public AlignmentI getSequenceRecords(String query) throws IOException | |
92 | { | |
93 | // TODO: use a vararg String... for getSequenceRecords instead? | |
94 | ||
95 | 0 | List<String> queries = new ArrayList<>(); |
96 | 0 | queries.add(query); |
97 | 0 | SequenceI seq = parseFeaturesJson(queries); |
98 | 0 | if (seq == null) |
99 | 0 | return null; |
100 | 0 | return new Alignment(new SequenceI[] { seq }); |
101 | ||
102 | } | |
103 | ||
104 | /** | |
105 | * Parses the JSON response into Jalview sequence features and attaches them | |
106 | * to a dummy sequence | |
107 | * | |
108 | * @param queries | |
109 | * @return the dummy sequence holding the parsed features, or null if no JSON data was returned | |
110 | */ | |
111 | 0 | @SuppressWarnings("unchecked") |
112 | private SequenceI parseFeaturesJson(List<String> queries) | |
113 | { | |
114 | 0 | SequenceI seq = new Sequence("Dummy", ""); |
115 | 0 | try |
116 | { | |
117 | 0 | Iterator<Object> rvals = (Iterator<Object>) getJSON(null, queries, -1, |
118 | MODE_ITERATOR, null); | |
119 | 0 | if (rvals == null) |
120 | { | |
121 | 0 | return null; |
122 | } | |
123 | 0 | while (rvals.hasNext()) |
124 | { | |
125 | 0 | try |
126 | { | |
127 | 0 | Map<String, Object> obj = (Map<String, Object>) rvals.next(); |
128 | 0 | String type = obj.get("feature_type").toString(); |
129 | 0 | int start = Integer.parseInt(obj.get("start").toString()); |
130 | 0 | int end = Integer.parseInt(obj.get("end").toString()); |
131 | 0 | String source = obj.get("source").toString(); |
132 | 0 | String strand = obj.get("strand").toString(); |
133 | 0 | Object phase = obj.get("phase"); |
134 | 0 | String alleles = JSONUtils |
135 | .arrayToStringList((List<Object>) obj.get("alleles")); | |
136 | 0 | String clinSig = JSONUtils.arrayToStringList( |
137 | (List<Object>) obj.get("clinical_significance")); | |
138 | ||
139 | /* | |
140 | * convert 'variation' to 'sequence_variant', and 'cds' to 'CDS' | |
141 | * so as to have a valid SO term for the feature type | |
142 | * ('gene', 'exon', 'transcript' don't need any conversion) | |
143 | */ | |
144 | 0 | if ("variation".equals(type)) |
145 | { | |
146 | 0 | type = SequenceOntologyI.SEQUENCE_VARIANT; |
147 | } | |
148 | 0 | else if (SequenceOntologyI.CDS.equalsIgnoreCase((type))) |
149 | { | |
150 | 0 | type = SequenceOntologyI.CDS; |
151 | } | |
152 | ||
153 | 0 | String desc = getFirstNotNull(obj, "alleles", "external_name", |
154 | JSON_ID); | |
155 | 0 | SequenceFeature sf = new SequenceFeature(type, desc, start, end, |
156 | source); | |
157 | 0 | sf.setStrand("1".equals(strand) ? "+" : "-"); |
158 | 0 | if (phase != null) |
159 | { | |
160 | 0 | sf.setPhase(phase.toString()); |
161 | } | |
162 | 0 | setFeatureAttribute(sf, obj, "id"); |
163 | 0 | setFeatureAttribute(sf, obj, "Parent"); |
164 | 0 | setFeatureAttribute(sf, obj, "consequence_type"); |
165 | 0 | sf.setValue("alleles", alleles); |
166 | 0 | sf.setValue("clinical_significance", clinSig); |
167 | ||
168 | 0 | seq.addSequenceFeature(sf); |
169 | ||
170 | } catch (Throwable t) | |
171 | { | |
172 | // ignore - keep trying other features | |
173 | } | |
174 | } | |
175 | } catch (ParseException | IOException e) | |
176 | { | |
177 | 0 | e.printStackTrace(); |
178 | // ignore | |
179 | } | |
180 | ||
181 | 0 | return seq; |
182 | } | |
183 | ||
184 | /** | |
185 | * Returns the first non-null attribute found (if any) as a string, formatted | |
186 | * suitably for display as feature description or tooltip. Answers null if | |
187 | * none of the attribute keys is present. | |
188 | * | |
189 | * @param obj | |
190 | * @param keys | |
191 | * @return | |
192 | */ | |
193 | 0 | @SuppressWarnings("unchecked") |
194 | protected String getFirstNotNull(Map<String, Object> obj, String... keys) | |
195 | { | |
196 | 0 | for (String key : keys) |
197 | { | |
198 | 0 | Object val = obj.get(key); |
199 | 0 | if (val != null) |
200 | { | |
201 | 0 | String s = val instanceof List<?> |
202 | ? JSONUtils.arrayToStringList((List<Object>) val) | |
203 | : val.toString(); | |
204 | 0 | if (!s.isEmpty()) |
205 | { | |
206 | 0 | return s; |
207 | } | |
208 | } | |
209 | } | |
210 | 0 | return null; |
211 | } | |
212 | ||
213 | /** | |
214 | * A helper method that reads the 'key' entry in the JSON object, and if not | |
215 | * null, sets its string value as an attribute on the sequence feature | |
216 | * | |
217 | * @param sf | |
218 | * @param obj | |
219 | * @param key | |
220 | */ | |
221 | 0 | protected void setFeatureAttribute(SequenceFeature sf, |
222 | Map<String, Object> obj, String key) | |
223 | { | |
224 | 0 | Object object = obj.get(key); |
225 | 0 | if (object != null) |
226 | { | |
227 | 0 | sf.setValue(key, object.toString()); |
228 | } | |
229 | } | |
230 | ||
231 | /** | |
232 | * Returns a URL for the REST overlap endpoint | |
233 | * | |
234 | * @param ids | |
235 | * @return | |
236 | */ | |
237 | 0 | @Override |
238 | protected URL getUrl(List<String> ids) throws MalformedURLException | |
239 | { | |
240 | 0 | StringBuffer urlstring = new StringBuffer(128); |
241 | 0 | urlstring.append(getDomain()).append("/overlap/id/").append(ids.get(0)); |
242 | ||
243 | // @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats | |
244 | 0 | urlstring.append("?content-type=" + getResponseMimeType()); |
245 | ||
246 | /* | |
247 | * specify object_type=gene in case the id is shared by transcript and/or protein; | |
248 | * currently only fetching features for gene sequences; | |
249 | * refactor in future if needed to fetch for transcripts | |
250 | */ | |
251 | 0 | urlstring.append("&").append(OBJECT_TYPE).append("=") |
252 | .append(OBJECT_TYPE_GENE); | |
253 | ||
254 | /* | |
255 | * specify features to retrieve | |
256 | * @see http://rest.ensembl.org/documentation/info/overlap_id | |
257 | * could make the list a configurable entry in .jalview_properties | |
258 | */ | |
259 | 0 | for (EnsemblFeatureType feature : featuresWanted) |
260 | { | |
261 | 0 | urlstring.append("&feature=").append(feature.name()); |
262 | } | |
263 | ||
264 | 0 | return new URL(urlstring.toString()); |
265 | } | |
266 | ||
267 | 0 | @Override |
268 | protected boolean useGetRequest() | |
269 | { | |
270 | 0 | return true; |
271 | } | |
272 | ||
273 | /** | |
274 | * Returns the MIME type for JSON. For GET requests the Content-type header | |
275 | * describes the required encoding of the response. | |
276 | */ | |
277 | 0 | @Override |
278 | protected String getRequestMimeType() | |
279 | { | |
280 | 0 | return "application/json"; |
281 | } | |
282 | ||
283 | /** | |
284 | * Returns the MIME type wanted for the response | |
285 | */ | |
286 | 0 | @Override |
287 | protected String getResponseMimeType() | |
288 | { | |
289 | 0 | return "application/json"; |
290 | } | |
291 | ||
292 | /** | |
293 | * Overloaded method that allows a list of features to retrieve to be | |
294 | * specified | |
295 | * | |
296 | * @param accId | |
297 | * @param features | |
298 | * @return | |
299 | * @throws IOException | |
300 | */ | |
301 | 0 | protected AlignmentI getSequenceRecords(String accId, |
302 | EnsemblFeatureType[] features) throws IOException | |
303 | { | |
304 | 0 | featuresWanted = features; |
305 | 0 | return getSequenceRecords(accId); |
306 | } | |
307 | } |