1. Project Clover database Fri Dec 6 2024 13:47:14 GMT
  2. Package jalview.ext.ensembl

File EnsemblFeatures.java

 

Coverage histogram

../../../img/srcFileCovDistChart0.png
60% of files have more coverage

Code metrics

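Branches: 22
Statements: 64
Methods: 12
Classes: 1
LOC: 307
NCLOC: 169
Total complexity: 25
Complexity density: 0.39
Statements/Method: 5.33
Methods/Class: 12
Complexity/Method: 2.08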

Classes

Class
Line #
Actions
EnsemblFeatures 50 64 25
0.00%
 

Contributing tests

No tests hitting this source file were found.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.ext.ensembl;
22   
23    import jalview.datamodel.Alignment;
24    import jalview.datamodel.AlignmentI;
25    import jalview.datamodel.Sequence;
26    import jalview.datamodel.SequenceFeature;
27    import jalview.datamodel.SequenceI;
28    import jalview.io.gff.SequenceOntologyI;
29    import jalview.util.JSONUtils;
30    import jalview.util.Platform;
31   
32    import java.io.BufferedReader;
33    import java.io.IOException;
34    import java.net.MalformedURLException;
35    import java.net.URL;
36    import java.util.ArrayList;
37    import java.util.Iterator;
38    import java.util.List;
39    import java.util.Map;
40   
41    import org.json.simple.parser.ParseException;
42   
43    /**
44    * A client for fetching and processing Ensembl feature data in GFF format by
45    * calling the overlap REST service
46    *
47    * @author gmcarstairs
48    * @see http://rest.ensembl.org/documentation/info/overlap_id
49    */
 
50    class EnsemblFeatures extends EnsemblRestClient
51    {
52    /*
53    * The default features to retrieve from Ensembl
54    * can override in getSequenceRecords parameter
55    */
56    private EnsemblFeatureType[] featuresWanted = { EnsemblFeatureType.cds,
57    EnsemblFeatureType.exon, EnsemblFeatureType.variation };
58   
59    /**
60    * Default constructor (to use rest.ensembl.org)
61    */
 
62  0 toggle public EnsemblFeatures()
63    {
64  0 super();
65    }
66   
67    /**
68    * Constructor given the target domain to fetch data from
69    *
70    * @param d
71    */
 
72  0 toggle public EnsemblFeatures(String d)
73    {
74  0 super(d);
75    }
76   
 
77  0 toggle @Override
78    public String getDbName()
79    {
80  0 return "ENSEMBL (features)";
81    }
82   
83    /**
84    * Makes a query to the REST overlap endpoint for the given sequence
85    * identifier. This returns an 'alignment' consisting of one 'dummy sequence'
86    * (the genomic sequence for which overlap features are returned by the
87    * service). This sequence will have on it sequence features which are the
88    * real information of interest, such as CDS regions or sequence variations.
89    */
 
90  0 toggle @Override
91    public AlignmentI getSequenceRecords(String query) throws IOException
92    {
93    // TODO: use a vararg String... for getSequenceRecords instead?
94   
95  0 List<String> queries = new ArrayList<>();
96  0 queries.add(query);
97  0 SequenceI seq = parseFeaturesJson(queries);
98  0 if (seq == null)
99  0 return null;
100  0 return new Alignment(new SequenceI[] { seq });
101   
102    }
103   
104    /**
105    * Parses the JSON response into Jalview sequence features and attaches them
106    * to a dummy sequence
107    *
108    * @param br
109    * @return
110    */
 
111  0 toggle @SuppressWarnings("unchecked")
112    private SequenceI parseFeaturesJson(List<String> queries)
113    {
114  0 SequenceI seq = new Sequence("Dummy", "");
115  0 try
116    {
117  0 Iterator<Object> rvals = (Iterator<Object>) getJSON(null, queries, -1,
118    MODE_ITERATOR, null);
119  0 if (rvals == null)
120    {
121  0 return null;
122    }
123  0 while (rvals.hasNext())
124    {
125  0 try
126    {
127  0 Map<String, Object> obj = (Map<String, Object>) rvals.next();
128  0 String type = obj.get("feature_type").toString();
129  0 int start = Integer.parseInt(obj.get("start").toString());
130  0 int end = Integer.parseInt(obj.get("end").toString());
131  0 String source = obj.get("source").toString();
132  0 String strand = obj.get("strand").toString();
133  0 Object phase = obj.get("phase");
134  0 String alleles = JSONUtils
135    .arrayToStringList((List<Object>) obj.get("alleles"));
136  0 String clinSig = JSONUtils.arrayToStringList(
137    (List<Object>) obj.get("clinical_significance"));
138   
139    /*
140    * convert 'variation' to 'sequence_variant', and 'cds' to 'CDS'
141    * so as to have a valid SO term for the feature type
142    * ('gene', 'exon', 'transcript' don't need any conversion)
143    */
144  0 if ("variation".equals(type))
145    {
146  0 type = SequenceOntologyI.SEQUENCE_VARIANT;
147    }
148  0 else if (SequenceOntologyI.CDS.equalsIgnoreCase((type)))
149    {
150  0 type = SequenceOntologyI.CDS;
151    }
152   
153  0 String desc = getFirstNotNull(obj, "alleles", "external_name",
154    JSON_ID);
155  0 SequenceFeature sf = new SequenceFeature(type, desc, start, end,
156    source);
157  0 sf.setStrand("1".equals(strand) ? "+" : "-");
158  0 if (phase != null)
159    {
160  0 sf.setPhase(phase.toString());
161    }
162  0 setFeatureAttribute(sf, obj, "id");
163  0 setFeatureAttribute(sf, obj, "Parent");
164  0 setFeatureAttribute(sf, obj, "consequence_type");
165  0 sf.setValue("alleles", alleles);
166  0 sf.setValue("clinical_significance", clinSig);
167   
168  0 seq.addSequenceFeature(sf);
169   
170    } catch (Throwable t)
171    {
172    // ignore - keep trying other features
173    }
174    }
175    } catch (ParseException | IOException e)
176    {
177  0 e.printStackTrace();
178    // ignore
179    }
180   
181  0 return seq;
182    }
183   
184    /**
185    * Returns the first non-null attribute found (if any) as a string, formatted
186    * suitably for display as feature description or tooltip. Answers null if
187    * none of the attribute keys is present.
188    *
189    * @param obj
190    * @param keys
191    * @return
192    */
 
193  0 toggle @SuppressWarnings("unchecked")
194    protected String getFirstNotNull(Map<String, Object> obj, String... keys)
195    {
196  0 for (String key : keys)
197    {
198  0 Object val = obj.get(key);
199  0 if (val != null)
200    {
201  0 String s = val instanceof List<?>
202    ? JSONUtils.arrayToStringList((List<Object>) val)
203    : val.toString();
204  0 if (!s.isEmpty())
205    {
206  0 return s;
207    }
208    }
209    }
210  0 return null;
211    }
212   
213    /**
214    * A helper method that reads the 'key' entry in the JSON object, and if not
215    * null, sets its string value as an attribute on the sequence feature
216    *
217    * @param sf
218    * @param obj
219    * @param key
220    */
 
221  0 toggle protected void setFeatureAttribute(SequenceFeature sf,
222    Map<String, Object> obj, String key)
223    {
224  0 Object object = obj.get(key);
225  0 if (object != null)
226    {
227  0 sf.setValue(key, object.toString());
228    }
229    }
230   
231    /**
232    * Returns a URL for the REST overlap endpoint
233    *
234    * @param ids
235    * @return
236    */
 
237  0 toggle @Override
238    protected URL getUrl(List<String> ids) throws MalformedURLException
239    {
240  0 StringBuffer urlstring = new StringBuffer(128);
241  0 urlstring.append(getDomain()).append("/overlap/id/").append(ids.get(0));
242   
243    // @see https://github.com/Ensembl/ensembl-rest/wiki/Output-formats
244  0 urlstring.append("?content-type=" + getResponseMimeType());
245   
246    /*
247    * specify object_type=gene in case is shared by transcript and/or protein;
248    * currently only fetching features for gene sequences;
249    * refactor in future if needed to fetch for transcripts
250    */
251  0 urlstring.append("&").append(OBJECT_TYPE).append("=")
252    .append(OBJECT_TYPE_GENE);
253   
254    /*
255    * specify features to retrieve
256    * @see http://rest.ensembl.org/documentation/info/overlap_id
257    * could make the list a configurable entry in .jalview_properties
258    */
259  0 for (EnsemblFeatureType feature : featuresWanted)
260    {
261  0 urlstring.append("&feature=").append(feature.name());
262    }
263   
264  0 return new URL(urlstring.toString());
265    }
266   
 
267  0 toggle @Override
268    protected boolean useGetRequest()
269    {
270  0 return true;
271    }
272   
273    /**
274    * Returns the MIME type for GFF3. For GET requests the Content-type header
275    * describes the required encoding of the response.
276    */
 
277  0 toggle @Override
278    protected String getRequestMimeType()
279    {
280  0 return "application/json";
281    }
282   
283    /**
284    * Returns the MIME type wanted for the response
285    */
 
286  0 toggle @Override
287    protected String getResponseMimeType()
288    {
289  0 return "application/json";
290    }
291   
292    /**
293    * Overloaded method that allows a list of features to retrieve to be
294    * specified
295    *
296    * @param accId
297    * @param features
298    * @return
299    * @throws IOException
300    */
 
301  0 toggle protected AlignmentI getSequenceRecords(String accId,
302    EnsemblFeatureType[] features) throws IOException
303    {
304  0 featuresWanted = features;
305  0 return getSequenceRecords(accId);
306    }
307    }