Clover icon

Coverage Report

  1. Project Clover database Wed Nov 13 2024 18:27:33 GMT
  2. Package jalview.ext.ensembl

File EnsemblMap.java

 

Coverage histogram

../../../img/srcFileCovDistChart0.png
59% of files have more coverage

Code metrics

26
82
14
1
393
217
33
0.4
5.86
14
2.36

Classes

Class Line # Actions
EnsemblMap 53 82 33
0.00%
 

Contributing tests

No tests hitting this source file were found.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.ext.ensembl;
22   
23    import jalview.datamodel.AlignmentI;
24    import jalview.datamodel.DBRefSource;
25    import jalview.datamodel.GeneLociI;
26    import jalview.datamodel.GeneLocus;
27    import jalview.datamodel.Mapping;
28    import jalview.util.MapList;
29   
30    import java.io.IOException;
31    import java.net.MalformedURLException;
32    import java.net.URL;
33    import java.util.ArrayList;
34    import java.util.Collections;
35    import java.util.Iterator;
36    import java.util.List;
37    import java.util.Map;
38   
39    import org.json.simple.parser.ParseException;
40   
41    /**
42    * A client for the Ensembl REST service /map endpoint, to convert from
43    * coordinates of one genome assembly to another.
44    * <p>
45    * Note that species and assembly identifiers passed to this class must be valid
46    * in Ensembl. They are not case sensitive.
47    *
48    * @author gmcarstairs
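49    * @see <a href="https://rest.ensembl.org/documentation/info/assembly_map">Ensembl REST assembly_map documentation</a>
50    * @see <a href="https://rest.ensembl.org/info/assembly/human?content-type=text/xml">Ensembl REST assembly info (human)</a>
51    * @see <a href="https://rest.ensembl.org/info/species?content-type=text/xml">Ensembl REST species info</a>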
52    */
 
53    public class EnsemblMap extends EnsemblRestClient
54    {
55    private static final String MAPPED = "mapped";
56   
57    private static final String MAPPINGS = "mappings";
58   
59    private static final String CDS = "cds";
60   
61    private static final String CDNA = "cdna";
62   
/**
 * Default constructor. Delegates to the no-argument superclass constructor
 * (per the original note, this is intended to use rest.ensembl.org).
 */
public EnsemblMap()
{
  super();
}
70   
/**
 * Constructor given the target domain to fetch data from
 *
 * @param domain
 *          the service domain, handed unchanged to the superclass
 *          constructor
 */
public EnsemblMap(String domain)
{
  super(domain);
}
80   
 
81  0 toggle @Override
82    public String getDbName()
83    {
84  0 return DBRefSource.ENSEMBL;
85    }
86   
 
87  0 toggle @Override
88    public AlignmentI getSequenceRecords(String queries) throws Exception
89    {
90  0 return null; // not used
91    }
92   
93    /**
94    * Constructs a URL of the format <code>
95    * http://rest.ensembl.org/map/human/GRCh38/17:45051610..45109016:1/GRCh37?content-type=application/json
96    * </code>
97    *
98    * @param species
99    * @param chromosome
100    * @param fromRef
101    * @param toRef
102    * @param startPos
103    * @param endPos
104    * @return
105    * @throws MalformedURLException
106    */
 
107  0 toggle protected URL getAssemblyMapUrl(String species, String chromosome,
108    String fromRef, String toRef, int startPos, int endPos)
109    throws MalformedURLException
110    {
111    /*
112    * start-end might be reverse strand - present forwards to the service
113    */
114  0 boolean forward = startPos <= endPos;
115  0 int start = forward ? startPos : endPos;
116  0 int end = forward ? endPos : startPos;
117  0 String strand = forward ? "1" : "-1";
118  0 String url = String.format(
119    "%s/map/%s/%s/%s:%d..%d:%s/%s?content-type=application/json",
120    getDomain(), species, fromRef, chromosome, start, end, strand,
121    toRef);
122  0 return new URL(url);
123    }
124   
 
125  0 toggle @Override
126    protected boolean useGetRequest()
127    {
128  0 return true;
129    }
130   
 
131  0 toggle @Override
132    protected URL getUrl(List<String> ids) throws MalformedURLException
133    {
134  0 return null; // not used
135    }
136   
137    /**
138    * Calls the REST /map service to get the chromosomal coordinates (start/end)
139    * in 'toRef' that corresponding to the (start/end) queryRange in 'fromRef'
140    *
141    * @param species
142    * @param chromosome
143    * @param fromRef
144    * @param toRef
145    * @param queryRange
146    * @return
147    * @see http://rest.ensemblgenomes.org/documentation/info/assembly_map
148    */
 
149  0 toggle public int[] getAssemblyMapping(String species, String chromosome,
150    String fromRef, String toRef, int[] queryRange)
151    {
152  0 URL url = null;
153  0 try
154    {
155  0 url = getAssemblyMapUrl(species, chromosome, fromRef, toRef,
156    queryRange[0], queryRange[1]);
157  0 return (parseAssemblyMappingResponse(url));
158    } catch (Throwable t)
159    {
160  0 jalview.bin.Console
161    .outPrintln("Error calling " + url + ": " + t.getMessage());
162  0 return null;
163    }
164    }
165   
166    /**
167    * Parses the JSON response from the /map/&lt;species&gt;/ REST service. The
168    * format is (with some fields omitted)
169    *
170    * <pre>
171    * {"mappings":
172    * [{
173    * "original": {"end":45109016,"start":45051610},
174    * "mapped" : {"end":43186384,"start":43128978}
175    * }] }
176    * </pre>
177    *
178    * @param br
179    * @return
180    */
 
181  0 toggle @SuppressWarnings("unchecked")
182    protected int[] parseAssemblyMappingResponse(URL url)
183    {
184  0 int[] result = null;
185   
186  0 try
187    {
188  0 Iterator<Object> rvals = (Iterator<Object>) getJSON(url, null, -1,
189    MODE_ITERATOR, MAPPINGS);
190  0 if (rvals == null)
191    {
192  0 return null;
193    }
194  0 while (rvals.hasNext())
195    {
196    // todo check for "mapped"
197  0 Map<String, Object> val = (Map<String, Object>) rvals.next();
198  0 Map<String, Object> mapped = (Map<String, Object>) val.get(MAPPED);
199  0 int start = Integer.parseInt(mapped.get("start").toString());
200  0 int end = Integer.parseInt(mapped.get("end").toString());
201  0 String strand = mapped.get("strand").toString();
202  0 if ("1".equals(strand))
203    {
204  0 result = new int[] { start, end };
205    }
206    else
207    {
208  0 result = new int[] { end, start };
209    }
210    }
211    } catch (IOException | ParseException | NumberFormatException e)
212    {
213    // ignore
214    }
215  0 return result;
216    }
217   
218    /**
219    * Calls the REST /map/cds/id service, and returns a DBRefEntry holding the
220    * returned chromosomal coordinates, or returns null if the call fails
221    *
222    * @param division
223    * e.g. Ensembl, EnsemblMetazoa
224    * @param accession
225    * e.g. ENST00000592782, Y55B1AR.1.1
226    * @param start
227    * @param end
228    * @return
229    */
 
230  0 toggle public GeneLociI getCdsMapping(String division, String accession,
231    int start, int end)
232    {
233  0 return getIdMapping(division, accession, start, end, CDS);
234    }
235   
236    /**
237    * Calls the REST /map/cdna/id service, and returns a DBRefEntry holding the
238    * returned chromosomal coordinates, or returns null if the call fails
239    *
240    * @param division
241    * e.g. Ensembl, EnsemblMetazoa
242    * @param accession
243    * e.g. ENST00000592782, Y55B1AR.1.1
244    * @param start
245    * @param end
246    * @return
247    */
 
248  0 toggle public GeneLociI getCdnaMapping(String division, String accession,
249    int start, int end)
250    {
251  0 return getIdMapping(division, accession, start, end, CDNA);
252    }
253   
 
254  0 toggle GeneLociI getIdMapping(String division, String accession, int start,
255    int end, String cdsOrCdna)
256    {
257  0 URL url = null;
258  0 try
259    {
260  0 String domain = new EnsemblInfo().getDomain(division);
261  0 if (domain != null)
262    {
263  0 url = getIdMapUrl(domain, accession, start, end, cdsOrCdna);
264  0 return (parseIdMappingResponse(url, accession, domain));
265    }
266  0 return null;
267    } catch (Throwable t)
268    {
269  0 jalview.bin.Console
270    .outPrintln("Error calling " + url + ": " + t.getMessage());
271  0 return null;
272    }
273    }
274   
275    /**
276    * Constructs a URL to the /map/cds/<id> or /map/cdna/<id> REST service. The
277    * REST call is to either ensembl or ensemblgenomes, as determined from the
278    * division, e.g. Ensembl or EnsemblProtists.
279    *
280    * @param domain
281    * @param accession
282    * @param start
283    * @param end
284    * @param cdsOrCdna
285    * @return
286    * @throws MalformedURLException
287    */
 
288  0 toggle URL getIdMapUrl(String domain, String accession, int start, int end,
289    String cdsOrCdna) throws MalformedURLException
290    {
291  0 String url = String.format(
292    "%s/map/%s/%s/%d..%d?include_original_region=1&content-type=application/json",
293    domain, cdsOrCdna, accession, start, end);
294  0 return new URL(url);
295    }
296   
297    /**
298    * Parses the JSON response from the /map/cds/ or /map/cdna REST service. The
299    * format is
300    *
301    * <pre>
302    * {"mappings":
303    * [
304    * {"assembly_name":"TAIR10","end":2501311,"seq_region_name":"1","gap":0,
305    * "strand":-1,"coord_system":"chromosome","rank":0,"start":2501114},
306    * {"assembly_name":"TAIR10","end":2500815,"seq_region_name":"1","gap":0,
307    * "strand":-1,"coord_system":"chromosome","rank":0,"start":2500714}
308    * ]
309    * }
310    * </pre>
311    *
312    * @param br
313    * @param accession
314    * @param domain
315    * @return
316    */
 
317  0 toggle @SuppressWarnings("unchecked")
318    GeneLociI parseIdMappingResponse(URL url, String accession, String domain)
319    {
320   
321  0 try
322    {
323  0 Iterator<Object> rvals = (Iterator<Object>) getJSON(url, null, -1,
324    MODE_ITERATOR, MAPPINGS);
325  0 if (rvals == null)
326    {
327  0 return null;
328    }
329  0 String assembly = null;
330  0 String chromosome = null;
331  0 int fromEnd = 0;
332  0 List<int[]> regions = new ArrayList<>();
333   
334  0 while (rvals.hasNext())
335    {
336  0 Map<String, Object> val = (Map<String, Object>) rvals.next();
337  0 Map<String, Object> original = (Map<String, Object>) val
338    .get("original");
339  0 fromEnd = Integer.parseInt(original.get("end").toString());
340   
341  0 Map<String, Object> mapped = (Map<String, Object>) val.get(MAPPED);
342  0 int start = Integer.parseInt(mapped.get("start").toString());
343  0 int end = Integer.parseInt(mapped.get("end").toString());
344  0 String ass = mapped.get("assembly_name").toString();
345  0 if (assembly != null && !assembly.equals(ass))
346    {
347  0 jalview.bin.Console.errPrintln(
348    "EnsemblMap found multiple assemblies - can't resolve");
349  0 return null;
350    }
351  0 assembly = ass;
352  0 String chr = mapped.get("seq_region_name").toString();
353  0 if (chromosome != null && !chromosome.equals(chr))
354    {
355  0 jalview.bin.Console.errPrintln(
356    "EnsemblMap found multiple chromosomes - can't resolve");
357  0 return null;
358    }
359  0 chromosome = chr;
360  0 String strand = mapped.get("strand").toString();
361  0 if ("-1".equals(strand))
362    {
363  0 regions.add(new int[] { end, start });
364    }
365    else
366    {
367  0 regions.add(new int[] { start, end });
368    }
369    }
370   
371    /*
372    * processed all mapped regions on chromosome, assemble the result,
373    * having first fetched the species id for the accession
374    */
375  0 final String species = new EnsemblLookup(domain)
376    .getSpecies(accession);
377  0 final String as = assembly;
378  0 final String chr = chromosome;
379  0 List<int[]> fromRange = Collections
380    .singletonList(new int[]
381    { 1, fromEnd });
382  0 Mapping mapping = new Mapping(new MapList(fromRange, regions, 1, 1));
383  0 return new GeneLocus(species == null ? "" : species, as, chr,
384    mapping);
385    } catch (IOException | ParseException | NumberFormatException e)
386    {
387    // ignore
388    }
389   
390  0 return null;
391    }
392   
393    }