Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
SequenceOntology | 52 | 100 | 47 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | package jalview.ext.so; | |
22 | ||
23 | import java.io.BufferedInputStream; | |
24 | import java.io.BufferedReader; | |
25 | import java.io.IOException; | |
26 | import java.io.InputStream; | |
27 | import java.io.InputStreamReader; | |
28 | import java.text.ParseException; | |
29 | import java.util.ArrayList; | |
30 | import java.util.Collections; | |
31 | import java.util.HashMap; | |
32 | import java.util.List; | |
33 | import java.util.Map; | |
34 | import java.util.NoSuchElementException; | |
35 | import java.util.zip.ZipEntry; | |
36 | import java.util.zip.ZipInputStream; | |
37 | ||
38 | import org.biojava.nbio.ontology.Ontology; | |
39 | import org.biojava.nbio.ontology.Term; | |
40 | import org.biojava.nbio.ontology.Term.Impl; | |
41 | import org.biojava.nbio.ontology.Triple; | |
42 | import org.biojava.nbio.ontology.io.OboParser; | |
43 | import org.biojava.nbio.ontology.utils.Annotation; | |
44 | ||
45 | import jalview.bin.Console; | |
46 | import jalview.io.gff.SequenceOntologyI; | |
47 | ||
48 | /** | |
49 | * A wrapper class that parses the Sequence Ontology and exposes useful access | |
50 | * methods. This version uses the BioJava parser. | |
51 | */ | |
52 | public class SequenceOntology implements SequenceOntologyI | |
53 | { | |
54 | /* | |
55 | * the parsed Ontology data as modelled by BioJava | |
56 | */ | |
57 | private Ontology ontology; | |
58 | ||
59 | /* | |
60 | * the ontology term for the isA relationship | |
61 | */ | |
62 | private Term isA; | |
63 | ||
64 | /* | |
65 | * lookup of terms by user readable name (NB not guaranteed unique) | |
66 | */ | |
67 | private Map<String, Term> termsByDescription; | |
68 | ||
69 | /* | |
70 | * Map where key is a Term and value is a (possibly empty) list of | |
71 | * all Terms to which the key has an 'isA' relationship, either | |
72 | * directly or indirectly (A isA B isA C) | |
73 | */ | |
74 | private Map<Term, List<Term>> termIsA; | |
75 | ||
76 | private List<String> termsFound; | |
77 | ||
78 | private List<String> termsNotFound; | |
79 | ||
/**
 * Creates the ontology wrapper. Sets up the (initially empty) lookup tables
 * and the records of terms found / not found, then parses and caches the SO
 * OBO data by loading "so-xp-simple.obo" via
 * {@link #loadOntologyZipFile(String)}.
 */
public SequenceOntology()
{
  termIsA = new HashMap<>();
  termsByDescription = new HashMap<>();
  termsNotFound = new ArrayList<>();
  termsFound = new ArrayList<>();

  loadOntologyZipFile("so-xp-simple.obo");
}
93 | ||
94 | /** | |
95 | * Loads the given ontology file from a zip file with ".zip" appended | |
96 | * | |
97 | * @param ontologyFile | |
98 | */ | |
99 | 131 | protected void loadOntologyZipFile(String ontologyFile) |
100 | { | |
101 | 131 | long now = System.currentTimeMillis(); |
102 | 131 | ZipInputStream zipStream = null; |
103 | 131 | try |
104 | { | |
105 | 131 | String zipFile = ontologyFile + ".zip"; |
106 | 131 | InputStream inStream = this.getClass() |
107 | .getResourceAsStream("/" + zipFile); | |
108 | 131 | zipStream = new ZipInputStream(new BufferedInputStream(inStream)); |
109 | 131 | ZipEntry entry; |
110 | ? | while ((entry = zipStream.getNextEntry()) != null) |
111 | { | |
112 | 393 | if (entry.getName().equals(ontologyFile)) |
113 | { | |
114 | 131 | loadOboFile(zipStream); |
115 | } | |
116 | } | |
117 | 131 | long elapsed = System.currentTimeMillis() - now; |
118 | 131 | Console.info("Loaded Sequence Ontology from " + zipFile + " (" |
119 | + elapsed + "ms)"); | |
120 | } catch (Exception e) | |
121 | { | |
122 | 0 | e.printStackTrace(); |
123 | } finally | |
124 | { | |
125 | 131 | closeStream(zipStream); |
126 | } | |
127 | } | |
128 | ||
129 | /** | |
130 | * Closes the input stream, swallowing all exceptions | |
131 | * | |
132 | * @param is | |
133 | */ | |
134 | 131 | protected void closeStream(InputStream is) |
135 | { | |
136 | 131 | if (is != null) |
137 | { | |
138 | 131 | try |
139 | { | |
140 | 131 | is.close(); |
141 | } catch (IOException e) | |
142 | { | |
143 | // ignore | |
144 | } | |
145 | } | |
146 | } | |
147 | ||
148 | /** | |
149 | * Reads, parses and stores the OBO file data | |
150 | * | |
151 | * @param is | |
152 | * @throws ParseException | |
153 | * @throws IOException | |
154 | */ | |
155 | 131 | protected void loadOboFile(InputStream is) |
156 | throws ParseException, IOException | |
157 | { | |
158 | 131 | BufferedReader oboFile = new BufferedReader(new InputStreamReader(is)); |
159 | 131 | OboParser parser = new OboParser(); |
160 | 131 | ontology = parser.parseOBO(oboFile, "SO", "the SO ontology"); |
161 | 131 | isA = ontology.getTerm("is_a"); |
162 | 131 | storeTermNames(); |
163 | } | |
164 | ||
165 | /** | |
166 | * Stores a lookup table of terms by description. Note that description is not | |
167 | * guaranteed unique. Where duplicate descriptions are found, try to discard | |
168 | * the term that is flagged as obsolete. However we do store obsolete terms | |
169 | * where there is no duplication of description. | |
170 | */ | |
171 | 131 | protected void storeTermNames() |
172 | { | |
173 | 131 | for (Term term : ontology.getTerms()) |
174 | { | |
175 | 753512 | if (term instanceof Impl) |
176 | { | |
177 | 341517 | String description = term.getDescription(); |
178 | 341517 | if (description != null) |
179 | { | |
180 | 299073 | Term replaced = termsByDescription.get(description); |
181 | 299073 | if (replaced != null) |
182 | { | |
183 | 1048 | boolean newTermIsObsolete = isObsolete(term); |
184 | 1048 | boolean oldTermIsObsolete = isObsolete(replaced); |
185 | 1048 | if (newTermIsObsolete && !oldTermIsObsolete) |
186 | { | |
187 | 262 | Console.debug("Ignoring " + term.getName() |
188 | + " as obsolete and duplicated by " | |
189 | + replaced.getName()); | |
190 | 262 | term = replaced; |
191 | } | |
192 | 786 | else if (!newTermIsObsolete && oldTermIsObsolete) |
193 | { | |
194 | 786 | Console.debug("Ignoring " + replaced.getName() |
195 | + " as obsolete and duplicated by " + term.getName()); | |
196 | } | |
197 | else | |
198 | { | |
199 | 0 | Console.debug("Warning: " + term.getName() + " has replaced " |
200 | + replaced.getName() + " for lookup of '" | |
201 | + description + "'"); | |
202 | } | |
203 | } | |
204 | 299073 | termsByDescription.put(description, term); |
205 | } | |
206 | } | |
207 | } | |
208 | } | |
209 | ||
210 | /** | |
211 | * Answers true if the term has property "is_obsolete" with value true, else | |
212 | * false | |
213 | * | |
214 | * @param term | |
215 | * @return | |
216 | */ | |
217 | 2096 | public static boolean isObsolete(Term term) |
218 | { | |
219 | 2096 | Annotation ann = term.getAnnotation(); |
220 | 2096 | if (ann != null) |
221 | { | |
222 | 2096 | try |
223 | { | |
224 | 1048 | if (Boolean.TRUE.equals(ann.getProperty("is_obsolete"))) |
225 | { | |
226 | 1048 | return true; |
227 | } | |
228 | } catch (NoSuchElementException e) | |
229 | { | |
230 | // fall through to false | |
231 | } | |
232 | } | |
233 | 1048 | return false; |
234 | } | |
235 | ||
236 | /** | |
237 | * Test whether the given Sequence Ontology term is nucleotide_match (either | |
238 | * directly or via is_a relationship) | |
239 | * | |
240 | * @param soTerm | |
241 | * SO name or description | |
242 | * @return | |
243 | */ | |
244 | 0 | public boolean isNucleotideMatch(String soTerm) |
245 | { | |
246 | 0 | return isA(soTerm, NUCLEOTIDE_MATCH); |
247 | } | |
248 | ||
249 | /** | |
250 | * Test whether the given Sequence Ontology term is protein_match (either | |
251 | * directly or via is_a relationship) | |
252 | * | |
253 | * @param soTerm | |
254 | * SO name or description | |
255 | * @return | |
256 | */ | |
257 | 0 | public boolean isProteinMatch(String soTerm) |
258 | { | |
259 | 0 | return isA(soTerm, PROTEIN_MATCH); |
260 | } | |
261 | ||
262 | /** | |
263 | * Test whether the given Sequence Ontology term is polypeptide (either | |
264 | * directly or via is_a relationship) | |
265 | * | |
266 | * @param soTerm | |
267 | * SO name or description | |
268 | * @return | |
269 | */ | |
270 | 0 | public boolean isPolypeptide(String soTerm) |
271 | { | |
272 | 0 | return isA(soTerm, POLYPEPTIDE); |
273 | } | |
274 | ||
275 | /** | |
276 | * Returns true if the given term has a (direct or indirect) 'isA' | |
277 | * relationship with the parent | |
278 | * | |
279 | * @param child | |
280 | * @param parent | |
281 | * @return | |
282 | */ | |
283 | 218 | @Override |
284 | public boolean isA(String child, String parent) | |
285 | { | |
286 | 218 | if (child == null || parent == null) |
287 | { | |
288 | 3 | return false; |
289 | } | |
290 | /* | |
291 | * optimise trivial checks like isA("CDS", "CDS") | |
292 | */ | |
293 | 215 | if (child.equals(parent)) |
294 | { | |
295 | 29 | termFound(child); |
296 | 29 | return true; |
297 | } | |
298 | ||
299 | 186 | Term childTerm = getTerm(child); |
300 | 186 | if (childTerm != null) |
301 | { | |
302 | 143 | termFound(child); |
303 | } | |
304 | else | |
305 | { | |
306 | 43 | termNotFound(child); |
307 | } | |
308 | 186 | Term parentTerm = getTerm(parent); |
309 | ||
310 | 186 | return termIsA(childTerm, parentTerm); |
311 | } | |
312 | ||
313 | /** | |
314 | * Records a valid term queried for, for reporting purposes | |
315 | * | |
316 | * @param term | |
317 | */ | |
318 | 172 | private void termFound(String term) |
319 | { | |
320 | 172 | synchronized (termsFound) |
321 | { | |
322 | 172 | if (!termsFound.contains(term)) |
323 | { | |
324 | 49 | termsFound.add(term); |
325 | } | |
326 | } | |
327 | } | |
328 | ||
329 | /** | |
330 | * Records an invalid term queried for, for reporting purposes | |
331 | * | |
332 | * @param term | |
333 | */ | |
334 | 43 | private void termNotFound(String term) |
335 | { | |
336 | 43 | synchronized (termsNotFound) |
337 | { | |
338 | 43 | if (!termsNotFound.contains(term)) |
339 | { | |
340 | 13 | Console.error("SO term " + term + " invalid"); |
341 | 13 | termsNotFound.add(term); |
342 | } | |
343 | } | |
344 | } | |
345 | ||
346 | /** | |
347 | * Returns true if the childTerm 'isA' parentTerm (directly or indirectly). | |
348 | * | |
349 | * @param childTerm | |
350 | * @param parentTerm | |
351 | * @return | |
352 | */ | |
353 | 1120 | protected synchronized boolean termIsA(Term childTerm, Term parentTerm) |
354 | { | |
355 | /* | |
356 | * null term could arise from a misspelled SO description | |
357 | */ | |
358 | 1120 | if (childTerm == null || parentTerm == null) |
359 | { | |
360 | 44 | return false; |
361 | } | |
362 | ||
363 | /* | |
364 | * recursive search endpoint: | |
365 | */ | |
366 | 1076 | if (childTerm == parentTerm) |
367 | { | |
368 | 64 | return true; |
369 | } | |
370 | ||
371 | /* | |
372 | * lazy initialisation - find all of a term's parents (recursively) | |
373 | * the first time this is called, and save them in a map. | |
374 | */ | |
375 | 1012 | if (!termIsA.containsKey(childTerm)) |
376 | { | |
377 | 41 | findParents(childTerm); |
378 | } | |
379 | ||
380 | 1012 | List<Term> parents = termIsA.get(childTerm); |
381 | 1012 | for (Term parent : parents) |
382 | { | |
383 | 934 | if (termIsA(parent, parentTerm)) |
384 | { | |
385 | /* | |
386 | * add (great-)grandparents to parents list as they are discovered, | |
387 | * for faster lookup next time | |
388 | */ | |
389 | 256 | if (!parents.contains(parentTerm)) |
390 | { | |
391 | 95 | parents.add(parentTerm); |
392 | } | |
393 | 256 | return true; |
394 | } | |
395 | } | |
396 | ||
397 | 756 | return false; |
398 | } | |
399 | ||
400 | /** | |
401 | * Finds all the 'isA' parents of the childTerm and stores them as a (possibly | |
402 | * empty) list. | |
403 | * | |
404 | * @param childTerm | |
405 | */ | |
406 | 376 | protected synchronized void findParents(Term childTerm) |
407 | { | |
408 | 376 | List<Term> result = new ArrayList<Term>(); |
409 | 376 | for (Triple triple : ontology.getTriples(childTerm, null, isA)) |
410 | { | |
411 | 335 | Term parent = triple.getObject(); |
412 | 335 | result.add(parent); |
413 | ||
414 | /* | |
415 | * and search for the parent's parents recursively | |
416 | */ | |
417 | 335 | findParents(parent); |
418 | } | |
419 | 376 | termIsA.put(childTerm, result); |
420 | } | |
421 | ||
422 | /** | |
423 | * Returns the Term for a given name (e.g. "SO:0000735") or description (e.g. | |
424 | * "sequence_location"), or null if not found. | |
425 | * | |
426 | * @param child | |
427 | * @return | |
428 | */ | |
429 | 372 | protected Term getTerm(String nameOrDescription) |
430 | { | |
431 | 372 | Term t = termsByDescription.get(nameOrDescription); |
432 | 372 | if (t == null) |
433 | { | |
434 | 61 | try |
435 | { | |
436 | 61 | t = ontology.getTerm(nameOrDescription); |
437 | } catch (NoSuchElementException e) | |
438 | { | |
439 | // not found | |
440 | } | |
441 | } | |
442 | 372 | return t; |
443 | } | |
444 | ||
445 | 0 | public boolean isSequenceVariant(String soTerm) |
446 | { | |
447 | 0 | return isA(soTerm, SEQUENCE_VARIANT); |
448 | } | |
449 | ||
450 | /** | |
451 | * Sorts (case-insensitive) and returns the list of valid terms queried for | |
452 | */ | |
453 | 0 | @Override |
454 | public List<String> termsFound() | |
455 | { | |
456 | 0 | synchronized (termsFound) |
457 | { | |
458 | 0 | Collections.sort(termsFound, String.CASE_INSENSITIVE_ORDER); |
459 | 0 | return termsFound; |
460 | } | |
461 | } | |
462 | ||
463 | /** | |
464 | * Sorts (case-insensitive) and returns the list of invalid terms queried for | |
465 | */ | |
466 | 0 | @Override |
467 | public List<String> termsNotFound() | |
468 | { | |
469 | 0 | synchronized (termsNotFound) |
470 | { | |
471 | 0 | Collections.sort(termsNotFound, String.CASE_INSENSITIVE_ORDER); |
472 | 0 | return termsNotFound; |
473 | } | |
474 | } | |
475 | } |