1. Project Clover database Fri Dec 6 2024 13:47:14 GMT
  2. Package jalview.ext.so

File SequenceOntology.java

 

Coverage histogram

../../../img/srcFileCovDistChart9.png
12% of files have more coverage

Code metrics

42
100
18
1
475
277
47
0.47
5.56
18
2.61

Classes

Class
Line #
Actions
SequenceOntology 52 100 47
0.85 85%
 

Contributing tests

This file is covered by 81 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.ext.so;
22   
23    import java.io.BufferedInputStream;
24    import java.io.BufferedReader;
25    import java.io.IOException;
26    import java.io.InputStream;
27    import java.io.InputStreamReader;
28    import java.text.ParseException;
29    import java.util.ArrayList;
30    import java.util.Collections;
31    import java.util.HashMap;
32    import java.util.List;
33    import java.util.Map;
34    import java.util.NoSuchElementException;
35    import java.util.zip.ZipEntry;
36    import java.util.zip.ZipInputStream;
37   
38    import org.biojava.nbio.ontology.Ontology;
39    import org.biojava.nbio.ontology.Term;
40    import org.biojava.nbio.ontology.Term.Impl;
41    import org.biojava.nbio.ontology.Triple;
42    import org.biojava.nbio.ontology.io.OboParser;
43    import org.biojava.nbio.ontology.utils.Annotation;
44   
45    import jalview.bin.Console;
46    import jalview.io.gff.SequenceOntologyI;
47   
48    /**
49    * A wrapper class that parses the Sequence Ontology and exposes useful access
50    * methods. This version uses the BioJava parser.
51    */
 
52    public class SequenceOntology implements SequenceOntologyI
53    {
54    /*
55    * the parsed Ontology data as modelled by BioJava
56    */
57    private Ontology ontology;
58   
59    /*
60    * the ontology term for the isA relationship
61    */
62    private Term isA;
63   
64    /*
65    * lookup of terms by user readable name (NB not guaranteed unique)
66    */
67    private Map<String, Term> termsByDescription;
68   
69    /*
70    * Map where key is a Term and value is a (possibly empty) list of
71    * all Terms to which the key has an 'isA' relationship, either
72    * directly or indirectly (A isA B isA C)
73    */
74    private Map<Term, List<Term>> termIsA;
75   
76    private List<String> termsFound;
77   
78    private List<String> termsNotFound;
79   
/**
 * Creates the ontology wrapper: sets up empty lookup tables and then parses
 * and caches the SO OBO data file shipped as "so-xp-simple.obo.zip". Note
 * that this constructor is public, so the class itself does not enforce
 * singleton use.
 */
public SequenceOntology()
{
  this.termIsA = new HashMap<Term, List<Term>>();
  this.termsByDescription = new HashMap<String, Term>();
  this.termsNotFound = new ArrayList<String>();
  this.termsFound = new ArrayList<String>();

  // ".zip" is appended to this name by the loader
  this.loadOntologyZipFile("so-xp-simple.obo");
}
93   
94    /**
95    * Loads the given ontology file from a zip file with ".zip" appended
96    *
97    * @param ontologyFile
98    */
 
99  131 toggle protected void loadOntologyZipFile(String ontologyFile)
100    {
101  131 long now = System.currentTimeMillis();
102  131 ZipInputStream zipStream = null;
103  131 try
104    {
105  131 String zipFile = ontologyFile + ".zip";
106  131 InputStream inStream = this.getClass()
107    .getResourceAsStream("/" + zipFile);
108  131 zipStream = new ZipInputStream(new BufferedInputStream(inStream));
109  131 ZipEntry entry;
110  ? while ((entry = zipStream.getNextEntry()) != null)
111    {
112  393 if (entry.getName().equals(ontologyFile))
113    {
114  131 loadOboFile(zipStream);
115    }
116    }
117  131 long elapsed = System.currentTimeMillis() - now;
118  131 Console.info("Loaded Sequence Ontology from " + zipFile + " ("
119    + elapsed + "ms)");
120    } catch (Exception e)
121    {
122  0 e.printStackTrace();
123    } finally
124    {
125  131 closeStream(zipStream);
126    }
127    }
128   
129    /**
130    * Closes the input stream, swallowing all exceptions
131    *
132    * @param is
133    */
 
134  131 toggle protected void closeStream(InputStream is)
135    {
136  131 if (is != null)
137    {
138  131 try
139    {
140  131 is.close();
141    } catch (IOException e)
142    {
143    // ignore
144    }
145    }
146    }
147   
148    /**
149    * Reads, parses and stores the OBO file data
150    *
151    * @param is
152    * @throws ParseException
153    * @throws IOException
154    */
 
155  131 toggle protected void loadOboFile(InputStream is)
156    throws ParseException, IOException
157    {
158  131 BufferedReader oboFile = new BufferedReader(new InputStreamReader(is));
159  131 OboParser parser = new OboParser();
160  131 ontology = parser.parseOBO(oboFile, "SO", "the SO ontology");
161  131 isA = ontology.getTerm("is_a");
162  131 storeTermNames();
163    }
164   
165    /**
166    * Stores a lookup table of terms by description. Note that description is not
167    * guaranteed unique. Where duplicate descriptions are found, try to discard
168    * the term that is flagged as obsolete. However we do store obsolete terms
169    * where there is no duplication of description.
170    */
 
171  131 toggle protected void storeTermNames()
172    {
173  131 for (Term term : ontology.getTerms())
174    {
175  753512 if (term instanceof Impl)
176    {
177  341517 String description = term.getDescription();
178  341517 if (description != null)
179    {
180  299073 Term replaced = termsByDescription.get(description);
181  299073 if (replaced != null)
182    {
183  1048 boolean newTermIsObsolete = isObsolete(term);
184  1048 boolean oldTermIsObsolete = isObsolete(replaced);
185  1048 if (newTermIsObsolete && !oldTermIsObsolete)
186    {
187  262 Console.debug("Ignoring " + term.getName()
188    + " as obsolete and duplicated by "
189    + replaced.getName());
190  262 term = replaced;
191    }
192  786 else if (!newTermIsObsolete && oldTermIsObsolete)
193    {
194  786 Console.debug("Ignoring " + replaced.getName()
195    + " as obsolete and duplicated by " + term.getName());
196    }
197    else
198    {
199  0 Console.debug("Warning: " + term.getName() + " has replaced "
200    + replaced.getName() + " for lookup of '"
201    + description + "'");
202    }
203    }
204  299073 termsByDescription.put(description, term);
205    }
206    }
207    }
208    }
209   
210    /**
211    * Answers true if the term has property "is_obsolete" with value true, else
212    * false
213    *
214    * @param term
215    * @return
216    */
 
217  2096 toggle public static boolean isObsolete(Term term)
218    {
219  2096 Annotation ann = term.getAnnotation();
220  2096 if (ann != null)
221    {
222  2096 try
223    {
224  1048 if (Boolean.TRUE.equals(ann.getProperty("is_obsolete")))
225    {
226  1048 return true;
227    }
228    } catch (NoSuchElementException e)
229    {
230    // fall through to false
231    }
232    }
233  1048 return false;
234    }
235   
236    /**
237    * Test whether the given Sequence Ontology term is nucleotide_match (either
238    * directly or via is_a relationship)
239    *
240    * @param soTerm
241    * SO name or description
242    * @return
243    */
 
244  0 toggle public boolean isNucleotideMatch(String soTerm)
245    {
246  0 return isA(soTerm, NUCLEOTIDE_MATCH);
247    }
248   
249    /**
250    * Test whether the given Sequence Ontology term is protein_match (either
251    * directly or via is_a relationship)
252    *
253    * @param soTerm
254    * SO name or description
255    * @return
256    */
 
257  0 toggle public boolean isProteinMatch(String soTerm)
258    {
259  0 return isA(soTerm, PROTEIN_MATCH);
260    }
261   
262    /**
263    * Test whether the given Sequence Ontology term is polypeptide (either
264    * directly or via is_a relationship)
265    *
266    * @param soTerm
267    * SO name or description
268    * @return
269    */
 
270  0 toggle public boolean isPolypeptide(String soTerm)
271    {
272  0 return isA(soTerm, POLYPEPTIDE);
273    }
274   
275    /**
276    * Returns true if the given term has a (direct or indirect) 'isA'
277    * relationship with the parent
278    *
279    * @param child
280    * @param parent
281    * @return
282    */
 
283  218 toggle @Override
284    public boolean isA(String child, String parent)
285    {
286  218 if (child == null || parent == null)
287    {
288  3 return false;
289    }
290    /*
291    * optimise trivial checks like isA("CDS", "CDS")
292    */
293  215 if (child.equals(parent))
294    {
295  29 termFound(child);
296  29 return true;
297    }
298   
299  186 Term childTerm = getTerm(child);
300  186 if (childTerm != null)
301    {
302  143 termFound(child);
303    }
304    else
305    {
306  43 termNotFound(child);
307    }
308  186 Term parentTerm = getTerm(parent);
309   
310  186 return termIsA(childTerm, parentTerm);
311    }
312   
313    /**
314    * Records a valid term queried for, for reporting purposes
315    *
316    * @param term
317    */
 
318  172 toggle private void termFound(String term)
319    {
320  172 synchronized (termsFound)
321    {
322  172 if (!termsFound.contains(term))
323    {
324  49 termsFound.add(term);
325    }
326    }
327    }
328   
329    /**
330    * Records an invalid term queried for, for reporting purposes
331    *
332    * @param term
333    */
 
334  43 toggle private void termNotFound(String term)
335    {
336  43 synchronized (termsNotFound)
337    {
338  43 if (!termsNotFound.contains(term))
339    {
340  13 Console.error("SO term " + term + " invalid");
341  13 termsNotFound.add(term);
342    }
343    }
344    }
345   
346    /**
347    * Returns true if the childTerm 'isA' parentTerm (directly or indirectly).
348    *
349    * @param childTerm
350    * @param parentTerm
351    * @return
352    */
 
353  1120 toggle protected synchronized boolean termIsA(Term childTerm, Term parentTerm)
354    {
355    /*
356    * null term could arise from a misspelled SO description
357    */
358  1120 if (childTerm == null || parentTerm == null)
359    {
360  44 return false;
361    }
362   
363    /*
364    * recursive search endpoint:
365    */
366  1076 if (childTerm == parentTerm)
367    {
368  64 return true;
369    }
370   
371    /*
372    * lazy initialisation - find all of a term's parents (recursively)
373    * the first time this is called, and save them in a map.
374    */
375  1012 if (!termIsA.containsKey(childTerm))
376    {
377  41 findParents(childTerm);
378    }
379   
380  1012 List<Term> parents = termIsA.get(childTerm);
381  1012 for (Term parent : parents)
382    {
383  934 if (termIsA(parent, parentTerm))
384    {
385    /*
386    * add (great-)grandparents to parents list as they are discovered,
387    * for faster lookup next time
388    */
389  256 if (!parents.contains(parentTerm))
390    {
391  95 parents.add(parentTerm);
392    }
393  256 return true;
394    }
395    }
396   
397  756 return false;
398    }
399   
400    /**
401    * Finds all the 'isA' parents of the childTerm and stores them as a (possibly
402    * empty) list.
403    *
404    * @param childTerm
405    */
 
406  376 toggle protected synchronized void findParents(Term childTerm)
407    {
408  376 List<Term> result = new ArrayList<Term>();
409  376 for (Triple triple : ontology.getTriples(childTerm, null, isA))
410    {
411  335 Term parent = triple.getObject();
412  335 result.add(parent);
413   
414    /*
415    * and search for the parent's parents recursively
416    */
417  335 findParents(parent);
418    }
419  376 termIsA.put(childTerm, result);
420    }
421   
422    /**
423    * Returns the Term for a given name (e.g. "SO:0000735") or description (e.g.
424    * "sequence_location"), or null if not found.
425    *
426    * @param child
427    * @return
428    */
 
429  372 toggle protected Term getTerm(String nameOrDescription)
430    {
431  372 Term t = termsByDescription.get(nameOrDescription);
432  372 if (t == null)
433    {
434  61 try
435    {
436  61 t = ontology.getTerm(nameOrDescription);
437    } catch (NoSuchElementException e)
438    {
439    // not found
440    }
441    }
442  372 return t;
443    }
444   
 
445  0 toggle public boolean isSequenceVariant(String soTerm)
446    {
447  0 return isA(soTerm, SEQUENCE_VARIANT);
448    }
449   
450    /**
451    * Sorts (case-insensitive) and returns the list of valid terms queried for
452    */
 
453  0 toggle @Override
454    public List<String> termsFound()
455    {
456  0 synchronized (termsFound)
457    {
458  0 Collections.sort(termsFound, String.CASE_INSENSITIVE_ORDER);
459  0 return termsFound;
460    }
461    }
462   
463    /**
464    * Sorts (case-insensitive) and returns the list of invalid terms queried for
465    */
 
466  0 toggle @Override
467    public List<String> termsNotFound()
468    {
469  0 synchronized (termsNotFound)
470    {
471  0 Collections.sort(termsNotFound, String.CASE_INSENSITIVE_ORDER);
472  0 return termsNotFound;
473    }
474    }
475    }