Clover icon

Coverage Report

  1. Project Clover database Thu Aug 13 2020 12:04:21 BST
  2. Package jalview.ext.so

File SequenceOntology.java

 

Coverage histogram

../../../img/srcFileCovDistChart9.png
12% of files have more coverage

Code metrics

42
100
18
1
474
276
47
0.47
5.56
18
2.61

Classes

Class Line # Actions
SequenceOntology 51 100 47
0.85 (85%)
 

Contributing tests

This file is covered by 25 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.ext.so;
22   
23    import jalview.io.gff.SequenceOntologyI;
24   
25    import java.io.BufferedInputStream;
26    import java.io.BufferedReader;
27    import java.io.IOException;
28    import java.io.InputStream;
29    import java.io.InputStreamReader;
30    import java.text.ParseException;
31    import java.util.ArrayList;
32    import java.util.Collections;
33    import java.util.HashMap;
34    import java.util.List;
35    import java.util.Map;
36    import java.util.NoSuchElementException;
37    import java.util.zip.ZipEntry;
38    import java.util.zip.ZipInputStream;
39   
40    import org.biojava.nbio.ontology.Ontology;
41    import org.biojava.nbio.ontology.Term;
42    import org.biojava.nbio.ontology.Term.Impl;
43    import org.biojava.nbio.ontology.Triple;
44    import org.biojava.nbio.ontology.io.OboParser;
45    import org.biojava.nbio.ontology.utils.Annotation;
46   
47    /**
48    * A wrapper class that parses the Sequence Ontology and exposes useful access
49    * methods. This version uses the BioJava parser.
50    */
 
51    public class SequenceOntology implements SequenceOntologyI
52    {
53    /*
54    * the parsed Ontology data as modelled by BioJava
55    */
56    private Ontology ontology;
57   
58    /*
59    * the ontology term for the isA relationship
60    */
61    private Term isA;
62   
63    /*
64    * lookup of terms by user readable name (NB not guaranteed unique)
65    */
66    private Map<String, Term> termsByDescription;
67   
68    /*
69    * Map where key is a Term and value is a (possibly empty) list of
70    * all Terms to which the key has an 'isA' relationship, either
71    * directly or indirectly (A isA B isA C)
72    */
73    private Map<Term, List<Term>> termIsA;
74   
75    private List<String> termsFound;
76   
77    private List<String> termsNotFound;
78   
79    /**
80    * Package private constructor to enforce use of singleton. Parses and caches
81    * the SO OBO data file.
82    */
 
83  36 toggle public SequenceOntology()
84    {
85  36 termsFound = new ArrayList<String>();
86  36 termsNotFound = new ArrayList<String>();
87  36 termsByDescription = new HashMap<String, Term>();
88  36 termIsA = new HashMap<Term, List<Term>>();
89   
90  36 loadOntologyZipFile("so-xp-simple.obo");
91    }
92   
93    /**
94    * Loads the given ontology file from a zip file with ".zip" appended
95    *
96    * @param ontologyFile
97    */
 
98  36 toggle protected void loadOntologyZipFile(String ontologyFile)
99    {
100  36 long now = System.currentTimeMillis();
101  36 ZipInputStream zipStream = null;
102  36 try
103    {
104  36 String zipFile = ontologyFile + ".zip";
105  36 InputStream inStream = this.getClass()
106    .getResourceAsStream("/" + zipFile);
107  36 zipStream = new ZipInputStream(new BufferedInputStream(inStream));
108  36 ZipEntry entry;
109  ? while ((entry = zipStream.getNextEntry()) != null)
110    {
111  108 if (entry.getName().equals(ontologyFile))
112    {
113  36 loadOboFile(zipStream);
114    }
115    }
116  36 long elapsed = System.currentTimeMillis() - now;
117  36 System.out.println("Loaded Sequence Ontology from " + zipFile + " ("
118    + elapsed + "ms)");
119    } catch (Exception e)
120    {
121  0 e.printStackTrace();
122    } finally
123    {
124  36 closeStream(zipStream);
125    }
126    }
127   
128    /**
129    * Closes the input stream, swallowing all exceptions
130    *
131    * @param is
132    */
 
133  36 toggle protected void closeStream(InputStream is)
134    {
135  36 if (is != null)
136    {
137  36 try
138    {
139  36 is.close();
140    } catch (IOException e)
141    {
142    // ignore
143    }
144    }
145    }
146   
147    /**
148    * Reads, parses and stores the OBO file data
149    *
150    * @param is
151    * @throws ParseException
152    * @throws IOException
153    */
 
154  36 toggle protected void loadOboFile(InputStream is)
155    throws ParseException, IOException
156    {
157  36 BufferedReader oboFile = new BufferedReader(new InputStreamReader(is));
158  36 OboParser parser = new OboParser();
159  36 ontology = parser.parseOBO(oboFile, "SO", "the SO ontology");
160  36 isA = ontology.getTerm("is_a");
161  36 storeTermNames();
162    }
163   
164    /**
165    * Stores a lookup table of terms by description. Note that description is not
166    * guaranteed unique. Where duplicate descriptions are found, try to discard
167    * the term that is flagged as obsolete. However we do store obsolete terms
168    * where there is no duplication of description.
169    */
 
170  36 toggle protected void storeTermNames()
171    {
172  36 for (Term term : ontology.getTerms())
173    {
174  207072 if (term instanceof Impl)
175    {
176  93852 String description = term.getDescription();
177  93852 if (description != null)
178    {
179  82188 Term replaced = termsByDescription.get(description);
180  82188 if (replaced != null)
181    {
182  288 boolean newTermIsObsolete = isObsolete(term);
183  288 boolean oldTermIsObsolete = isObsolete(replaced);
184  288 if (newTermIsObsolete && !oldTermIsObsolete)
185    {
186  72 System.err.println("Ignoring " + term.getName()
187    + " as obsolete and duplicated by "
188    + replaced.getName());
189  72 term = replaced;
190    }
191  216 else if (!newTermIsObsolete && oldTermIsObsolete)
192    {
193  216 System.err.println("Ignoring " + replaced.getName()
194    + " as obsolete and duplicated by " + term.getName());
195    }
196    else
197    {
198  0 System.err.println("Warning: " + term.getName()
199    + " has replaced " + replaced.getName()
200    + " for lookup of '" + description + "'");
201    }
202    }
203  82188 termsByDescription.put(description, term);
204    }
205    }
206    }
207    }
208   
209    /**
210    * Answers true if the term has property "is_obsolete" with value true, else
211    * false
212    *
213    * @param term
214    * @return
215    */
 
216  576 toggle public static boolean isObsolete(Term term)
217    {
218  576 Annotation ann = term.getAnnotation();
219  576 if (ann != null)
220    {
221  576 try
222    {
223  288 if (Boolean.TRUE.equals(ann.getProperty("is_obsolete")))
224    {
225  288 return true;
226    }
227    } catch (NoSuchElementException e)
228    {
229    // fall through to false
230    }
231    }
232  288 return false;
233    }
234   
235    /**
236    * Test whether the given Sequence Ontology term is nucleotide_match (either
237    * directly or via is_a relationship)
238    *
239    * @param soTerm
240    * SO name or description
241    * @return
242    */
 
243  0 toggle public boolean isNucleotideMatch(String soTerm)
244    {
245  0 return isA(soTerm, NUCLEOTIDE_MATCH);
246    }
247   
248    /**
249    * Test whether the given Sequence Ontology term is protein_match (either
250    * directly or via is_a relationship)
251    *
252    * @param soTerm
253    * SO name or description
254    * @return
255    */
 
256  0 toggle public boolean isProteinMatch(String soTerm)
257    {
258  0 return isA(soTerm, PROTEIN_MATCH);
259    }
260   
261    /**
262    * Test whether the given Sequence Ontology term is polypeptide (either
263    * directly or via is_a relationship)
264    *
265    * @param soTerm
266    * SO name or description
267    * @return
268    */
 
269  0 toggle public boolean isPolypeptide(String soTerm)
270    {
271  0 return isA(soTerm, POLYPEPTIDE);
272    }
273   
274    /**
275    * Returns true if the given term has a (direct or indirect) 'isA'
276    * relationship with the parent
277    *
278    * @param child
279    * @param parent
280    * @return
281    */
 
282  232 toggle @Override
283    public boolean isA(String child, String parent)
284    {
285  232 if (child == null || parent == null)
286    {
287  3 return false;
288    }
289    /*
290    * optimise trivial checks like isA("CDS", "CDS")
291    */
292  229 if (child.equals(parent))
293    {
294  31 termFound(child);
295  31 return true;
296    }
297   
298  198 Term childTerm = getTerm(child);
299  198 if (childTerm != null)
300    {
301  155 termFound(child);
302    }
303    else
304    {
305  43 termNotFound(child);
306    }
307  198 Term parentTerm = getTerm(parent);
308   
309  198 return termIsA(childTerm, parentTerm);
310    }
311   
312    /**
313    * Records a valid term queried for, for reporting purposes
314    *
315    * @param term
316    */
 
317  186 toggle private void termFound(String term)
318    {
319  186 synchronized (termsFound)
320    {
321  186 if (!termsFound.contains(term))
322    {
323  44 termsFound.add(term);
324    }
325    }
326    }
327   
328    /**
329    * Records an invalid term queried for, for reporting purposes
330    *
331    * @param term
332    */
 
333  43 toggle private void termNotFound(String term)
334    {
335  43 synchronized (termsNotFound)
336    {
337  43 if (!termsNotFound.contains(term))
338    {
339  10 System.err.println("SO term " + term + " invalid");
340  10 termsNotFound.add(term);
341    }
342    }
343    }
344   
345    /**
346    * Returns true if the childTerm 'isA' parentTerm (directly or indirectly).
347    *
348    * @param childTerm
349    * @param parentTerm
350    * @return
351    */
 
352  1200 toggle protected synchronized boolean termIsA(Term childTerm, Term parentTerm)
353    {
354    /*
355    * null term could arise from a misspelled SO description
356    */
357  1200 if (childTerm == null || parentTerm == null)
358    {
359  44 return false;
360    }
361   
362    /*
363    * recursive search endpoint:
364    */
365  1156 if (childTerm == parentTerm)
366    {
367  64 return true;
368    }
369   
370    /*
371    * lazy initialisation - find all of a term's parents (recursively)
372    * the first time this is called, and save them in a map.
373    */
374  1092 if (!termIsA.containsKey(childTerm))
375    {
376  37 findParents(childTerm);
377    }
378   
379  1092 List<Term> parents = termIsA.get(childTerm);
380  1092 for (Term parent : parents)
381    {
382  1002 if (termIsA(parent, parentTerm))
383    {
384    /*
385    * add (great-)grandparents to parents list as they are discovered,
386    * for faster lookup next time
387    */
388  256 if (!parents.contains(parentTerm))
389    {
390  94 parents.add(parentTerm);
391    }
392  256 return true;
393    }
394    }
395   
396  836 return false;
397    }
398   
399    /**
400    * Finds all the 'isA' parents of the childTerm and stores them as a (possibly
401    * empty) list.
402    *
403    * @param childTerm
404    */
 
405  355 toggle protected synchronized void findParents(Term childTerm)
406    {
407  355 List<Term> result = new ArrayList<Term>();
408  355 for (Triple triple : ontology.getTriples(childTerm, null, isA))
409    {
410  318 Term parent = triple.getObject();
411  318 result.add(parent);
412   
413    /*
414    * and search for the parent's parents recursively
415    */
416  318 findParents(parent);
417    }
418  355 termIsA.put(childTerm, result);
419    }
420   
421    /**
422    * Returns the Term for a given name (e.g. "SO:0000735") or description (e.g.
423    * "sequence_location"), or null if not found.
424    *
425    * @param child
426    * @return
427    */
 
428  396 toggle protected Term getTerm(String nameOrDescription)
429    {
430  396 Term t = termsByDescription.get(nameOrDescription);
431  396 if (t == null)
432    {
433  61 try
434    {
435  61 t = ontology.getTerm(nameOrDescription);
436    } catch (NoSuchElementException e)
437    {
438    // not found
439    }
440    }
441  396 return t;
442    }
443   
 
444  0 toggle public boolean isSequenceVariant(String soTerm)
445    {
446  0 return isA(soTerm, SEQUENCE_VARIANT);
447    }
448   
449    /**
450    * Sorts (case-insensitive) and returns the list of valid terms queried for
451    */
 
452  0 toggle @Override
453    public List<String> termsFound()
454    {
455  0 synchronized (termsFound)
456    {
457  0 Collections.sort(termsFound, String.CASE_INSENSITIVE_ORDER);
458  0 return termsFound;
459    }
460    }
461   
462    /**
463    * Sorts (case-insensitive) and returns the list of invalid terms queried for
464    */
 
465  0 toggle @Override
466    public List<String> termsNotFound()
467    {
468  0 synchronized (termsNotFound)
469    {
470  0 Collections.sort(termsNotFound, String.CASE_INSENSITIVE_ORDER);
471  0 return termsNotFound;
472    }
473    }
474    }