Clover icon

Coverage Report

  1. Project Clover database Mon Nov 18 2024 09:38:20 GMT
  2. Package jalview.ext.so

File SequenceOntology.java

 

Coverage histogram

../../../img/srcFileCovDistChart9.png
7% of files have more coverage

Code metrics

42
100
18
1
475
277
47
0.47
5.56
18
2.61

Classes

Class Line # Actions
SequenceOntology 52 100 47
0.84375 (84.4%)
 

Contributing tests

This file is covered by 26 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.ext.so;
22   
23    import java.io.BufferedInputStream;
24    import java.io.BufferedReader;
25    import java.io.IOException;
26    import java.io.InputStream;
27    import java.io.InputStreamReader;
28    import java.text.ParseException;
29    import java.util.ArrayList;
30    import java.util.Collections;
31    import java.util.HashMap;
32    import java.util.List;
33    import java.util.Map;
34    import java.util.NoSuchElementException;
35    import java.util.zip.ZipEntry;
36    import java.util.zip.ZipInputStream;
37   
38    import org.biojava.nbio.ontology.Ontology;
39    import org.biojava.nbio.ontology.Term;
40    import org.biojava.nbio.ontology.Term.Impl;
41    import org.biojava.nbio.ontology.Triple;
42    import org.biojava.nbio.ontology.io.OboParser;
43    import org.biojava.nbio.ontology.utils.Annotation;
44   
45    import jalview.bin.Console;
46    import jalview.io.gff.SequenceOntologyI;
47   
48    /**
49    * A wrapper class that parses the Sequence Ontology and exposes useful access
50    * methods. This version uses the BioJava parser.
51    */
 
52    public class SequenceOntology implements SequenceOntologyI
53    {
54    /*
55    * the parsed Ontology data as modelled by BioJava
56    */
57    private Ontology ontology;
58   
59    /*
60    * the ontology term for the isA relationship
61    */
62    private Term isA;
63   
64    /*
65    * lookup of terms by user readable name (NB not guaranteed unique)
66    */
67    private Map<String, Term> termsByDescription;
68   
69    /*
70    * Map where key is a Term and value is a (possibly empty) list of
71    * all Terms to which the key has an 'isA' relationship, either
72    * directly or indirectly (A isA B isA C)
73    */
74    private Map<Term, List<Term>> termIsA;
75   
76    private List<String> termsFound;
77   
78    private List<String> termsNotFound;
79   
80    /**
81    * Package private constructor to enforce use of singleton. Parses and caches
82    * the SO OBO data file.
83    */
 
84  48 toggle public SequenceOntology()
85    {
86  48 termsFound = new ArrayList<String>();
87  48 termsNotFound = new ArrayList<String>();
88  48 termsByDescription = new HashMap<String, Term>();
89  48 termIsA = new HashMap<Term, List<Term>>();
90   
91  48 loadOntologyZipFile("so-xp-simple.obo");
92    }
93   
94    /**
95    * Loads the given ontology file from a zip file with ".zip" appended
96    *
97    * @param ontologyFile
98    */
 
99  48 toggle protected void loadOntologyZipFile(String ontologyFile)
100    {
101  48 long now = System.currentTimeMillis();
102  48 ZipInputStream zipStream = null;
103  48 try
104    {
105  48 String zipFile = ontologyFile + ".zip";
106  48 InputStream inStream = this.getClass()
107    .getResourceAsStream("/" + zipFile);
108  48 zipStream = new ZipInputStream(new BufferedInputStream(inStream));
109  48 ZipEntry entry;
110  ? while ((entry = zipStream.getNextEntry()) != null)
111    {
112  144 if (entry.getName().equals(ontologyFile))
113    {
114  48 loadOboFile(zipStream);
115    }
116    }
117  48 long elapsed = System.currentTimeMillis() - now;
118  48 Console.info("Loaded Sequence Ontology from " + zipFile + " ("
119    + elapsed + "ms)");
120    } catch (Exception e)
121    {
122  0 e.printStackTrace();
123    } finally
124    {
125  48 closeStream(zipStream);
126    }
127    }
128   
129    /**
130    * Closes the input stream, swallowing all exceptions
131    *
132    * @param is
133    */
 
134  48 toggle protected void closeStream(InputStream is)
135    {
136  48 if (is != null)
137    {
138  48 try
139    {
140  48 is.close();
141    } catch (IOException e)
142    {
143    // ignore
144    }
145    }
146    }
147   
148    /**
149    * Reads, parses and stores the OBO file data
150    *
151    * @param is
152    * @throws ParseException
153    * @throws IOException
154    */
 
155  48 toggle protected void loadOboFile(InputStream is)
156    throws ParseException, IOException
157    {
158  48 BufferedReader oboFile = new BufferedReader(new InputStreamReader(is));
159  48 OboParser parser = new OboParser();
160  48 ontology = parser.parseOBO(oboFile, "SO", "the SO ontology");
161  48 isA = ontology.getTerm("is_a");
162  48 storeTermNames();
163    }
164   
165    /**
166    * Stores a lookup table of terms by description. Note that description is not
167    * guaranteed unique. Where duplicate descriptions are found, try to discard
168    * the term that is flagged as obsolete. However we do store obsolete terms
169    * where there is no duplication of description.
170    */
 
171  48 toggle protected void storeTermNames()
172    {
173  48 for (Term term : ontology.getTerms())
174    {
175  276096 if (term instanceof Impl)
176    {
177  125136 String description = term.getDescription();
178  125136 if (description != null)
179    {
180  109584 Term replaced = termsByDescription.get(description);
181  109584 if (replaced != null)
182    {
183  384 boolean newTermIsObsolete = isObsolete(term);
184  384 boolean oldTermIsObsolete = isObsolete(replaced);
185  384 if (newTermIsObsolete && !oldTermIsObsolete)
186    {
187  96 Console.debug("Ignoring " + term.getName()
188    + " as obsolete and duplicated by "
189    + replaced.getName());
190  96 term = replaced;
191    }
192  288 else if (!newTermIsObsolete && oldTermIsObsolete)
193    {
194  288 Console.debug("Ignoring " + replaced.getName()
195    + " as obsolete and duplicated by " + term.getName());
196    }
197    else
198    {
199  0 Console.debug("Warning: " + term.getName() + " has replaced "
200    + replaced.getName() + " for lookup of '"
201    + description + "'");
202    }
203    }
204  109584 termsByDescription.put(description, term);
205    }
206    }
207    }
208    }
209   
210    /**
211    * Answers true if the term has property "is_obsolete" with value true, else
212    * false
213    *
214    * @param term
215    * @return
216    */
 
217  768 toggle public static boolean isObsolete(Term term)
218    {
219  768 Annotation ann = term.getAnnotation();
220  768 if (ann != null)
221    {
222  768 try
223    {
224  384 if (Boolean.TRUE.equals(ann.getProperty("is_obsolete")))
225    {
226  384 return true;
227    }
228    } catch (NoSuchElementException e)
229    {
230    // fall through to false
231    }
232    }
233  384 return false;
234    }
235   
236    /**
237    * Test whether the given Sequence Ontology term is nucleotide_match (either
238    * directly or via is_a relationship)
239    *
240    * @param soTerm
241    * SO name or description
242    * @return
243    */
 
244  0 toggle public boolean isNucleotideMatch(String soTerm)
245    {
246  0 return isA(soTerm, NUCLEOTIDE_MATCH);
247    }
248   
249    /**
250    * Test whether the given Sequence Ontology term is protein_match (either
251    * directly or via is_a relationship)
252    *
253    * @param soTerm
254    * SO name or description
255    * @return
256    */
 
257  0 toggle public boolean isProteinMatch(String soTerm)
258    {
259  0 return isA(soTerm, PROTEIN_MATCH);
260    }
261   
262    /**
263    * Test whether the given Sequence Ontology term is polypeptide (either
264    * directly or via is_a relationship)
265    *
266    * @param soTerm
267    * SO name or description
268    * @return
269    */
 
270  0 toggle public boolean isPolypeptide(String soTerm)
271    {
272  0 return isA(soTerm, POLYPEPTIDE);
273    }
274   
275    /**
276    * Returns true if the given term has a (direct or indirect) 'isA'
277    * relationship with the parent
278    *
279    * @param child
280    * @param parent
281    * @return
282    */
 
283  44 toggle @Override
284    public boolean isA(String child, String parent)
285    {
286  44 if (child == null || parent == null)
287    {
288  3 return false;
289    }
290    /*
291    * optimise trivial checks like isA("CDS", "CDS")
292    */
293  41 if (child.equals(parent))
294    {
295  4 termFound(child);
296  4 return true;
297    }
298   
299  37 Term childTerm = getTerm(child);
300  37 if (childTerm != null)
301    {
302  36 termFound(child);
303    }
304    else
305    {
306  1 termNotFound(child);
307    }
308  37 Term parentTerm = getTerm(parent);
309   
310  37 return termIsA(childTerm, parentTerm);
311    }
312   
313    /**
314    * Records a valid term queried for, for reporting purposes
315    *
316    * @param term
317    */
 
318  40 toggle private void termFound(String term)
319    {
320  40 synchronized (termsFound)
321    {
322  40 if (!termsFound.contains(term))
323    {
324  28 termsFound.add(term);
325    }
326    }
327    }
328   
329    /**
330    * Records an invalid term queried for, for reporting purposes
331    *
332    * @param term
333    */
 
334  1 toggle private void termNotFound(String term)
335    {
336  1 synchronized (termsNotFound)
337    {
338  1 if (!termsNotFound.contains(term))
339    {
340  1 Console.error("SO term " + term + " invalid");
341  1 termsNotFound.add(term);
342    }
343    }
344    }
345   
346    /**
347    * Returns true if the childTerm 'isA' parentTerm (directly or indirectly).
348    *
349    * @param childTerm
350    * @param parentTerm
351    * @return
352    */
 
353  162 toggle protected synchronized boolean termIsA(Term childTerm, Term parentTerm)
354    {
355    /*
356    * null term could arise from a misspelled SO description
357    */
358  162 if (childTerm == null || parentTerm == null)
359    {
360  1 return false;
361    }
362   
363    /*
364    * recursive search endpoint:
365    */
366  161 if (childTerm == parentTerm)
367    {
368  32 return true;
369    }
370   
371    /*
372    * lazy initialisation - find all of a term's parents (recursively)
373    * the first time this is called, and save them in a map.
374    */
375  129 if (!termIsA.containsKey(childTerm))
376    {
377  24 findParents(childTerm);
378    }
379   
380  129 List<Term> parents = termIsA.get(childTerm);
381  129 for (Term parent : parents)
382    {
383  125 if (termIsA(parent, parentTerm))
384    {
385    /*
386    * add (great-)grandparents to parents list as they are discovered,
387    * for faster lookup next time
388    */
389  107 if (!parents.contains(parentTerm))
390    {
391  71 parents.add(parentTerm);
392    }
393  107 return true;
394    }
395    }
396   
397  22 return false;
398    }
399   
400    /**
401    * Finds all the 'isA' parents of the childTerm and stores them as a (possibly
402    * empty) list.
403    *
404    * @param childTerm
405    */
 
406  256 toggle protected synchronized void findParents(Term childTerm)
407    {
408  256 List<Term> result = new ArrayList<Term>();
409  256 for (Triple triple : ontology.getTriples(childTerm, null, isA))
410    {
411  232 Term parent = triple.getObject();
412  232 result.add(parent);
413   
414    /*
415    * and search for the parent's parents recursively
416    */
417  232 findParents(parent);
418    }
419  256 termIsA.put(childTerm, result);
420    }
421   
422    /**
423    * Returns the Term for a given name (e.g. "SO:0000735") or description (e.g.
424    * "sequence_location"), or null if not found.
425    *
426    * @param child
427    * @return
428    */
 
429  74 toggle protected Term getTerm(String nameOrDescription)
430    {
431  74 Term t = termsByDescription.get(nameOrDescription);
432  74 if (t == null)
433    {
434  16 try
435    {
436  16 t = ontology.getTerm(nameOrDescription);
437    } catch (NoSuchElementException e)
438    {
439    // not found
440    }
441    }
442  74 return t;
443    }
444   
 
445  0 toggle public boolean isSequenceVariant(String soTerm)
446    {
447  0 return isA(soTerm, SEQUENCE_VARIANT);
448    }
449   
450    /**
451    * Sorts (case-insensitive) and returns the list of valid terms queried for
452    */
 
453  0 toggle @Override
454    public List<String> termsFound()
455    {
456  0 synchronized (termsFound)
457    {
458  0 Collections.sort(termsFound, String.CASE_INSENSITIVE_ORDER);
459  0 return termsFound;
460    }
461    }
462   
463    /**
464    * Sorts (case-insensitive) and returns the list of invalid terms queried for
465    */
 
466  0 toggle @Override
467    public List<String> termsNotFound()
468    {
469  0 synchronized (termsNotFound)
470    {
471  0 Collections.sort(termsNotFound, String.CASE_INSENSITIVE_ORDER);
472  0 return termsNotFound;
473    }
474    }
475    }