Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
DBRefUtils | 43 | 178 | 154 | ||
DBRefUtils.DbRefComp | 292 | 1 | 1 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | package jalview.util; | |
22 | ||
23 | import java.util.ArrayList; | |
24 | import java.util.BitSet; | |
25 | import java.util.HashMap; | |
26 | import java.util.HashSet; | |
27 | import java.util.List; | |
28 | import java.util.Locale; | |
29 | import java.util.Map; | |
30 | ||
31 | import com.stevesoft.pat.Regex; | |
32 | ||
33 | import jalview.bin.Console; | |
34 | import jalview.datamodel.DBRefEntry; | |
35 | import jalview.datamodel.DBRefSource; | |
36 | import jalview.datamodel.Mapping; | |
37 | import jalview.datamodel.PDBEntry; | |
38 | import jalview.datamodel.SequenceI; | |
39 | ||
40 | /** | |
41 | * Utilities for handling DBRef objects and their collections. | |
42 | */ | |
43 | public class DBRefUtils | |
44 | { | |
/*
 * lookup from lower-case form of a name to its canonical (standardised) form
 */
private static final Map<String, String> canonicalSourceNameLookup = new HashMap<>();

/** search mode flag: the database source takes part in the comparison */
public final static int DB_SOURCE = 1;

/** search mode flag: the version takes part in the comparison */
public final static int DB_VERSION = 2;

/** search mode flag: the accession id takes part in the comparison */
public final static int DB_ID = 4;

/** search mode flag: the mapping takes part in the comparison */
public final static int DB_MAP = 8;

/** search mode comparing source and accession id only */
public final static int SEARCH_MODE_NO_MAP_NO_VERSION = DB_SOURCE | DB_ID;

/** search mode comparing source, version, accession id and mapping */
public final static int SEARCH_MODE_FULL = DB_SOURCE | DB_VERSION | DB_ID
        | DB_MAP;

static
{
  // TODO load these from a resource file?
  canonicalSourceNameLookup.put("uniprotkb/swiss-prot",
          DBRefSource.UNIPROT);
  canonicalSourceNameLookup.put("uniprotkb/trembl", DBRefSource.UNIPROT);

  // Ensembl values for dbname in xref REST service:
  canonicalSourceNameLookup.put("uniprot/sptrembl", DBRefSource.UNIPROT);
  canonicalSourceNameLookup.put("uniprot/swissprot", DBRefSource.UNIPROT);

  canonicalSourceNameLookup.put("pdb", DBRefSource.PDB);
  canonicalSourceNameLookup.put("ensembl", DBRefSource.ENSEMBL);
  // Ensembl Gn and Tr are for Ensembl genomic and transcript IDs as served
  // from ENA.
  canonicalSourceNameLookup.put("ensembl-tr", DBRefSource.ENSEMBL);
  canonicalSourceNameLookup.put("ensembl-gn", DBRefSource.ENSEMBL);

  // guarantee we always have lowercase entries for canonical string lookups.
  // Iterate over a snapshot of the keys: adding a lower-cased variant of a
  // mixed-case key would otherwise modify the map while its key set is being
  // iterated.
  for (String k : new ArrayList<>(canonicalSourceNameLookup.keySet()))
  {
    canonicalSourceNameLookup.put(k.toLowerCase(Locale.ROOT),
            canonicalSourceNameLookup.get(k));
  }
}
88 | ||
89 | /** | |
90 | * Returns those DBRefEntry objects whose source identifier (once converted to | |
91 | * Jalview's canonical form) is in the list of sources to search for. Returns | |
92 | * null if no matches found. | |
93 | * | |
94 | * @param dbrefs | |
95 | * DBRefEntry objects to search | |
96 | * @param sources | |
97 | * array of sources to select | |
98 | * @return | |
99 | */ | |
100 | 13849 | public static List<DBRefEntry> selectRefs(List<DBRefEntry> dbrefs, |
101 | String[] sources) | |
102 | { | |
103 | 13849 | if (dbrefs == null || sources == null) |
104 | { | |
105 | 9938 | return dbrefs; |
106 | } | |
107 | ||
108 | // BH TODO (what?) | |
109 | 3911 | HashSet<String> srcs = new HashSet<String>(); |
110 | 3911 | for (String src : sources) |
111 | { | |
112 | 17413 | srcs.add(src.toUpperCase(Locale.ROOT)); |
113 | } | |
114 | ||
115 | 3911 | int nrefs = dbrefs.size(); |
116 | 3911 | List<DBRefEntry> res = new ArrayList<DBRefEntry>(); |
117 | 24754 | for (int ib = 0; ib < nrefs; ib++) |
118 | { | |
119 | 20843 | DBRefEntry dbr = dbrefs.get(ib); |
120 | 20843 | String source = getCanonicalName(dbr.getSource()); |
121 | 20843 | if (srcs.contains(source.toUpperCase(Locale.ROOT))) |
122 | { | |
123 | 3493 | res.add(dbr); |
124 | } | |
125 | } | |
126 | 3911 | if (res.size() > 0) |
127 | { | |
128 | // List<DBRefEntry> reply = new DBRefEntry[res.size()]; | |
129 | 2178 | return res;// .toArray(reply); |
130 | } | |
131 | 1733 | return null; |
132 | } | |
133 | ||
134 | 23 | private static boolean selectRefsBS(List<DBRefEntry> dbrefs, |
135 | int sourceKeys, BitSet bsSelect) | |
136 | { | |
137 | 23 | if (dbrefs == null || sourceKeys == 0) |
138 | { | |
139 | 0 | return false; |
140 | } | |
141 | 348 | for (int i = 0, n = dbrefs.size(); i < n; i++) |
142 | { | |
143 | 325 | DBRefEntry dbr = dbrefs.get(i); |
144 | 325 | if ((dbr.getSourceKey() & sourceKeys) != 0) |
145 | { | |
146 | 90 | bsSelect.clear(i); |
147 | } | |
148 | } | |
149 | 23 | return !bsSelect.isEmpty(); |
150 | } | |
151 | ||
152 | /** | |
153 | * Returns a (possibly empty) list of those references that match the given | |
154 | * entry, according to the given comparator. | |
155 | * | |
156 | * @param refs | |
157 | * an array of database references to search | |
158 | * @param entry | |
159 | * an entry to compare against | |
160 | * @param comparator | |
161 | * @return | |
162 | */ | |
163 | 0 | static List<DBRefEntry> searchRefs(DBRefEntry[] refs, DBRefEntry entry, |
164 | DbRefComp comparator) | |
165 | { | |
166 | 0 | List<DBRefEntry> rfs = new ArrayList<>(); |
167 | 0 | if (refs == null || entry == null) |
168 | { | |
169 | 0 | return rfs; |
170 | } | |
171 | 0 | for (int i = 0; i < refs.length; i++) |
172 | { | |
173 | 0 | if (comparator.matches(entry, refs[i])) |
174 | { | |
175 | 0 | rfs.add(refs[i]); |
176 | } | |
177 | } | |
178 | 0 | return rfs; |
179 | } | |
180 | ||
181 | /** | |
182 | * look up source in an internal list of database reference sources and return | |
183 | * the canonical jalview name for the source, or the original string if it has | |
184 | * no canonical form. | |
185 | * | |
186 | * @param source | |
187 | * @return canonical jalview source (one of jalview.datamodel.DBRefSource.*) | |
188 | * or original source | |
189 | */ | |
190 | 271830 | public static String getCanonicalName(String source) |
191 | { | |
192 | 271831 | if (source == null) |
193 | { | |
194 | 1 | return null; |
195 | } | |
196 | 271830 | String canonical = canonicalSourceNameLookup |
197 | .get(source.toLowerCase(Locale.ROOT)); | |
198 | 271830 | return canonical == null ? source : canonical; |
199 | } | |
200 | ||
201 | /** | |
202 | * Returns a (possibly empty) list of those references that match the given | |
203 | * entry. Currently uses a comparator which matches if | |
204 | * <ul> | |
205 | * <li>database sources are the same</li> | |
206 | * <li>accession ids are the same</li> | |
207 | * <li>both have no mapping, or the mappings are the same</li> | |
208 | * </ul> | |
209 | * | |
210 | * @param ref | |
211 | * Set of references to search | |
212 | * @param entry | |
213 | * pattern to match | |
214 | * @param mode | |
215 | * SEARCH_MODE_FULL for all; SEARCH_MODE_NO_MAP_NO_VERSION optional | |
216 | * @return | |
217 | */ | |
218 | 5048 | public static List<DBRefEntry> searchRefs(List<DBRefEntry> ref, |
219 | DBRefEntry entry, int mode) | |
220 | { | |
221 | 5048 | return searchRefs(ref, entry, |
222 | matchDbAndIdAndEitherMapOrEquivalentMapList, mode); | |
223 | } | |
224 | ||
225 | /** | |
226 | * Returns a list of those references that match the given accession id | |
227 | * <ul> | |
228 | * <li>database sources are the same</li> | |
229 | * <li>accession ids are the same</li> | |
230 | * <li>both have no mapping, or the mappings are the same</li> | |
231 | * </ul> | |
232 | * | |
233 | * @param refs | |
234 | * Set of references to search | |
235 | * @param accId | |
236 | * accession id to match | |
237 | * @return | |
238 | */ | |
239 | 6 | public static List<DBRefEntry> searchRefs(List<DBRefEntry> refs, |
240 | String accId) | |
241 | { | |
242 | 6 | List<DBRefEntry> rfs = new ArrayList<DBRefEntry>(); |
243 | 6 | if (refs == null || accId == null) |
244 | { | |
245 | 0 | return rfs; |
246 | } | |
247 | 22 | for (int i = 0, n = refs.size(); i < n; i++) |
248 | { | |
249 | 16 | DBRefEntry e = refs.get(i); |
250 | 16 | if (accId.equals(e.getAccessionId())) |
251 | { | |
252 | 8 | rfs.add(e); |
253 | } | |
254 | } | |
255 | 6 | return rfs; |
256 | // return searchRefs(refs, new DBRefEntry("", "", accId), matchId, | |
257 | // SEARCH_MODE_FULL); | |
258 | } | |
259 | ||
260 | /** | |
261 | * Returns a (possibly empty) list of those references that match the given | |
262 | * entry, according to the given comparator. | |
263 | * | |
264 | * @param refs | |
265 | * an array of database references to search | |
266 | * @param entry | |
267 | * an entry to compare against | |
268 | * @param comparator | |
269 | * @param mode | |
270 | * SEARCH_MODE_FULL for all; SEARCH_MODE_NO_MAP_NO_VERSION optional | |
271 | * @return | |
272 | */ | |
273 | 5048 | static List<DBRefEntry> searchRefs(List<DBRefEntry> refs, |
274 | DBRefEntry entry, DbRefComp comparator, int mode) | |
275 | { | |
276 | 5048 | List<DBRefEntry> rfs = new ArrayList<DBRefEntry>(); |
277 | 5048 | if (refs == null || entry == null) |
278 | { | |
279 | 1 | return rfs; |
280 | } | |
281 | 106882 | for (int i = 0, n = refs.size(); i < n; i++) |
282 | { | |
283 | 101835 | DBRefEntry e = refs.get(i); |
284 | 101835 | if (comparator.matches(entry, e, SEARCH_MODE_FULL)) |
285 | { | |
286 | 889 | rfs.add(e); |
287 | } | |
288 | } | |
289 | 5047 | return rfs; |
290 | } | |
291 | ||
292 | interface DbRefComp | |
293 | { | |
294 | 0 | default public boolean matches(DBRefEntry refa, DBRefEntry refb) |
295 | { | |
296 | 0 | return matches(refa, refb, SEARCH_MODE_FULL); |
297 | }; | |
298 | ||
299 | public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode); | |
300 | } | |
301 | ||
302 | /** | |
303 | * match on all non-null fields in refa | |
304 | */ | |
305 | // TODO unused - remove? would be broken by equating "" with null | |
306 | public static DbRefComp matchNonNullonA = new DbRefComp() | |
307 | { | |
308 | 0 | @Override |
309 | public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) | |
310 | { | |
311 | 0 | if ((mode & DB_SOURCE) != 0 && (refa.getSource() == null |
312 | || DBRefUtils.getCanonicalName(refb.getSource()).equals( | |
313 | DBRefUtils.getCanonicalName(refa.getSource())))) | |
314 | { | |
315 | 0 | if ((mode & DB_VERSION) != 0 && (refa.getVersion() == null |
316 | || refb.getVersion().equals(refa.getVersion()))) | |
317 | { | |
318 | 0 | if ((mode & DB_ID) != 0 && (refa.getAccessionId() == null |
319 | || refb.getAccessionId().equals(refa.getAccessionId()))) | |
320 | { | |
321 | 0 | if ((mode & DB_MAP) != 0 |
322 | && (refa.getMap() == null || (refb.getMap() != null | |
323 | && refb.getMap().equals(refa.getMap())))) | |
324 | { | |
325 | 0 | return true; |
326 | } | |
327 | } | |
328 | } | |
329 | } | |
330 | 0 | return false; |
331 | } | |
332 | }; | |
333 | ||
334 | /** | |
335 | * either field is null or field matches for all of source, version, accession | |
336 | * id and map. | |
337 | */ | |
338 | // TODO unused - remove? | |
339 | public static DbRefComp matchEitherNonNull = new DbRefComp() | |
340 | { | |
341 | 0 | @Override |
342 | public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) | |
343 | { | |
344 | 0 | if (nullOrEqualSource(refa.getSource(), refb.getSource()) |
345 | && nullOrEqual(refa.getVersion(), refb.getVersion()) | |
346 | && nullOrEqual(refa.getAccessionId(), refb.getAccessionId()) | |
347 | && nullOrEqual(refa.getMap(), refb.getMap())) | |
348 | { | |
349 | 0 | return true; |
350 | } | |
351 | 0 | return false; |
352 | } | |
353 | ||
354 | }; | |
355 | ||
356 | /** | |
357 | * Parses a DBRefEntry and adds it to the sequence, also a PDBEntry if the | |
358 | * database is PDB. | |
359 | * <p> | |
360 | * Used by file parsers to generate DBRefs from annotation within file (eg | |
361 | * Stockholm) | |
362 | * | |
363 | * @param dbname | |
364 | * @param version | |
365 | * @param acn | |
366 | * @param seq | |
367 | * where to annotate with reference | |
368 | * @return parsed version of entry that was added to seq (if any) | |
369 | */ | |
370 | 1551 | public static DBRefEntry parseToDbRef(SequenceI seq, String dbname, |
371 | String version, String acn) | |
372 | { | |
373 | 1551 | DBRefEntry ref = null; |
374 | 1551 | if (dbname != null) |
375 | { | |
376 | 1551 | String locsrc = DBRefUtils.getCanonicalName(dbname); |
377 | 1551 | if (locsrc.equals(DBRefSource.PDB)) |
378 | { | |
379 | /* | |
380 | * Check for PFAM style stockhom PDB accession id citation e.g. | |
381 | * "1WRI A; 7-80;" | |
382 | */ | |
383 | 27 | Regex r = new com.stevesoft.pat.Regex( |
384 | "([0-9][0-9A-Za-z]{3})\\s*(.?)\\s*;\\s*([0-9]+)-([0-9]+)"); | |
385 | 27 | if (r.search(acn.trim())) |
386 | { | |
387 | 27 | String pdbid = r.stringMatched(1); |
388 | 27 | String chaincode = r.stringMatched(2); |
389 | 27 | if (chaincode == null) |
390 | { | |
391 | 0 | chaincode = " "; |
392 | } | |
393 | // String mapstart = r.stringMatched(3); | |
394 | // String mapend = r.stringMatched(4); | |
395 | 27 | if (chaincode.equals(" ")) |
396 | { | |
397 | 0 | chaincode = "_"; |
398 | } | |
399 | // construct pdb ref. | |
400 | 27 | ref = new DBRefEntry(locsrc, version, pdbid + chaincode); |
401 | 27 | PDBEntry pdbr = new PDBEntry(); |
402 | 27 | pdbr.setId(pdbid); |
403 | 27 | pdbr.setType(PDBEntry.Type.PDB); |
404 | 27 | pdbr.setChainCode(chaincode); |
405 | 27 | seq.addPDBId(pdbr); |
406 | } | |
407 | else | |
408 | { | |
409 | 0 | jalview.bin.Console.errPrintln("Malformed PDB DR line:" + acn); |
410 | } | |
411 | } | |
412 | else | |
413 | { | |
414 | // default: | |
415 | 1524 | ref = new DBRefEntry(locsrc, version, acn.trim()); |
416 | } | |
417 | } | |
418 | 1551 | if (ref != null) |
419 | { | |
420 | 1551 | seq.addDBRef(ref); |
421 | } | |
422 | 1551 | return ref; |
423 | } | |
424 | ||
425 | /** | |
426 | * accession ID and DB must be identical. Version is ignored. Map is either | |
427 | * not defined or is a match (or is compatible?) | |
428 | */ | |
429 | // TODO unused - remove? | |
430 | public static DbRefComp matchDbAndIdAndEitherMap = new DbRefComp() | |
431 | { | |
432 | 0 | @Override |
433 | public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) | |
434 | { | |
435 | 0 | if (refa.getSource() != null && refb.getSource() != null |
436 | && DBRefUtils.getCanonicalName(refb.getSource()).equals( | |
437 | DBRefUtils.getCanonicalName(refa.getSource()))) | |
438 | { | |
439 | // We dont care about version | |
440 | 0 | if (refa.getAccessionId() != null && refb.getAccessionId() != null |
441 | // FIXME should be && not || here? | |
442 | || refb.getAccessionId().equals(refa.getAccessionId())) | |
443 | { | |
444 | 0 | if ((refa.getMap() == null || refb.getMap() == null) |
445 | || (refa.getMap() != null && refb.getMap() != null | |
446 | && refb.getMap().equals(refa.getMap()))) | |
447 | { | |
448 | 0 | return true; |
449 | } | |
450 | } | |
451 | } | |
452 | 0 | return false; |
453 | } | |
454 | }; | |
455 | ||
456 | /** | |
457 | * accession ID and DB must be identical. Version is ignored. No map on either | |
458 | * or map but no maplist on either or maplist of map on a is the complement of | |
459 | * maplist of map on b. | |
460 | */ | |
461 | // TODO unused - remove? | |
462 | public static DbRefComp matchDbAndIdAndComplementaryMapList = new DbRefComp() | |
463 | { | |
464 | 0 | @Override |
465 | public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) | |
466 | { | |
467 | 0 | if (refa.getSource() != null && refb.getSource() != null |
468 | && DBRefUtils.getCanonicalName(refb.getSource()).equals( | |
469 | DBRefUtils.getCanonicalName(refa.getSource()))) | |
470 | { | |
471 | // We dont care about version | |
472 | 0 | if (refa.getAccessionId() != null && refb.getAccessionId() != null |
473 | || refb.getAccessionId().equals(refa.getAccessionId())) | |
474 | { | |
475 | 0 | if ((refa.getMap() == null && refb.getMap() == null) |
476 | || (refa.getMap() != null && refb.getMap() != null)) | |
477 | { | |
478 | 0 | if ((refb.getMap().getMap() == null |
479 | && refa.getMap().getMap() == null) | |
480 | || (refb.getMap().getMap() != null | |
481 | && refa.getMap().getMap() != null | |
482 | && refb.getMap().getMap().getInverse() | |
483 | .equals(refa.getMap().getMap()))) | |
484 | { | |
485 | 0 | return true; |
486 | } | |
487 | } | |
488 | } | |
489 | } | |
490 | 0 | return false; |
491 | } | |
492 | }; | |
493 | ||
494 | /** | |
495 | * accession ID and DB must be identical. Version is ignored. No map on both | |
496 | * or or map but no maplist on either or maplist of map on a is equivalent to | |
497 | * the maplist of map on b. | |
498 | */ | |
499 | // TODO unused - remove? | |
500 | public static DbRefComp matchDbAndIdAndEquivalentMapList = new DbRefComp() | |
501 | { | |
502 | 0 | @Override |
503 | public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) | |
504 | { | |
505 | 0 | if (refa.getSource() != null && refb.getSource() != null |
506 | && DBRefUtils.getCanonicalName(refb.getSource()).equals( | |
507 | DBRefUtils.getCanonicalName(refa.getSource()))) | |
508 | { | |
509 | // We dont care about version | |
510 | // if ((refa.getVersion()==null || refb.getVersion()==null) | |
511 | // || refb.getVersion().equals(refa.getVersion())) | |
512 | // { | |
513 | 0 | if (refa.getAccessionId() != null && refb.getAccessionId() != null |
514 | || refb.getAccessionId().equals(refa.getAccessionId())) | |
515 | { | |
516 | 0 | if (refa.getMap() == null && refb.getMap() == null) |
517 | { | |
518 | 0 | return true; |
519 | } | |
520 | 0 | if (refa.getMap() != null && refb.getMap() != null |
521 | && ((refb.getMap().getMap() == null | |
522 | && refa.getMap().getMap() == null) | |
523 | || (refb.getMap().getMap() != null | |
524 | && refa.getMap().getMap() != null | |
525 | && refb.getMap().getMap() | |
526 | .equals(refa.getMap().getMap())))) | |
527 | { | |
528 | 0 | return true; |
529 | } | |
530 | } | |
531 | } | |
532 | 0 | return false; |
533 | } | |
534 | }; | |
535 | ||
536 | /** | |
537 | * accession ID and DB must be identical, or null on a. Version is ignored. No | |
538 | * map on either or map but no maplist on either or maplist of map on a is | |
539 | * equivalent to the maplist of map on b. | |
540 | */ | |
541 | public static DbRefComp matchDbAndIdAndEitherMapOrEquivalentMapList = new DbRefComp() | |
542 | { | |
543 | 101835 | @Override |
544 | public boolean matches(DBRefEntry refa, DBRefEntry refb, int mode) | |
545 | { | |
546 | 101835 | if (refa.getSource() != null && refb.getSource() != null |
547 | && DBRefUtils.getCanonicalName(refb.getSource()).equals( | |
548 | DBRefUtils.getCanonicalName(refa.getSource()))) | |
549 | { | |
550 | // We dont care about version | |
551 | 11597 | if (refa.getAccessionId() == null |
552 | || refa.getAccessionId().equals(refb.getAccessionId())) | |
553 | { | |
554 | 912 | if (refa.getMap() == null || refb.getMap() == null) |
555 | { | |
556 | 888 | return true; |
557 | } | |
558 | 24 | if ((refa.getMap() != null && refb.getMap() != null) |
559 | && (refb.getMap().getMap() == null | |
560 | && refa.getMap().getMap() == null) | |
561 | || (refb.getMap().getMap() != null | |
562 | && refa.getMap().getMap() != null | |
563 | && (refb.getMap().getMap() | |
564 | .equals(refa.getMap().getMap())))) | |
565 | { | |
566 | 1 | return true; |
567 | } | |
568 | } | |
569 | } | |
570 | 100946 | return false; |
571 | } | |
572 | }; | |
573 | ||
574 | /** | |
575 | * Returns the (possibly empty) list of those supplied dbrefs which have the | |
576 | * specified source database, with a case-insensitive match of source name | |
577 | * | |
578 | * @param dbRefs | |
579 | * @param source | |
580 | * @return | |
581 | */ | |
582 | 0 | public static List<DBRefEntry> searchRefsForSource(DBRefEntry[] dbRefs, |
583 | String source) | |
584 | { | |
585 | 0 | List<DBRefEntry> matches = new ArrayList<>(); |
586 | 0 | if (dbRefs != null && source != null) |
587 | { | |
588 | 0 | for (DBRefEntry dbref : dbRefs) |
589 | { | |
590 | 0 | if (source.equalsIgnoreCase(dbref.getSource())) |
591 | { | |
592 | 0 | matches.add(dbref); |
593 | } | |
594 | } | |
595 | } | |
596 | 0 | return matches; |
597 | } | |
598 | ||
599 | /** | |
600 | * Returns true if either object is null, or they are equal | |
601 | * | |
602 | * @param o1 | |
603 | * @param o2 | |
604 | * @return | |
605 | */ | |
606 | 0 | public static boolean nullOrEqual(Object o1, Object o2) |
607 | { | |
608 | 0 | if (o1 == null || o2 == null) |
609 | { | |
610 | 0 | return true; |
611 | } | |
612 | 0 | return o1.equals(o2); |
613 | } | |
614 | ||
615 | /** | |
616 | * canonicalise source string before comparing. null is always wildcard | |
617 | * | |
618 | * @param o1 | |
619 | * - null or source string to compare | |
620 | * @param o2 | |
621 | * - null or source string to compare | |
622 | * @return true if either o1 or o2 are null, or o1 equals o2 under | |
623 | * DBRefUtils.getCanonicalName | |
624 | * (o1).equals(DBRefUtils.getCanonicalName(o2)) | |
625 | */ | |
626 | 0 | public static boolean nullOrEqualSource(String o1, String o2) |
627 | { | |
628 | 0 | if (o1 == null || o2 == null) |
629 | { | |
630 | 0 | return true; |
631 | } | |
632 | 0 | return DBRefUtils.getCanonicalName(o1) |
633 | .equals(DBRefUtils.getCanonicalName(o2)); | |
634 | } | |
635 | ||
636 | /** | |
637 | * Selects just the DNA or protein references from a set of references | |
638 | * | |
639 | * @param selectDna | |
640 | * if true, select references to 'standard' DNA databases, else to | |
641 | * 'standard' peptide databases | |
642 | * @param refs | |
643 | * a set of references to select from | |
644 | * @return | |
645 | */ | |
646 | 13752 | public static List<DBRefEntry> selectDbRefs(boolean selectDna, |
647 | List<DBRefEntry> refs) | |
648 | { | |
649 | 13752 | return selectRefs(refs, |
650 | 13752 | selectDna ? DBRefSource.DNACODINGDBS : DBRefSource.PROTEINDBS); |
651 | // could attempt to find other cross | |
652 | // refs here - ie PDB xrefs | |
653 | // (not dna, not protein seq) | |
654 | } | |
655 | ||
656 | /** | |
657 | * Returns the (possibly empty) list of those supplied dbrefs which have the | |
658 | * specified source database, with a case-insensitive match of source name | |
659 | * | |
660 | * @param dbRefs | |
661 | * @param source | |
662 | * @return | |
663 | */ | |
664 | 48 | public static List<DBRefEntry> searchRefsForSource( |
665 | List<DBRefEntry> dbRefs, String source) | |
666 | { | |
667 | 48 | List<DBRefEntry> matches = new ArrayList<DBRefEntry>(); |
668 | 48 | if (dbRefs != null && source != null) |
669 | { | |
670 | 46 | for (DBRefEntry dbref : dbRefs) |
671 | { | |
672 | 129 | if (source.equalsIgnoreCase(dbref.getSource())) |
673 | { | |
674 | 48 | matches.add(dbref); |
675 | } | |
676 | } | |
677 | } | |
678 | 48 | return matches; |
679 | } | |
680 | ||
681 | /** | |
682 | * promote direct database references to primary for nucleotide or protein | |
683 | * sequences if they have an appropriate primary ref | |
684 | * <table> | |
685 | * <tr> | |
686 | * <th>Seq Type</th> | |
687 | * <th>Primary DB</th> | |
688 | * <th>Direct which will be promoted</th> | |
689 | * </tr> | |
690 | * <tr align=center> | |
691 | * <td>peptides</td> | |
692 | * <td>Ensembl</td> | |
693 | * <td>Uniprot</td> | |
694 | * </tr> | |
695 | * <tr align=center> | |
696 | * <td>peptides</td> | |
697 | * <td>Ensembl</td> | |
698 | * <td>Uniprot</td> | |
699 | * </tr> | |
700 | * <tr align=center> | |
701 | * <td>dna</td> | |
702 | * <td>Ensembl</td> | |
703 | * <td>ENA</td> | |
704 | * </tr> | |
705 | * </table> | |
706 | * | |
707 | * @param sequence | |
708 | */ | |
709 | 288 | public static void ensurePrimaries(SequenceI sequence, |
710 | List<DBRefEntry> pr) | |
711 | { | |
712 | 288 | if (pr.size() == 0) |
713 | { | |
714 | // nothing to do | |
715 | 268 | return; |
716 | } | |
717 | 20 | int sstart = sequence.getStart(); |
718 | 20 | int send = sequence.getEnd(); |
719 | 20 | boolean isProtein = sequence.isProtein(); |
720 | 20 | BitSet bsSelect = new BitSet(); |
721 | ||
722 | // List<DBRefEntry> selfs = new ArrayList<DBRefEntry>(); | |
723 | // { | |
724 | ||
725 | // List<DBRefEntry> selddfs = selectDbRefs(!isprot, sequence.getDBRefs()); | |
726 | // if (selfs == null || selfs.size() == 0) | |
727 | // { | |
728 | // // nothing to do | |
729 | // return; | |
730 | // } | |
731 | ||
732 | 20 | List<DBRefEntry> dbrefs = sequence.getDBRefs(); |
733 | 20 | bsSelect.set(0, dbrefs.size()); |
734 | ||
735 | 20 | if (!selectRefsBS(dbrefs, isProtein ? DBRefSource.PROTEIN_MASK |
736 | : DBRefSource.DNA_CODING_MASK, bsSelect)) | |
737 | 2 | return; |
738 | ||
739 | // selfs.addAll(selfArray); | |
740 | // } | |
741 | ||
742 | // filter non-primary refs | |
743 | 42 | for (int ip = pr.size(); --ip >= 0;) |
744 | { | |
745 | 24 | DBRefEntry p = pr.get(ip); |
746 | 268 | for (int i = bsSelect.nextSetBit(0); i >= 0; i = bsSelect |
747 | .nextSetBit(i + 1)) | |
748 | { | |
749 | 244 | if (dbrefs.get(i) == p) |
750 | 3 | bsSelect.clear(i); |
751 | } | |
752 | // while (selfs.contains(p)) | |
753 | // { | |
754 | // selfs.remove(p); | |
755 | // } | |
756 | } | |
757 | // List<DBRefEntry> toPromote = new ArrayList<DBRefEntry>(); | |
758 | ||
759 | 20 | for (int ip = pr.size(), keys = 0; --ip >= 0 |
760 | && keys != DBRefSource.PRIMARY_MASK;) | |
761 | { | |
762 | 19 | DBRefEntry p = pr.get(ip); |
763 | 19 | if (isProtein) |
764 | { | |
765 | 5 | switch (getCanonicalName(p.getSource())) |
766 | { | |
767 | 3 | case DBRefSource.UNIPROT: |
768 | 3 | keys |= DBRefSource.UNIPROT_MASK; |
769 | 3 | break; |
770 | 0 | case DBRefSource.ENSEMBL: |
771 | 0 | keys |= DBRefSource.ENSEMBL_MASK; |
772 | 0 | break; |
773 | } | |
774 | } | |
775 | else | |
776 | { | |
777 | // TODO: promote transcript refs ?? | |
778 | } | |
779 | 19 | if (keys == 0 || !selectRefsBS(dbrefs, keys, bsSelect)) |
780 | 17 | return; |
781 | // if (candidates != null) | |
782 | { | |
783 | 8 | for (int ic = bsSelect.nextSetBit(0); ic >= 0; ic = bsSelect |
784 | .nextSetBit(ic + 1)) | |
785 | // for (int ic = 0, n = candidates.size(); ic < n; ic++) | |
786 | { | |
787 | 6 | DBRefEntry cand = dbrefs.get(ic);// candidates.get(ic); |
788 | 6 | if (cand.hasMap()) |
789 | { | |
790 | 0 | Mapping map = cand.getMap(); |
791 | 0 | SequenceI cto = map.getTo(); |
792 | 0 | if (cto != null && cto != sequence) |
793 | { | |
794 | // can't promote refs with mappings to other sequences | |
795 | 0 | continue; |
796 | } | |
797 | 0 | MapList mlist = map.getMap(); |
798 | 0 | if (mlist.getFromLowest() != sstart |
799 | && mlist.getFromHighest() != send) | |
800 | { | |
801 | // can't promote refs with mappings from a region of this sequence | |
802 | // - eg CDS | |
803 | 0 | continue; |
804 | } | |
805 | } | |
806 | // and promote - not that version must be non-null here, | |
807 | // as p must have passed isPrimaryCandidate() | |
808 | 6 | cand.setVersion(cand.getVersion() + " (promoted)"); |
809 | 6 | bsSelect.clear(ic); |
810 | // selfs.remove(cand); | |
811 | // toPromote.add(cand); | |
812 | 6 | if (!cand.isPrimaryCandidate()) |
813 | { | |
814 | 3 | if (Console.isDebugEnabled()) |
815 | { | |
816 | 0 | Console.debug( |
817 | "Warning: Couldn't promote dbref " + cand.toString() | |
818 | + " for sequence " + sequence.toString()); | |
819 | } | |
820 | } | |
821 | } | |
822 | } | |
823 | } | |
824 | } | |
825 | ||
826 | } |