Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
DBRefFetcher | 64 | 240 | 97 | ||
DBRefFetcher.FetchFinishedListenerI | 70 | 0 | 0 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | package jalview.ws; | |
22 | ||
23 | import java.util.Locale; | |
24 | ||
25 | import java.util.ArrayList; | |
26 | import java.util.Arrays; | |
27 | import java.util.Enumeration; | |
28 | import java.util.HashMap; | |
29 | import java.util.Hashtable; | |
30 | import java.util.List; | |
31 | import java.util.Map; | |
32 | import java.util.StringTokenizer; | |
33 | import java.util.Vector; | |
34 | import java.util.regex.Matcher; | |
35 | import java.util.regex.Pattern; | |
36 | ||
37 | import jalview.analysis.AlignSeq; | |
38 | import jalview.api.FeatureSettingsModelI; | |
39 | import jalview.bin.Cache; | |
40 | import jalview.bin.Console; | |
41 | import jalview.datamodel.AlignmentI; | |
42 | import jalview.datamodel.DBRefEntry; | |
43 | import jalview.datamodel.DBRefSource; | |
44 | import jalview.datamodel.Mapping; | |
45 | import jalview.datamodel.SequenceI; | |
46 | import jalview.gui.CutAndPasteTransfer; | |
47 | import jalview.gui.Desktop; | |
48 | import jalview.gui.FeatureSettings; | |
49 | import jalview.gui.IProgressIndicator; | |
50 | import jalview.gui.OOMWarning; | |
51 | import jalview.util.DBRefUtils; | |
52 | import jalview.util.MessageManager; | |
53 | import jalview.ws.seqfetcher.DbSourceProxy; | |
54 | import uk.ac.ebi.picr.model.UPEntry; | |
55 | import uk.ac.ebi.www.picr.AccessionMappingService.AccessionMapperServiceLocator; | |
56 | ||
57 | /** | |
58 | * Implements a runnable for validating a sequence against external databases | |
59 | * and then propagating references and features onto the sequence(s) | |
60 | * | |
61 | * @author $author$ | |
62 | * @version $Revision$ | |
63 | */ | |
64 | public class DBRefFetcher implements Runnable | |
65 | { | |
66 | private static final String NEWLINE = System.lineSeparator(); | |
67 | ||
68 | public static final String TRIM_RETRIEVED_SEQUENCES = "TRIM_FETCHED_DATASET_SEQS"; | |
69 | ||
/**
 * Callback implemented by objects that want to be told when a fetch has
 * completed. Instances are registered with
 * {@code DBRefFetcher.addListener(FetchFinishedListenerI)}.
 */
public interface FetchFinishedListenerI
{
  /**
   * Called once for each registered listener at the end of
   * {@code DBRefFetcher.run()}, after every database source has been
   * processed.
   */
  void finished();
}
74 | ||
75 | SequenceI[] dataset; | |
76 | ||
77 | IProgressIndicator progressWindow; | |
78 | ||
79 | CutAndPasteTransfer output = new CutAndPasteTransfer(); | |
80 | ||
81 | /** | |
82 | * picr client instance | |
83 | */ | |
84 | uk.ac.ebi.www.picr.AccessionMappingService.AccessionMapperInterface picrClient = null; | |
85 | ||
86 | // This will be a collection of Vectors of sequenceI refs. | |
87 | // The key will be the seq name or accession id of the seq | |
88 | Hashtable<String, Vector<SequenceI>> seqRefs; | |
89 | ||
90 | DbSourceProxy[] dbSources; | |
91 | ||
92 | SequenceFetcher sfetcher; | |
93 | ||
94 | private List<FetchFinishedListenerI> listeners; | |
95 | ||
96 | private SequenceI[] alseqs; | |
97 | ||
98 | /* | |
99 | * when true - retrieved sequences will be trimmed to cover longest derived | |
100 | * alignment sequence | |
101 | */ | |
102 | private boolean trimDsSeqs = true; | |
103 | ||
104 | /** | |
105 | * Creates a new DBRefFetcher object and fetches from the currently selected | |
106 | * set of databases, if this is null then it fetches based on feature settings | |
107 | * | |
108 | * @param seqs | |
109 | * fetch references for these SequenceI array | |
110 | * @param progressIndicatorFrame | |
111 | * the frame for progress bar monitoring | |
112 | * @param sources | |
113 | * array of DbSourceProxy to query references form | |
114 | * @param featureSettings | |
115 | * FeatureSettings to get alternative DbSourceProxy from | |
116 | * @param isNucleotide | |
117 | * indicates if the array of SequenceI are Nucleotides or not | |
118 | */ | |
119 | 0 | public DBRefFetcher(SequenceI[] seqs, |
120 | IProgressIndicator progressIndicatorFrame, | |
121 | DbSourceProxy[] sources, FeatureSettings featureSettings, | |
122 | boolean isNucleotide) | |
123 | { | |
124 | 0 | listeners = new ArrayList<>(); |
125 | 0 | this.progressWindow = progressIndicatorFrame; |
126 | 0 | alseqs = new SequenceI[seqs.length]; |
127 | 0 | SequenceI[] ds = new SequenceI[seqs.length]; |
128 | 0 | for (int i = 0; i < seqs.length; i++) |
129 | { | |
130 | 0 | alseqs[i] = seqs[i]; |
131 | 0 | if (seqs[i].getDatasetSequence() != null) |
132 | { | |
133 | 0 | ds[i] = seqs[i].getDatasetSequence(); |
134 | } | |
135 | else | |
136 | { | |
137 | 0 | ds[i] = seqs[i]; |
138 | } | |
139 | } | |
140 | 0 | this.dataset = ds; |
141 | // TODO Jalview 2.5 lots of this code should be in the gui package! | |
142 | 0 | sfetcher = jalview.gui.SequenceFetcher.getSequenceFetcherSingleton(); |
143 | // set default behaviour for transferring excess sequence data to the | |
144 | // dataset | |
145 | 0 | trimDsSeqs = Cache.getDefault(TRIM_RETRIEVED_SEQUENCES, true); |
146 | 0 | if (sources == null) |
147 | { | |
148 | 0 | setDatabaseSources(featureSettings, isNucleotide); |
149 | } | |
150 | else | |
151 | { | |
152 | // we assume the caller knows what they're doing and ensured that all the | |
153 | // db source names are valid | |
154 | 0 | dbSources = sources; |
155 | } | |
156 | } | |
157 | ||
158 | /** | |
159 | * Helper method to configure the list of database sources to query | |
160 | * | |
161 | * @param featureSettings | |
162 | * @param forNucleotide | |
163 | */ | |
164 | 0 | void setDatabaseSources(FeatureSettings featureSettings, |
165 | boolean forNucleotide) | |
166 | { | |
167 | // af.featureSettings_actionPerformed(null); | |
168 | 0 | String[] defdb = null; |
169 | 0 | List<DbSourceProxy> selsources = new ArrayList<>(); |
170 | // select appropriate databases based on alignFrame context. | |
171 | 0 | if (forNucleotide) |
172 | { | |
173 | 0 | defdb = DBRefSource.DNACODINGDBS; |
174 | } | |
175 | else | |
176 | { | |
177 | 0 | defdb = DBRefSource.PROTEINDBS; |
178 | } | |
179 | 0 | List<DbSourceProxy> srces = new ArrayList<>(); |
180 | 0 | for (String ddb : defdb) |
181 | { | |
182 | 0 | List<DbSourceProxy> srcesfordb = sfetcher.getSourceProxy(ddb); |
183 | 0 | if (srcesfordb != null) |
184 | { | |
185 | 0 | for (DbSourceProxy src : srcesfordb) |
186 | { | |
187 | 0 | if (!srces.contains(src)) |
188 | { | |
189 | 0 | srces.addAll(srcesfordb); |
190 | } | |
191 | } | |
192 | } | |
193 | } | |
194 | // append the PDB data source, since it is 'special', catering for both | |
195 | // nucleotide and protein | |
196 | // srces.addAll(sfetcher.getSourceProxy(DBRefSource.PDB)); | |
197 | ||
198 | 0 | srces.addAll(selsources); |
199 | 0 | dbSources = srces.toArray(new DbSourceProxy[srces.size()]); |
200 | } | |
201 | ||
/**
 * Convenience constructor taking only the sequences. No progress indicator
 * is used, and the database sources are the defaults for non-nucleotide
 * sequences.
 *
 * @param sequences
 *          the sequences to fetch references for
 */
public DBRefFetcher(SequenceI[] sequences)
{
  this(sequences, null, null, null, false);
}
211 | ||
212 | /** | |
213 | * Add a listener to be notified when sequence fetching is complete | |
214 | * | |
215 | * @param l | |
216 | */ | |
217 | 0 | public void addListener(FetchFinishedListenerI l) |
218 | { | |
219 | 0 | listeners.add(l); |
220 | } | |
221 | ||
222 | /** | |
223 | * start the fetcher thread | |
224 | * | |
225 | * @param waitTillFinished | |
226 | * true to block until the fetcher has finished | |
227 | */ | |
228 | 0 | public void fetchDBRefs(boolean waitTillFinished) |
229 | { | |
230 | 0 | if (waitTillFinished) |
231 | { | |
232 | 0 | run(); |
233 | } | |
234 | else | |
235 | { | |
236 | 0 | new Thread(this).start(); |
237 | } | |
238 | } | |
239 | ||
240 | /** | |
241 | * The sequence will be added to a vector of sequences belonging to key which | |
242 | * could be either seq name or dbref id | |
243 | * | |
244 | * @param seq | |
245 | * SequenceI | |
246 | * @param key | |
247 | * String | |
248 | */ | |
249 | 0 | void addSeqId(SequenceI seq, String key) |
250 | { | |
251 | 0 | key = key.toUpperCase(Locale.ROOT); |
252 | ||
253 | 0 | Vector<SequenceI> seqs; |
254 | 0 | if (seqRefs.containsKey(key)) |
255 | { | |
256 | 0 | seqs = seqRefs.get(key); |
257 | ||
258 | 0 | if (seqs != null && !seqs.contains(seq)) |
259 | { | |
260 | 0 | seqs.addElement(seq); |
261 | } | |
262 | 0 | else if (seqs == null) |
263 | { | |
264 | 0 | seqs = new Vector<>(); |
265 | 0 | seqs.addElement(seq); |
266 | } | |
267 | ||
268 | } | |
269 | else | |
270 | { | |
271 | 0 | seqs = new Vector<>(); |
272 | 0 | seqs.addElement(seq); |
273 | } | |
274 | ||
275 | 0 | seqRefs.put(key, seqs); |
276 | } | |
277 | ||
278 | /** | |
279 | * DOCUMENT ME! | |
280 | */ | |
281 | 0 | @Override |
282 | public void run() | |
283 | { | |
284 | 0 | if (dbSources == null) |
285 | { | |
286 | 0 | throw new Error(MessageManager |
287 | .getString("error.implementation_error_must_init_dbsources")); | |
288 | } | |
289 | 0 | long startTime = System.currentTimeMillis(); |
290 | 0 | if (progressWindow != null) |
291 | { | |
292 | 0 | progressWindow.setProgressBar( |
293 | MessageManager.getString("status.fetching_db_refs"), | |
294 | startTime); | |
295 | } | |
296 | 0 | try |
297 | { | |
298 | 0 | if (Cache.getDefault("DBREFFETCH_USEPICR", false)) |
299 | { | |
300 | 0 | picrClient = new AccessionMapperServiceLocator() |
301 | .getAccessionMapperPort(); | |
302 | } | |
303 | } catch (Exception e) | |
304 | { | |
305 | 0 | jalview.bin.Console |
306 | .errPrintln("Couldn't locate PICR service instance.\n"); | |
307 | 0 | e.printStackTrace(); |
308 | } | |
309 | ||
310 | 0 | Vector<SequenceI> sdataset = new Vector<>(Arrays.asList(dataset)); |
311 | 0 | List<String> warningMessages = new ArrayList<>(); |
312 | ||
313 | // clear any old feature display settings recorded from past sessions | |
314 | 0 | featureDisplaySettings = null; |
315 | ||
316 | 0 | int db = 0; |
317 | 0 | while (sdataset.size() > 0 && db < dbSources.length) |
318 | { | |
319 | 0 | int maxqlen = 1; // default number of queries made at one time |
320 | 0 | jalview.bin.Console |
321 | .outPrintln("Verifying against " + dbSources[db].getDbName()); | |
322 | ||
323 | // iterate through db for each remaining un-verified sequence | |
324 | 0 | SequenceI[] currSeqs = new SequenceI[sdataset.size()]; |
325 | 0 | sdataset.copyInto(currSeqs);// seqs that are to be validated against |
326 | // dbSources[db] | |
327 | 0 | Vector<String> queries = new Vector<>(); // generated queries curSeq |
328 | 0 | seqRefs = new Hashtable<>(); |
329 | ||
330 | 0 | int seqIndex = 0; |
331 | ||
332 | 0 | DbSourceProxy dbsource = dbSources[db]; |
333 | // for moment, we dumbly iterate over all retrieval sources for a | |
334 | // particular database | |
335 | // TODO: introduce multithread multisource queries and logic to remove a | |
336 | // query from other sources if any source for a database returns a | |
337 | // record | |
338 | 0 | maxqlen = dbsource.getMaximumQueryCount(); |
339 | ||
340 | 0 | while (queries.size() > 0 || seqIndex < currSeqs.length) |
341 | { | |
342 | 0 | if (queries.size() > 0) |
343 | { | |
344 | // Still queries to make for current seqIndex | |
345 | 0 | StringBuffer queryString = new StringBuffer(""); |
346 | 0 | int numq = 0; |
347 | 0 | int nqSize = (maxqlen > queries.size()) ? queries.size() |
348 | : maxqlen; | |
349 | ||
350 | 0 | while (queries.size() > 0 && numq < nqSize) |
351 | { | |
352 | 0 | String query = queries.elementAt(0); |
353 | 0 | if (dbsource.isValidReference(query)) |
354 | { | |
355 | 0 | queryString.append( |
356 | 0 | (numq == 0) ? "" : dbsource.getAccessionSeparator()); |
357 | 0 | queryString.append(query); |
358 | 0 | numq++; |
359 | } | |
360 | // remove the extracted query string | |
361 | 0 | queries.removeElementAt(0); |
362 | } | |
363 | // make the queries and process the response | |
364 | 0 | AlignmentI retrieved = null; |
365 | 0 | try |
366 | { | |
367 | 0 | if (Console.isDebugEnabled()) |
368 | { | |
369 | 0 | Console.debug("Querying " + dbsource.getDbName() + " with : '" |
370 | + queryString.toString() + "'"); | |
371 | } | |
372 | 0 | retrieved = dbsource.getSequenceRecords(queryString.toString()); |
373 | } catch (Exception ex) | |
374 | { | |
375 | 0 | ex.printStackTrace(); |
376 | } catch (OutOfMemoryError err) | |
377 | { | |
378 | 0 | new OOMWarning("retrieving database references (" |
379 | + queryString.toString() + ")", err); | |
380 | } | |
381 | 0 | if (retrieved != null) |
382 | { | |
383 | 0 | transferReferences(sdataset, dbsource, retrieved, trimDsSeqs, |
384 | warningMessages); | |
385 | } | |
386 | } | |
387 | else | |
388 | { | |
389 | // make some more strings for use as queries | |
390 | 0 | for (int i = 0; (seqIndex < dataset.length) |
391 | && (i < 50); seqIndex++, i++) | |
392 | { | |
393 | 0 | SequenceI sequence = dataset[seqIndex]; |
394 | 0 | List<DBRefEntry> uprefs = DBRefUtils |
395 | .selectRefs(sequence.getDBRefs(), new String[] | |
396 | { dbsource.getDbSource() }); // jalview.datamodel.DBRefSource.UNIPROT | |
397 | // }); | |
398 | // check for existing dbrefs to use | |
399 | 0 | if (uprefs != null && uprefs.size() > 0) |
400 | { | |
401 | 0 | for (int j = 0, n = uprefs.size(); j < n; j++) |
402 | { | |
403 | 0 | DBRefEntry upref = uprefs.get(j); |
404 | 0 | addSeqId(sequence, upref.getAccessionId()); |
405 | 0 | queries.addElement( |
406 | upref.getAccessionId().toUpperCase(Locale.ROOT)); | |
407 | } | |
408 | } | |
409 | else | |
410 | { | |
411 | 0 | Pattern possibleIds = Pattern.compile("[A-Za-z0-9_]+"); |
412 | // generate queries from sequence ID string | |
413 | 0 | Matcher tokens = possibleIds.matcher(sequence.getName()); |
414 | 0 | int p = 0; |
415 | 0 | while (tokens.find(p)) |
416 | { | |
417 | 0 | String token = tokens.group(); |
418 | 0 | p = tokens.end(); |
419 | 0 | UPEntry[] presp = null; |
420 | 0 | if (picrClient != null) |
421 | { | |
422 | // resolve the string against PICR to recover valid IDs | |
423 | 0 | try |
424 | { | |
425 | 0 | presp = picrClient.getUPIForAccession(token, null, |
426 | picrClient.getMappedDatabaseNames(), null, | |
427 | true); | |
428 | } catch (Exception e) | |
429 | { | |
430 | 0 | jalview.bin.Console.errPrintln( |
431 | "Exception with Picr for '" + token + "'\n"); | |
432 | 0 | e.printStackTrace(); |
433 | } | |
434 | } | |
435 | 0 | if (presp != null && presp.length > 0) |
436 | { | |
437 | 0 | for (int id = 0; id < presp.length; id++) |
438 | { | |
439 | // construct sequences from response if sequences are | |
440 | // present, and do a transferReferences | |
441 | // otherwise transfer non sequence x-references directly. | |
442 | } | |
443 | 0 | jalview.bin.Console.outPrintln( |
444 | "Validated ID against PICR... (for what its worth):" | |
445 | + token); | |
446 | 0 | addSeqId(sequence, token); |
447 | 0 | queries.addElement(token.toUpperCase(Locale.ROOT)); |
448 | } | |
449 | else | |
450 | { | |
451 | // if () | |
452 | // jalview.bin.Console.outPrintln("Not querying source with | |
453 | // token="+token+"\n"); | |
454 | 0 | addSeqId(sequence, token); |
455 | 0 | queries.addElement(token.toUpperCase(Locale.ROOT)); |
456 | } | |
457 | } | |
458 | } | |
459 | } | |
460 | } | |
461 | } | |
462 | // advance to next database | |
463 | 0 | db++; |
464 | } // all databases have been queried | |
465 | 0 | if (!warningMessages.isEmpty()) |
466 | { | |
467 | 0 | StringBuilder sb = new StringBuilder(warningMessages.size() * 30); |
468 | 0 | sb.append(MessageManager |
469 | .getString("label.your_sequences_have_been_verified")); | |
470 | 0 | for (String msg : warningMessages) |
471 | { | |
472 | 0 | sb.append(msg).append(NEWLINE); |
473 | } | |
474 | 0 | output.setText(sb.toString()); |
475 | ||
476 | 0 | Desktop.addInternalFrame(output, |
477 | MessageManager.getString("label.sequences_updated"), 600, | |
478 | 300); | |
479 | // The above is the dataset, we must now find out the index | |
480 | // of the viewed sequence | |
481 | ||
482 | } | |
483 | 0 | if (progressWindow != null) |
484 | { | |
485 | 0 | progressWindow.setProgressBar( |
486 | MessageManager.getString("label.dbref_search_completed"), | |
487 | startTime); | |
488 | } | |
489 | ||
490 | 0 | for (FetchFinishedListenerI listener : listeners) |
491 | { | |
492 | 0 | listener.finished(); |
493 | } | |
494 | } | |
495 | ||
496 | /** | |
497 | * Verify local sequences in seqRefs against the retrieved sequence database | |
498 | * records. Returns true if any sequence was modified as a result (start/end | |
499 | * changed and/or sequence enlarged), else false. | |
500 | * | |
501 | * @param sdataset | |
502 | * dataset sequences we are retrieving for | |
503 | * @param dbSource | |
504 | * database source we are retrieving from | |
505 | * @param retrievedAl | |
506 | * retrieved sequences as alignment | |
507 | * @param trimDatasetSeqs | |
508 | * if true, sequences will not be enlarged to match longer retrieved | |
509 | * sequences, only their start/end adjusted | |
510 | * @param warningMessages | |
511 | * a list of messages to add to | |
512 | */ | |
513 | 0 | boolean transferReferences(Vector<SequenceI> sdataset, |
514 | DbSourceProxy dbSourceProxy, AlignmentI retrievedAl, | |
515 | boolean trimDatasetSeqs, List<String> warningMessages) | |
516 | { | |
517 | // jalview.bin.Console.outPrintln("trimming ? " + trimDatasetSeqs); | |
518 | 0 | if (retrievedAl == null || retrievedAl.getHeight() == 0) |
519 | { | |
520 | 0 | return false; |
521 | } | |
522 | ||
523 | 0 | String dbSource = dbSourceProxy.getDbName(); |
524 | 0 | boolean modified = false; |
525 | 0 | SequenceI[] retrieved = recoverDbSequences( |
526 | retrievedAl.getSequencesArray()); | |
527 | 0 | SequenceI sequence = null; |
528 | ||
529 | 0 | for (SequenceI retrievedSeq : retrieved) |
530 | { | |
531 | // Work out which sequences this sequence matches, | |
532 | // taking into account all accessionIds and names in the file | |
533 | 0 | Vector<SequenceI> sequenceMatches = new Vector<>(); |
534 | // look for corresponding accession ids | |
535 | 0 | List<DBRefEntry> entryRefs = DBRefUtils |
536 | .selectRefs(retrievedSeq.getDBRefs(), new String[] | |
537 | { dbSource }); | |
538 | 0 | if (entryRefs == null) |
539 | { | |
540 | 0 | System.err |
541 | .println("Dud dbSource string ? no entryrefs selected for " | |
542 | + dbSource + " on " + retrievedSeq.getName()); | |
543 | 0 | continue; |
544 | } | |
545 | 0 | for (int j = 0, n = entryRefs.size(); j < n; j++) |
546 | { | |
547 | 0 | DBRefEntry ref = entryRefs.get(j); |
548 | 0 | String accessionId = ref.getAccessionId(); |
549 | // match up on accessionId | |
550 | 0 | if (seqRefs.containsKey(accessionId.toUpperCase(Locale.ROOT))) |
551 | { | |
552 | 0 | Vector<SequenceI> seqs = seqRefs.get(accessionId); |
553 | 0 | for (int jj = 0; jj < seqs.size(); jj++) |
554 | { | |
555 | 0 | sequence = seqs.elementAt(jj); |
556 | 0 | if (!sequenceMatches.contains(sequence)) |
557 | { | |
558 | 0 | sequenceMatches.addElement(sequence); |
559 | } | |
560 | } | |
561 | } | |
562 | } | |
563 | 0 | if (sequenceMatches.isEmpty()) |
564 | { | |
565 | // failed to match directly on accessionId==query so just compare all | |
566 | // sequences to entry | |
567 | 0 | Enumeration<String> e = seqRefs.keys(); |
568 | 0 | while (e.hasMoreElements()) |
569 | { | |
570 | 0 | Vector<SequenceI> sqs = seqRefs.get(e.nextElement()); |
571 | 0 | if (sqs != null && sqs.size() > 0) |
572 | { | |
573 | 0 | Enumeration<SequenceI> sqe = sqs.elements(); |
574 | 0 | while (sqe.hasMoreElements()) |
575 | { | |
576 | 0 | sequenceMatches.addElement(sqe.nextElement()); |
577 | } | |
578 | } | |
579 | } | |
580 | } | |
581 | // look for corresponding names | |
582 | // this is uniprot specific ? | |
583 | // could be useful to extend this so we try to find any 'significant' | |
584 | // information in common between two sequence objects. | |
585 | /* | |
586 | * List<DBRefEntry> entryRefs = | |
587 | * jalview.util.DBRefUtils.selectRefs(entry.getDBRef(), new String[] { | |
588 | * dbSource }); for (int j = 0; j < entry.getName().size(); j++) { String | |
589 | * name = entry.getName().elementAt(j).toString(); if | |
590 | * (seqRefs.containsKey(name)) { Vector seqs = (Vector) seqRefs.get(name); | |
591 | * for (int jj = 0; jj < seqs.size(); jj++) { sequence = (SequenceI) | |
592 | * seqs.elementAt(jj); if (!sequenceMatches.contains(sequence)) { | |
593 | * sequenceMatches.addElement(sequence); } } } } | |
594 | */ | |
595 | 0 | if (sequenceMatches.size() > 0) |
596 | { | |
597 | 0 | addFeatureSettings(dbSourceProxy); |
598 | } | |
599 | // sequenceMatches now contains the set of all sequences associated with | |
600 | // the returned db record | |
601 | 0 | final String retrievedSeqString = retrievedSeq.getSequenceAsString(); |
602 | 0 | String entrySeq = retrievedSeqString.toUpperCase(Locale.ROOT); |
603 | 0 | for (int m = 0; m < sequenceMatches.size(); m++) |
604 | { | |
605 | 0 | sequence = sequenceMatches.elementAt(m); |
606 | // only update start and end positions and shift features if there are | |
607 | // no existing references | |
608 | // TODO: test for legacy where uniprot or EMBL refs exist but no | |
609 | // mappings are made (but content matches retrieved set) | |
610 | 0 | boolean updateRefFrame = sequence.getDBRefs() == null |
611 | || sequence.getDBRefs().size() == 0; | |
612 | // TODO: | |
613 | // verify sequence against the entry sequence | |
614 | ||
615 | 0 | Mapping mp; |
616 | 0 | final int sequenceStart = sequence.getStart(); |
617 | ||
618 | 0 | boolean remoteEnclosesLocal = false; |
619 | 0 | String nonGapped = AlignSeq |
620 | .extractGaps("-. ", sequence.getSequenceAsString()) | |
621 | .toUpperCase(Locale.ROOT); | |
622 | 0 | int absStart = entrySeq.indexOf(nonGapped); |
623 | 0 | if (absStart == -1) |
624 | { | |
625 | // couldn't find local sequence in sequence from database, so check if | |
626 | // the database sequence is a subsequence of local sequence | |
627 | 0 | absStart = nonGapped.indexOf(entrySeq); |
628 | 0 | if (absStart == -1) |
629 | { | |
630 | // verification failed. couldn't find any relationship between | |
631 | // entrySeq and local sequence | |
632 | // messages suppressed as many-to-many matches are confusing | |
633 | // String msg = sequence.getName() | |
634 | // + " Sequence not 100% match with " | |
635 | // + retrievedSeq.getName(); | |
636 | // addWarningMessage(warningMessages, msg); | |
637 | 0 | continue; |
638 | } | |
639 | /* | |
640 | * retrieved sequence is a proper subsequence of local sequence | |
641 | */ | |
642 | 0 | String msg = sequence.getName() + " has " + absStart |
643 | + " prefixed residues compared to " | |
644 | + retrievedSeq.getName(); | |
645 | 0 | addWarningMessage(warningMessages, msg); |
646 | ||
647 | /* | |
648 | * So create a mapping to the external entry from the matching region of | |
649 | * the local sequence, and leave local start/end untouched. | |
650 | */ | |
651 | 0 | mp = new Mapping(null, |
652 | new int[] | |
653 | { sequenceStart + absStart, | |
654 | sequenceStart + absStart + entrySeq.length() - 1 }, | |
655 | new int[] | |
656 | { retrievedSeq.getStart(), | |
657 | retrievedSeq.getStart() + entrySeq.length() - 1 }, | |
658 | 1, 1); | |
659 | 0 | updateRefFrame = false; |
660 | } | |
661 | else | |
662 | { | |
663 | /* | |
664 | * local sequence is a subsequence of (or matches) retrieved sequence | |
665 | */ | |
666 | 0 | remoteEnclosesLocal = true; |
667 | 0 | mp = null; |
668 | ||
669 | 0 | if (updateRefFrame) |
670 | { | |
671 | /* | |
672 | * relocate existing sequence features by offset | |
673 | */ | |
674 | 0 | int startShift = absStart - sequenceStart + 1; |
675 | 0 | if (startShift != 0) |
676 | { | |
677 | 0 | modified |= sequence.getFeatures().shiftFeatures(1, |
678 | startShift); | |
679 | } | |
680 | } | |
681 | } | |
682 | ||
683 | 0 | jalview.bin.Console.outPrintln("Adding dbrefs to " |
684 | + sequence.getName() + " from " + dbSource + " sequence : " | |
685 | + retrievedSeq.getName()); | |
686 | 0 | sequence.transferAnnotation(retrievedSeq, mp); |
687 | ||
688 | 0 | absStart += retrievedSeq.getStart(); |
689 | 0 | int absEnd = absStart + nonGapped.length() - 1; |
690 | 0 | if (!trimDatasetSeqs) |
691 | { | |
692 | /* | |
693 | * update start position and/or expand to longer retrieved sequence | |
694 | */ | |
695 | 0 | if (!retrievedSeqString.equals(sequence.getSequenceAsString()) |
696 | && remoteEnclosesLocal) | |
697 | { | |
698 | 0 | sequence.setSequence(retrievedSeqString); |
699 | 0 | modified = true; |
700 | 0 | addWarningMessage(warningMessages, |
701 | "Sequence for " + sequence.getName() + " expanded from " | |
702 | + retrievedSeq.getName()); | |
703 | } | |
704 | 0 | if (sequence.getStart() != retrievedSeq.getStart()) |
705 | { | |
706 | 0 | sequence.setStart(retrievedSeq.getStart()); |
707 | 0 | modified = true; |
708 | 0 | if (absStart != sequenceStart) |
709 | { | |
710 | 0 | addWarningMessage(warningMessages, |
711 | "Start/end position for " + sequence.getName() | |
712 | + " updated from " + retrievedSeq.getName()); | |
713 | } | |
714 | } | |
715 | } | |
716 | 0 | if (updateRefFrame) |
717 | { | |
718 | // finally, update local sequence reference frame if we're allowed | |
719 | 0 | if (trimDatasetSeqs) |
720 | { | |
721 | // just fix start/end | |
722 | 0 | if (sequence.getStart() != absStart |
723 | || sequence.getEnd() != absEnd) | |
724 | { | |
725 | 0 | sequence.setStart(absStart); |
726 | 0 | sequence.setEnd(absEnd); |
727 | 0 | modified = true; |
728 | 0 | addWarningMessage(warningMessages, |
729 | "Start/end for " + sequence.getName() | |
730 | + " updated from " + retrievedSeq.getName()); | |
731 | } | |
732 | } | |
733 | // search for alignment sequences to update coordinate frame for | |
734 | 0 | for (int alsq = 0; alsq < alseqs.length; alsq++) |
735 | { | |
736 | 0 | if (alseqs[alsq].getDatasetSequence() == sequence) |
737 | { | |
738 | 0 | String ngAlsq = AlignSeq |
739 | .extractGaps("-. ", | |
740 | alseqs[alsq].getSequenceAsString()) | |
741 | .toUpperCase(Locale.ROOT); | |
742 | 0 | int oldstrt = alseqs[alsq].getStart(); |
743 | 0 | alseqs[alsq].setStart(sequence.getSequenceAsString() |
744 | .toUpperCase(Locale.ROOT).indexOf(ngAlsq) | |
745 | + sequence.getStart()); | |
746 | 0 | if (oldstrt != alseqs[alsq].getStart()) |
747 | { | |
748 | 0 | alseqs[alsq].setEnd( |
749 | ngAlsq.length() + alseqs[alsq].getStart() - 1); | |
750 | 0 | modified = true; |
751 | } | |
752 | } | |
753 | } | |
754 | // TODO: search for all other references to this dataset sequence, and | |
755 | // update start/end | |
756 | // TODO: update all AlCodonMappings which involve this alignment | |
757 | // sequence (e.g. Q30167 cdna translation from exon2 product (vamsas | |
758 | // demo) | |
759 | } | |
760 | // and remove it from the rest | |
761 | // TODO: decide if we should remove annotated sequence from set | |
762 | 0 | sdataset.remove(sequence); |
763 | } | |
764 | } | |
765 | 0 | return modified; |
766 | } | |
767 | ||
768 | Map<String, FeatureSettingsModelI> featureDisplaySettings = null; | |
769 | ||
770 | 0 | private void addFeatureSettings(DbSourceProxy dbSourceProxy) |
771 | { | |
772 | 0 | FeatureSettingsModelI fsettings = dbSourceProxy |
773 | .getFeatureColourScheme(); | |
774 | 0 | if (fsettings != null) |
775 | { | |
776 | 0 | if (featureDisplaySettings == null) |
777 | { | |
778 | 0 | featureDisplaySettings = new HashMap<>(); |
779 | } | |
780 | 0 | featureDisplaySettings.put(dbSourceProxy.getDbName(), fsettings); |
781 | } | |
782 | } | |
783 | ||
784 | /** | |
785 | * | |
786 | * @return any feature settings associated with sources that have provided | |
787 | * sequences | |
788 | */ | |
789 | 0 | public List<FeatureSettingsModelI> getFeatureSettingsModels() |
790 | { | |
791 | 0 | return featureDisplaySettings == null |
792 | ? Arrays.asList(new FeatureSettingsModelI[0]) | |
793 | : Arrays.asList(featureDisplaySettings.values() | |
794 | .toArray(new FeatureSettingsModelI[1])); | |
795 | } | |
796 | ||
797 | /** | |
798 | * Adds the message to the list unless it already contains it | |
799 | * | |
800 | * @param messageList | |
801 | * @param msg | |
802 | */ | |
803 | 0 | void addWarningMessage(List<String> messageList, String msg) |
804 | { | |
805 | 0 | if (!messageList.contains(msg)) |
806 | { | |
807 | 0 | messageList.add(msg); |
808 | } | |
809 | } | |
810 | ||
811 | /** | |
812 | * loop thru and collect additional sequences in Map. | |
813 | * | |
814 | * @param sequencesArray | |
815 | * @return | |
816 | */ | |
817 | 0 | private SequenceI[] recoverDbSequences(SequenceI[] sequencesArray) |
818 | { | |
819 | 0 | int n; |
820 | 0 | if (sequencesArray == null || (n = sequencesArray.length) == 0) |
821 | 0 | return sequencesArray; |
822 | 0 | ArrayList<SequenceI> nseq = new ArrayList<>(); |
823 | 0 | for (int i = 0; i < n; i++) |
824 | { | |
825 | 0 | nseq.add(sequencesArray[i]); |
826 | 0 | List<DBRefEntry> dbr = sequencesArray[i].getDBRefs(); |
827 | 0 | Mapping map = null; |
828 | 0 | if (dbr != null) |
829 | { | |
830 | 0 | for (int r = 0, rn = dbr.size(); r < rn; r++) |
831 | { | |
832 | 0 | if ((map = dbr.get(r).getMap()) != null) |
833 | { | |
834 | 0 | if (map.getTo() != null && !nseq.contains(map.getTo())) |
835 | { | |
836 | 0 | nseq.add(map.getTo()); |
837 | } | |
838 | } | |
839 | } | |
840 | } | |
841 | } | |
842 | // BH 2019.01.25 question here if this is the right logic. Return the | |
843 | // original if nothing found? | |
844 | 0 | if (nseq.size() > 0) |
845 | { | |
846 | 0 | return nseq.toArray(new SequenceI[nseq.size()]); |
847 | } | |
848 | 0 | return sequencesArray; |
849 | } | |
850 | } |