Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
GroupUrlLink | 28 | 252 | 109 | ||
GroupUrlLink.UrlStringTooLongException | 30 | 2 | 2 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | package jalview.util; | |
22 | ||
23 | import jalview.datamodel.Sequence; | |
24 | import jalview.datamodel.SequenceI; | |
25 | ||
26 | import java.util.Hashtable; | |
27 | ||
28 | public class GroupUrlLink | |
29 | { | |
public class UrlStringTooLongException extends Exception
{
  /**
   * the estimated length of the URL that could not be generated
   */
  public int urlLength;

  /**
   * @param lng
   *          estimated length of the over-long URL
   */
  public UrlStringTooLongException(int lng)
  {
    this.urlLength = lng;
  }

  /**
   * @return a fixed explanatory message followed by the estimated length in
   *         parentheses
   */
  @Override
  public String toString()
  {
    StringBuilder text = new StringBuilder(
            "Generated url is estimated to be too long (");
    text.append(urlLength).append(")");
    return text.toString();
  }
}
46 | ||
47 | /** | |
48 | * Helper class based on the UrlLink class which enables URLs to be | |
49 | * constructed from sequences or IDs associated with a group of sequences. URL | |
50 | * definitions consist of a pipe separated string containing a <label>|<url | |
51 | * construct>|<separator character>[|<sequence separator character>]. The url | |
52 | * construct includes regex qualified tokens which are replaced with sequence | |
53 | * IDs ($SEQUENCEIDS$) and/or sequence regions ($SEQUENCES$) that are | |
54 | * extracted from the group. See <code>UrlLink</code> for more information | |
55 | * about the approach, and the original implementation. Documentation to come. | |
56 | * Note - groupUrls can be very big! | |
57 | */ | |
58 | private String url_prefix, target, label; | |
59 | ||
60 | /** | |
61 | * these are all filled in order of the occurence of each token in the url | |
62 | * string template | |
63 | */ | |
64 | private String url_suffix[], separators[], regexReplace[]; | |
65 | ||
66 | private String invalidMessage = null; | |
67 | ||
68 | /** | |
69 | * tokens that can be replaced in the URL. | |
70 | */ | |
71 | private static String[] tokens; | |
72 | ||
73 | /** | |
74 | * position of each token (which can appear once only) in the url | |
75 | */ | |
76 | private int[] segs; | |
77 | ||
78 | /** | |
79 | * contains tokens in the order they appear in the URL template. | |
80 | */ | |
81 | private String[] mtch; | |
82 | 0 | static |
83 | { | |
84 | 0 | if (tokens == null) |
85 | { | |
86 | 0 | tokens = new String[] { "SEQUENCEIDS", "SEQUENCES", "DATASETID" }; |
87 | } | |
88 | } | |
89 | ||
90 | /** | |
91 | * test for GroupURLType bitfield (with default tokens) | |
92 | */ | |
93 | public static final int SEQUENCEIDS = 1; | |
94 | ||
95 | /** | |
96 | * test for GroupURLType bitfield (with default tokens) | |
97 | */ | |
98 | public static final int SEQUENCES = 2; | |
99 | ||
100 | /** | |
101 | * test for GroupURLType bitfield (with default tokens) | |
102 | */ | |
103 | public static final int DATASETID = 4; | |
104 | ||
105 | // private int idseg = -1, seqseg = -1; | |
106 | ||
107 | /** | |
108 | * parse the given linkString of the form '<label>|<url>|separator | |
109 | * char[|optional sequence separator char]' into parts. url may contain a | |
110 | * string $SEQUENCEIDS<=optional regex=>$ where <=optional regex=> must be of | |
111 | * the form =/<perl style regex>/=$ or $SEQUENCES<=optional regex=>$ or | |
112 | * $SEQUENCES<=optional regex=>$. | |
113 | * | |
114 | * @param link | |
115 | */ | |
116 | 0 | public GroupUrlLink(String link) |
117 | { | |
118 | 0 | int sep = link.indexOf("|"); |
119 | 0 | segs = new int[tokens.length]; |
120 | 0 | int ntoks = 0; |
121 | 0 | for (int i = 0; i < segs.length; i++) |
122 | { | |
123 | 0 | if ((segs[i] = link.indexOf("$" + tokens[i])) > -1) |
124 | { | |
125 | 0 | ntoks++; |
126 | } | |
127 | } | |
128 | // expect at least one token | |
129 | 0 | if (ntoks == 0) |
130 | { | |
131 | 0 | invalidMessage = "Group URL string must contain at least one of "; |
132 | 0 | for (int i = 0; i < segs.length; i++) |
133 | { | |
134 | 0 | invalidMessage += " '$" + tokens[i] + "[=/regex=/]$'"; |
135 | } | |
136 | 0 | return; |
137 | } | |
138 | ||
139 | 0 | int[] ptok = new int[ntoks + 1]; |
140 | 0 | String[] tmtch = new String[ntoks + 1]; |
141 | 0 | mtch = new String[ntoks]; |
142 | 0 | for (int i = 0, t = 0; i < segs.length; i++) |
143 | { | |
144 | 0 | if (segs[i] > -1) |
145 | { | |
146 | 0 | ptok[t] = segs[i]; |
147 | 0 | tmtch[t++] = tokens[i]; |
148 | } | |
149 | } | |
150 | 0 | ptok[ntoks] = link.length(); |
151 | 0 | tmtch[ntoks] = "$$$$$$$$$"; |
152 | 0 | jalview.util.QuickSort.sort(ptok, tmtch); |
153 | 0 | for (int i = 0; i < ntoks; i++) |
154 | { | |
155 | 0 | mtch[i] = tmtch[i]; // TODO: check order is ascending |
156 | } | |
157 | /* | |
158 | * replaces the specific code below {}; if (psqids > -1 && pseqs > -1) { if | |
159 | * (psqids > pseqs) { idseg = 1; seqseg = 0; | |
160 | * | |
161 | * ptok = new int[] { pseqs, psqids, link.length() }; mtch = new String[] { | |
162 | * "$SEQUENCES", "$SEQUENCEIDS" }; } else { idseg = 0; seqseg = 1; ptok = | |
163 | * new int[] { psqids, pseqs, link.length() }; mtch = new String[] { | |
164 | * "$SEQUENCEIDS", "$SEQUENCES" }; } } else { if (psqids != -1) { idseg = 0; | |
165 | * ptok = new int[] { psqids, link.length() }; mtch = new String[] { | |
166 | * "$SEQUENCEIDS" }; } else { seqseg = 0; ptok = new int[] { pseqs, | |
167 | * link.length() }; mtch = new String[] { "$SEQUENCES" }; } } | |
168 | */ | |
169 | ||
170 | 0 | int p = sep; |
171 | // first get the label and target part before the first | | |
172 | 0 | do |
173 | { | |
174 | 0 | sep = p; |
175 | 0 | p = link.indexOf("|", sep + 1); |
176 | 0 | } while (p > sep && p < ptok[0]); |
177 | // Assuming that the URL itself does not contain any '|' symbols | |
178 | // sep now contains last pipe symbol position prior to any regex symbols | |
179 | 0 | label = link.substring(0, sep); |
180 | 0 | if (label.indexOf("|") > -1) |
181 | { | |
182 | // | terminated database name / www target at start of Label | |
183 | 0 | target = label.substring(0, label.indexOf("|")); |
184 | } | |
185 | 0 | else if (label.indexOf(" ") > 2) |
186 | { | |
187 | // space separated Label - matches database name | |
188 | 0 | target = label.substring(0, label.indexOf(" ")); |
189 | } | |
190 | else | |
191 | { | |
192 | 0 | target = label; |
193 | } | |
194 | // Now Parse URL : Whole URL string first | |
195 | 0 | url_prefix = link.substring(sep + 1, ptok[0]); |
196 | 0 | url_suffix = new String[mtch.length]; |
197 | 0 | regexReplace = new String[mtch.length]; |
198 | // and loop through tokens | |
199 | 0 | for (int pass = 0; pass < mtch.length; pass++) |
200 | { | |
201 | 0 | int mlength = 3 + mtch[pass].length(); |
202 | 0 | if (link.indexOf("$" + mtch[pass] + "=/") == ptok[pass] && (p = link |
203 | .indexOf("/=$", ptok[pass] + mlength)) > ptok[pass] + mlength) | |
204 | { | |
205 | // Extract Regex and suffix | |
206 | 0 | if (ptok[pass + 1] < p + 3) |
207 | { | |
208 | // tokens are not allowed inside other tokens - e.g. inserting a | |
209 | // $sequences$ into the regex match for the sequenceid | |
210 | 0 | invalidMessage = "Token regexes cannot contain other regexes (did you terminate the $" |
211 | + mtch[pass] + " regex with a '/=$' ?"; | |
212 | 0 | return; |
213 | } | |
214 | 0 | url_suffix[pass] = link.substring(p + 3, ptok[pass + 1]); |
215 | 0 | regexReplace[pass] = link.substring(ptok[pass] + mlength, p); |
216 | 0 | try |
217 | { | |
218 | 0 | com.stevesoft.pat.Regex rg = com.stevesoft.pat.Regex |
219 | .perlCode("/" + regexReplace[pass] + "/"); | |
220 | 0 | if (rg == null) |
221 | { | |
222 | 0 | invalidMessage = "Invalid Regular Expression : '" |
223 | + regexReplace[pass] + "'\n"; | |
224 | } | |
225 | } catch (Exception e) | |
226 | { | |
227 | 0 | invalidMessage = "Invalid Regular Expression : '" |
228 | + regexReplace[pass] + "'\n"; | |
229 | } | |
230 | } | |
231 | else | |
232 | { | |
233 | 0 | regexReplace[pass] = null; |
234 | // verify format is really correct. | |
235 | 0 | if ((p = link.indexOf("$" + mtch[pass] + "$")) == ptok[pass]) |
236 | { | |
237 | 0 | url_suffix[pass] = link.substring(p + mtch[pass].length() + 2, |
238 | ptok[pass + 1]); | |
239 | } | |
240 | else | |
241 | { | |
242 | 0 | invalidMessage = "Warning: invalid regex structure (after '" |
243 | + mtch[0] + "') for URL link : " + link; | |
244 | } | |
245 | } | |
246 | } | |
247 | 0 | int pass = 0; |
248 | 0 | separators = new String[url_suffix.length]; |
249 | 0 | String suffices = url_suffix[url_suffix.length - 1], lastsep = ","; |
250 | // have a look in the last suffix for any more separators. | |
251 | 0 | while ((p = suffices.indexOf('|')) > -1) |
252 | { | |
253 | 0 | separators[pass] = suffices.substring(p + 1); |
254 | 0 | if (pass == 0) |
255 | { | |
256 | // trim the original suffix string | |
257 | 0 | url_suffix[url_suffix.length - 1] = suffices.substring(0, p); |
258 | } | |
259 | else | |
260 | { | |
261 | 0 | lastsep = (separators[pass - 1] = separators[pass - 1].substring(0, |
262 | p)); | |
263 | } | |
264 | 0 | suffices = separators[pass]; |
265 | 0 | pass++; |
266 | } | |
267 | 0 | if (pass > 0) |
268 | { | |
269 | 0 | lastsep = separators[pass - 1]; |
270 | } | |
271 | // last separator is always used for all the remaining separators | |
272 | 0 | while (pass < separators.length) |
273 | { | |
274 | 0 | separators[pass++] = lastsep; |
275 | } | |
276 | } | |
277 | ||
278 | /** | |
279 | * @return the url_suffix | |
280 | */ | |
281 | 0 | public String getUrl_suffix() |
282 | { | |
283 | 0 | return url_suffix[url_suffix.length - 1]; |
284 | } | |
285 | ||
286 | /** | |
287 | * @return the url_prefix | |
288 | */ | |
289 | 0 | public String getUrl_prefix() |
290 | { | |
291 | 0 | return url_prefix; |
292 | } | |
293 | ||
294 | /** | |
295 | * @return the target | |
296 | */ | |
297 | 0 | public String getTarget() |
298 | { | |
299 | 0 | return target; |
300 | } | |
301 | ||
302 | /** | |
303 | * @return the label | |
304 | */ | |
305 | 0 | public String getLabel() |
306 | { | |
307 | 0 | return label; |
308 | } | |
309 | ||
310 | /** | |
311 | * @return the sequence ID regexReplace | |
312 | */ | |
313 | 0 | public String getIDRegexReplace() |
314 | { | |
315 | 0 | return _replaceFor(tokens[0]); |
316 | } | |
317 | ||
318 | 0 | private String _replaceFor(String token) |
319 | { | |
320 | 0 | for (int i = 0; i < mtch.length; i++) |
321 | { | |
322 | 0 | if (segs[i] > -1 && mtch[i].equals(token)) |
323 | { | |
324 | 0 | return regexReplace[i]; |
325 | } | |
326 | } | |
327 | 0 | return null; |
328 | } | |
329 | ||
330 | /** | |
331 | * @return the sequence ID regexReplace | |
332 | */ | |
333 | 0 | public String getSeqRegexReplace() |
334 | { | |
335 | 0 | return _replaceFor(tokens[1]); |
336 | } | |
337 | ||
338 | /** | |
339 | * @return the invalidMessage | |
340 | */ | |
341 | 0 | public String getInvalidMessage() |
342 | { | |
343 | 0 | return invalidMessage; |
344 | } | |
345 | ||
346 | /** | |
347 | * Check if URL string was parsed properly. | |
348 | * | |
349 | * @return boolean - if false then <code>getInvalidMessage</code> returns an | |
350 | * error message | |
351 | */ | |
352 | 0 | public boolean isValid() |
353 | { | |
354 | 0 | return invalidMessage == null; |
355 | } | |
356 | ||
357 | /** | |
358 | * return one or more URL strings by applying regex to the given idstring | |
359 | * | |
360 | * @param idstrings | |
361 | * array of id strings to pass to service | |
362 | * @param seqstrings | |
363 | * array of seq strings to pass to service | |
364 | * @param onlyIfMatches | |
365 | * - when true url strings are only made if regex is defined and | |
366 | * matches for all qualified tokens in groupURL - TODO: consider if | |
367 | * onlyIfMatches is really a useful parameter! | |
368 | * @return null or Object[] { int[] { number of seqs substituted},boolean[] { | |
369 | * which seqs were substituted }, StringBuffer[] { substituted lists | |
370 | * for each token }, String[] { url } } | |
371 | * @throws UrlStringTooLongException | |
372 | */ | |
373 | 0 | public Object[] makeUrls(String[] idstrings, String[] seqstrings, |
374 | String dsstring, boolean onlyIfMatches) | |
375 | throws UrlStringTooLongException | |
376 | { | |
377 | 0 | Hashtable rstrings = replacementArgs(idstrings, seqstrings, dsstring); |
378 | 0 | return makeUrls(rstrings, onlyIfMatches); |
379 | } | |
380 | ||
381 | /** | |
382 | * gathers input into a hashtable | |
383 | * | |
384 | * @param idstrings | |
385 | * @param seqstrings | |
386 | * @param dsstring | |
387 | * @return | |
388 | */ | |
389 | 0 | private Hashtable replacementArgs(String[] idstrings, String[] seqstrings, |
390 | String dsstring) | |
391 | { | |
392 | 0 | Hashtable rstrings = new Hashtable(); |
393 | 0 | rstrings.put(tokens[0], idstrings); |
394 | 0 | rstrings.put(tokens[1], seqstrings); |
395 | 0 | rstrings.put(tokens[2], new String[] { dsstring }); |
396 | 0 | if (idstrings.length != seqstrings.length) |
397 | { | |
398 | 0 | throw new Error(MessageManager.getString( |
399 | "error.idstring_seqstrings_only_one_per_sequence")); | |
400 | } | |
401 | 0 | return rstrings; |
402 | } | |
403 | ||
404 | 0 | public Object[] makeUrls(Hashtable repstrings, boolean onlyIfMatches) |
405 | throws UrlStringTooLongException | |
406 | { | |
407 | 0 | return makeUrlsIf(true, repstrings, onlyIfMatches); |
408 | } | |
409 | ||
410 | /** | |
411 | * | |
412 | * @param ids | |
413 | * @param seqstr | |
414 | * @param string | |
415 | * @param b | |
416 | * @return URL stub objects ready to pass to constructFrom | |
417 | * @throws UrlStringTooLongException | |
418 | */ | |
419 | 0 | public Object[] makeUrlStubs(String[] ids, String[] seqstr, String string, |
420 | boolean b) throws UrlStringTooLongException | |
421 | { | |
422 | 0 | Hashtable rstrings = replacementArgs(ids, seqstr, string); |
423 | 0 | Object[] stubs = makeUrlsIf(false, rstrings, b); |
424 | 0 | if (stubs != null) |
425 | { | |
426 | 0 | return new Object[] { stubs[0], stubs[1], rstrings, |
427 | new boolean[] | |
428 | { b } }; | |
429 | } | |
430 | // TODO Auto-generated method stub | |
431 | 0 | return null; |
432 | } | |
433 | ||
434 | /** | |
435 | * generate the URL for the given URL stub object array returned from | |
436 | * makeUrlStubs | |
437 | * | |
438 | * @param stubs | |
439 | * @return URL string. | |
440 | * @throws UrlStringTooLongException | |
441 | */ | |
442 | 0 | public String constructFrom(Object[] stubs) |
443 | throws UrlStringTooLongException | |
444 | { | |
445 | 0 | Object[] results = makeUrlsIf(true, (Hashtable) stubs[2], |
446 | ((boolean[]) stubs[3])[0]); | |
447 | 0 | return ((String[]) results[3])[0]; |
448 | } | |
449 | ||
450 | /** | |
451 | * conditionally generate urls or stubs for a given input. | |
452 | * | |
453 | * @param createFullUrl | |
454 | * set to false if you only want to test if URLs would be generated. | |
455 | * @param repstrings | |
456 | * @param onlyIfMatches | |
457 | * @return null if no url is generated. Object[] { int[] { number of matches | |
458 | * seqs }, boolean[] { which matched }, (if createFullUrl also has | |
459 | * StringBuffer[] { segment generated from inputs that is used in URL | |
460 | * }, String[] { url })} | |
461 | * @throws UrlStringTooLongException | |
462 | */ | |
463 | 0 | protected Object[] makeUrlsIf(boolean createFullUrl, Hashtable repstrings, |
464 | boolean onlyIfMatches) throws UrlStringTooLongException | |
465 | { | |
466 | 0 | int pass = 0; |
467 | ||
468 | // prepare string arrays in correct order to be assembled into URL input | |
469 | 0 | String[][] idseq = new String[mtch.length][]; // indexed by pass |
470 | 0 | int mins = 0, maxs = 0; // allowed two values, 1 or n-sequences. |
471 | 0 | for (int i = 0; i < mtch.length; i++) |
472 | { | |
473 | 0 | idseq[i] = (String[]) repstrings.get(mtch[i]); |
474 | 0 | if (idseq[i].length >= 1) |
475 | { | |
476 | 0 | if (mins == 0 && idseq[i].length == 1) |
477 | { | |
478 | 0 | mins = 1; |
479 | } | |
480 | 0 | if (maxs < 2) |
481 | { | |
482 | 0 | maxs = idseq[i].length; |
483 | } | |
484 | else | |
485 | { | |
486 | 0 | if (maxs != idseq[i].length) |
487 | { | |
488 | 0 | throw new Error(MessageManager.formatMessage( |
489 | "error.cannot_have_mixed_length_replacement_vectors", | |
490 | new String[] | |
491 | { (mtch[i]), | |
492 | Integer.valueOf(idseq[i].length).toString(), | |
493 | Integer.valueOf(maxs).toString() })); | |
494 | } | |
495 | } | |
496 | } | |
497 | else | |
498 | { | |
499 | 0 | throw new Error(MessageManager.getString( |
500 | "error.cannot_have_zero_length_vector_replacement_strings")); | |
501 | } | |
502 | } | |
503 | // iterate through input, collating segments to be inserted into url | |
504 | 0 | StringBuffer matched[] = new StringBuffer[idseq.length]; |
505 | // and precompile regexes | |
506 | 0 | com.stevesoft.pat.Regex[] rgxs = new com.stevesoft.pat.Regex[matched.length]; |
507 | 0 | for (pass = 0; pass < matched.length; pass++) |
508 | { | |
509 | 0 | matched[pass] = new StringBuffer(); |
510 | 0 | if (regexReplace[pass] != null) |
511 | { | |
512 | 0 | rgxs[pass] = com.stevesoft.pat.Regex |
513 | .perlCode("/" + regexReplace[pass] + "/"); | |
514 | } | |
515 | else | |
516 | { | |
517 | 0 | rgxs[pass] = null; |
518 | } | |
519 | } | |
520 | // tot up the invariant lengths for this url | |
521 | 0 | int urllength = url_prefix.length(); |
522 | 0 | for (pass = 0; pass < matched.length; pass++) |
523 | { | |
524 | 0 | urllength += url_suffix[pass].length(); |
525 | } | |
526 | ||
527 | // flags to record which of the input sequences were actually used to | |
528 | // generate the | |
529 | // url | |
530 | 0 | boolean[] thismatched = new boolean[maxs]; |
531 | 0 | int seqsmatched = 0; |
532 | 0 | for (int sq = 0; sq < maxs; sq++) |
533 | { | |
534 | // initialise flag for match | |
535 | 0 | thismatched[sq] = false; |
536 | 0 | StringBuffer[] thematches = new StringBuffer[rgxs.length]; |
537 | 0 | for (pass = 0; pass < rgxs.length; pass++) |
538 | { | |
539 | 0 | thematches[pass] = new StringBuffer(); // initialise - in case there are |
540 | // no more | |
541 | // matches. | |
542 | // if a regex is provided, then it must match for all sequences in all | |
543 | // tokens for it to be considered. | |
544 | 0 | if (idseq[pass].length <= sq) |
545 | { | |
546 | // no more replacement strings to try for this token | |
547 | 0 | continue; |
548 | } | |
549 | 0 | if (rgxs[pass] != null) |
550 | { | |
551 | 0 | com.stevesoft.pat.Regex rg = rgxs[pass]; |
552 | 0 | int rematchat = 0; |
553 | // concatenate all matches of re in the given string! | |
554 | 0 | while (rg.searchFrom(idseq[pass][sq], rematchat)) |
555 | { | |
556 | 0 | rematchat = rg.matchedTo(); |
557 | 0 | thismatched[sq] |= true; |
558 | 0 | urllength += rg.charsMatched(); // count length |
559 | 0 | if ((urllength + 32) > Platform.getMaxCommandLineLength()) |
560 | { | |
561 | 0 | throw new UrlStringTooLongException(urllength); |
562 | } | |
563 | ||
564 | 0 | if (!createFullUrl) |
565 | { | |
566 | 0 | continue; // don't bother making the URL replacement text. |
567 | } | |
568 | // do we take the cartesian products of the substituents ? | |
569 | 0 | int ns = rg.numSubs(); |
570 | 0 | if (ns == 0) |
571 | { | |
572 | 0 | thematches[pass].append(rg.stringMatched());// take whole regex |
573 | } | |
574 | /* | |
575 | * else if (ns==1) { // take only subgroup match return new String[] | |
576 | * { rg.stringMatched(1), url_prefix+rg.stringMatched(1)+url_suffix | |
577 | * }; } | |
578 | */ | |
579 | // deal with multiple submatch case - for moment we do the simplest | |
580 | // - concatenate the matched regions, instead of creating a complete | |
581 | // list for each alternate match over all sequences. | |
582 | // TODO: specify a 'replace pattern' - next refinement | |
583 | else | |
584 | { | |
585 | // debug | |
586 | /* | |
587 | * for (int s = 0; s <= rg.numSubs(); s++) { | |
588 | * jalview.bin.Console.errPrintln("Sub " + s + " : " + rg.matchedFrom(s) + | |
589 | * " : " + rg.matchedTo(s) + " : '" + rg.stringMatched(s) + "'"); | |
590 | * } | |
591 | */ | |
592 | // try to collate subgroup matches | |
593 | 0 | StringBuffer subs = new StringBuffer(); |
594 | // have to loop through submatches, collating them at top level | |
595 | // match | |
596 | 0 | int s = 0; // 1; |
597 | 0 | while (s <= ns) |
598 | { | |
599 | 0 | if (s + 1 <= ns && rg.matchedTo(s) > -1 |
600 | && rg.matchedTo(s + 1) > -1 | |
601 | && rg.matchedTo(s + 1) < rg.matchedTo(s)) | |
602 | { | |
603 | // s is top level submatch. search for submatches enclosed by | |
604 | // this one | |
605 | 0 | int r = s + 1; |
606 | 0 | StringBuffer rmtch = new StringBuffer(); |
607 | 0 | while (r <= ns && rg.matchedTo(r) <= rg.matchedTo(s)) |
608 | { | |
609 | 0 | if (rg.matchedFrom(r) > -1) |
610 | { | |
611 | 0 | rmtch.append(rg.stringMatched(r)); |
612 | } | |
613 | 0 | r++; |
614 | } | |
615 | 0 | if (rmtch.length() > 0) |
616 | { | |
617 | 0 | subs.append(rmtch); // simply concatenate |
618 | } | |
619 | 0 | s = r; |
620 | } | |
621 | else | |
622 | { | |
623 | 0 | if (rg.matchedFrom(s) > -1) |
624 | { | |
625 | 0 | subs.append(rg.stringMatched(s)); // concatenate |
626 | } | |
627 | 0 | s++; |
628 | } | |
629 | } | |
630 | 0 | thematches[pass].append(subs); |
631 | } | |
632 | } | |
633 | } | |
634 | else | |
635 | { | |
636 | // are we only supposed to take regex matches ? | |
637 | 0 | if (!onlyIfMatches) |
638 | { | |
639 | 0 | thismatched[sq] |= true; |
640 | 0 | urllength += idseq[pass][sq].length(); // tot up length |
641 | 0 | if (createFullUrl) |
642 | { | |
643 | 0 | thematches[pass] = new StringBuffer(idseq[pass][sq]); // take |
644 | // whole | |
645 | // string - | |
646 | // regardless - probably not a | |
647 | // good idea! | |
648 | /* | |
649 | * TODO: do some boilerplate trimming of the fields to make them | |
650 | * sensible e.g. trim off any 'prefix' in the id string (see | |
651 | * UrlLink for the below) - pre 2.4 Jalview behaviour if | |
652 | * (idstring.indexOf("|") > -1) { idstring = | |
653 | * idstring.substring(idstring.lastIndexOf("|") + 1); } | |
654 | */ | |
655 | } | |
656 | ||
657 | } | |
658 | } | |
659 | } | |
660 | ||
661 | // check if we are going to add this sequence's results ? all token | |
662 | // replacements must be valid for this to happen! | |
663 | // (including single value replacements - eg. dataset name) | |
664 | 0 | if (thismatched[sq]) |
665 | { | |
666 | 0 | if (createFullUrl) |
667 | { | |
668 | 0 | for (pass = 0; pass < matched.length; pass++) |
669 | { | |
670 | 0 | if (idseq[pass].length > 1 && matched[pass].length() > 0) |
671 | { | |
672 | 0 | matched[pass].append(separators[pass]); |
673 | } | |
674 | 0 | matched[pass].append(thematches[pass]); |
675 | } | |
676 | } | |
677 | 0 | seqsmatched++; |
678 | } | |
679 | } | |
680 | // finally, if any sequences matched, then form the URL and return | |
681 | 0 | if (seqsmatched == 0 || (createFullUrl && matched[0].length() == 0)) |
682 | { | |
683 | // no matches - no url generated | |
684 | 0 | return null; |
685 | } | |
686 | // check if we are beyond the feasible command line string limit for this | |
687 | // platform | |
688 | 0 | if ((urllength + 32) > Platform.getMaxCommandLineLength()) |
689 | { | |
690 | 0 | throw new UrlStringTooLongException(urllength); |
691 | } | |
692 | 0 | if (!createFullUrl) |
693 | { | |
694 | // just return the essential info about what the URL would be generated | |
695 | // from | |
696 | 0 | return new Object[] { new int[] { seqsmatched }, thismatched }; |
697 | } | |
698 | // otherwise, create the URL completely. | |
699 | ||
700 | 0 | StringBuffer submiturl = new StringBuffer(); |
701 | 0 | submiturl.append(url_prefix); |
702 | 0 | for (pass = 0; pass < matched.length; pass++) |
703 | { | |
704 | 0 | submiturl.append(matched[pass]); |
705 | 0 | if (url_suffix[pass] != null) |
706 | { | |
707 | 0 | submiturl.append(url_suffix[pass]); |
708 | } | |
709 | } | |
710 | ||
711 | 0 | return new Object[] { new int[] { seqsmatched }, thismatched, matched, |
712 | new String[] | |
713 | { submiturl.toString() } }; | |
714 | } | |
715 | ||
716 | /** | |
717 | * | |
718 | * @param urlstub | |
719 | * @return number of distinct sequence (id or seuqence) replacements predicted | |
720 | * for this stub | |
721 | */ | |
722 | 0 | public int getNumberInvolved(Object[] urlstub) |
723 | { | |
724 | 0 | return ((int[]) urlstub[0])[0]; // returns seqsmatched from |
725 | // makeUrlsIf(false,...) | |
726 | } | |
727 | ||
728 | /** | |
729 | * get token types present in this url as a bitfield indicating presence of | |
730 | * each token from tokens (LSB->MSB). | |
731 | * | |
732 | * @return groupURL class as integer | |
733 | */ | |
734 | 0 | public int getGroupURLType() |
735 | { | |
736 | 0 | int r = 0; |
737 | 0 | for (int pass = 0; pass < tokens.length; pass++) |
738 | { | |
739 | 0 | for (int i = 0; i < mtch.length; i++) |
740 | { | |
741 | 0 | if (mtch[i].equals(tokens[pass])) |
742 | { | |
743 | 0 | r += 1 << pass; |
744 | } | |
745 | } | |
746 | } | |
747 | 0 | return r; |
748 | } | |
749 | ||
750 | 0 | @Override |
751 | public String toString() | |
752 | { | |
753 | 0 | StringBuffer result = new StringBuffer(); |
754 | 0 | result.append(label + "|" + url_prefix); |
755 | 0 | int r; |
756 | 0 | for (r = 0; r < url_suffix.length; r++) |
757 | { | |
758 | 0 | result.append("$"); |
759 | 0 | result.append(mtch[r]); |
760 | 0 | if (regexReplace[r] != null) |
761 | { | |
762 | 0 | result.append("=/"); |
763 | 0 | result.append(regexReplace[r]); |
764 | 0 | result.append("/="); |
765 | } | |
766 | 0 | result.append("$"); |
767 | 0 | result.append(url_suffix[r]); |
768 | } | |
769 | 0 | for (r = 0; r < separators.length; r++) |
770 | { | |
771 | 0 | result.append("|"); |
772 | 0 | result.append(separators[r]); |
773 | } | |
774 | 0 | return result.toString(); |
775 | } | |
776 | ||
777 | /** | |
778 | * report stats about the generated url string given an input set | |
779 | * | |
780 | * @param ul | |
781 | * @param idstring | |
782 | * @param url | |
783 | */ | |
784 | 0 | private static void testUrls(GroupUrlLink ul, String[][] idstring, |
785 | Object[] url) | |
786 | { | |
787 | ||
788 | 0 | if (url == null) |
789 | { | |
790 | 0 | jalview.bin.Console.outPrintln("Created NO urls."); |
791 | } | |
792 | else | |
793 | { | |
794 | 0 | jalview.bin.Console |
795 | .outPrintln("Created a url from " + ((int[]) url[0])[0] | |
796 | + "out of " + idstring[0].length + " sequences."); | |
797 | 0 | jalview.bin.Console.outPrintln("Sequences that did not match:"); |
798 | 0 | for (int sq = 0; sq < idstring[0].length; sq++) |
799 | { | |
800 | 0 | if (!((boolean[]) url[1])[sq]) |
801 | { | |
802 | 0 | jalview.bin.Console.outPrintln("Seq " + sq + ": " |
803 | + idstring[0][sq] + "\t: " + idstring[1][sq]); | |
804 | } | |
805 | } | |
806 | 0 | jalview.bin.Console.outPrintln("Sequences that DID match:"); |
807 | 0 | for (int sq = 0; sq < idstring[0].length; sq++) |
808 | { | |
809 | 0 | if (((boolean[]) url[1])[sq]) |
810 | { | |
811 | 0 | jalview.bin.Console.outPrintln("Seq " + sq + ": " |
812 | + idstring[0][sq] + "\t: " + idstring[1][sq]); | |
813 | } | |
814 | } | |
815 | 0 | jalview.bin.Console.outPrintln("The generated URL:"); |
816 | 0 | jalview.bin.Console.outPrintln(((String[]) url[3])[0]); |
817 | } | |
818 | } | |
819 | ||
820 | /** | |
821 | * | |
822 | * @param argv | |
823 | * @j2sIgnore | |
824 | */ | |
825 | 0 | public static void main(String argv[]) |
826 | { | |
827 | // note - JAL-1383 - these services are all dead | |
828 | 0 | String[] links = new String[] { |
829 | "EnVision2|IDS|http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?workflow=Enfin%20Default%20Workflow&datasetName=linkInDatasetFromJalview&input=$SEQUENCEIDS$&inputType=0|,", | |
830 | "EnVision2|Seqs|http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?workflow=Enfin%20Default%20Workflow&datasetName=linkInDatasetFromJalview&input=$SEQUENCES$&inputType=1|,", | |
831 | "EnVision2|IDS|http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?workflow=Enfin%20Default%20Workflow&datasetName=$DATASETID$&input=$SEQUENCEIDS$&inputType=0|,", | |
832 | "EnVision2|Seqs|http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?workflow=Enfin%20Default%20Workflow&datasetName=$DATASETID$&input=$SEQUENCES$&inputType=1|,", | |
833 | "EnVision2|IDS|http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?workflow=$SEQUENCEIDS$&datasetName=linkInDatasetFromJalview&input=$SEQUENCEIDS$&inputType=0|,", | |
834 | "EnVision2|Seqs|http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?workflow=$SEQUENCEIDS$&datasetName=$DATASETID$&input=$SEQUENCES$&inputType=1|,", | |
835 | "EnVision2 Seqs|http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?workflow=Default&datasetName=JalviewSeqs$DATASETID$&input=$SEQUENCES=/([a-zA-Z]+)/=$&inputType=1|,", | |
836 | "EnVision2 Seqs|http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?workflow=Default&datasetName=JalviewSeqs$DATASETID$&input=$SEQUENCES=/[A-Za-z]+/=$&inputType=1|," | |
837 | /* | |
838 | * http://www.ebi.ac.uk/enfin-srv/envision2/pages/linkin.jsf?input=P38389,P38398 | |
839 | * &inputType=0&workflow=Enfin%20Default%20Workflow&datasetName= | |
840 | * linkInDatasetFromPRIDE | |
841 | */ | |
842 | }; | |
843 | ||
844 | 0 | SequenceI[] seqs = new SequenceI[] { |
845 | new Sequence("StupidLabel:gi|9234|pdb|102L|A", | |
846 | "asdiasdpasdpadpwpadasdpaspdw"), }; | |
847 | 0 | String[][] seqsandids = formStrings(seqs); |
848 | 0 | for (int i = 0; i < links.length; i++) |
849 | { | |
850 | 0 | GroupUrlLink ul = new GroupUrlLink(links[i]); |
851 | 0 | if (ul.isValid()) |
852 | { | |
853 | 0 | jalview.bin.Console.outPrintln("\n\n\n"); |
854 | 0 | jalview.bin.Console.outPrintln( |
855 | "Link " + i + " " + links[i] + " : " + ul.toString()); | |
856 | 0 | jalview.bin.Console.outPrintln(" pref : " + ul.getUrl_prefix()); |
857 | 0 | jalview.bin.Console |
858 | .outPrintln(" IdReplace : " + ul.getIDRegexReplace()); | |
859 | 0 | jalview.bin.Console |
860 | .outPrintln(" SeqReplace : " + ul.getSeqRegexReplace()); | |
861 | 0 | jalview.bin.Console.outPrintln(" Suffixes : " + ul.getUrl_suffix()); |
862 | ||
863 | 0 | jalview.bin.Console.outPrintln( |
864 | "<insert input id and sequence strings here> Without onlyIfMatches:"); | |
865 | 0 | Object[] urls; |
866 | 0 | try |
867 | { | |
868 | 0 | urls = ul.makeUrls(seqsandids[0], seqsandids[1], "mydataset", |
869 | false); | |
870 | 0 | testUrls(ul, seqsandids, urls); |
871 | } catch (UrlStringTooLongException ex) | |
872 | { | |
873 | 0 | jalview.bin.Console.outPrintln("too long exception " + ex); |
874 | } | |
875 | 0 | jalview.bin.Console.outPrintln( |
876 | "<insert input id and sequence strings here> With onlyIfMatches set:"); | |
877 | 0 | try |
878 | { | |
879 | 0 | urls = ul.makeUrls(seqsandids[0], seqsandids[1], "mydataset", |
880 | true); | |
881 | 0 | testUrls(ul, seqsandids, urls); |
882 | } catch (UrlStringTooLongException ex) | |
883 | { | |
884 | 0 | jalview.bin.Console.outPrintln("too long exception " + ex); |
885 | } | |
886 | } | |
887 | else | |
888 | { | |
889 | 0 | jalview.bin.Console.errPrintln("Invalid URLLink : " + links[i] |
890 | + " : " + ul.getInvalidMessage()); | |
891 | } | |
892 | } | |
893 | } | |
894 | ||
895 | /** | |
896 | * covenience method to generate the id and sequence string vector from a set | |
897 | * of seuqences using each sequence's getName() and getSequenceAsString() | |
898 | * method | |
899 | * | |
900 | * @param seqs | |
901 | * @return String[][] {{sequence ids},{sequence strings}} | |
902 | */ | |
903 | 0 | public static String[][] formStrings(SequenceI[] seqs) |
904 | { | |
905 | 0 | String[][] idset = new String[2][seqs.length]; |
906 | 0 | for (int i = 0; i < seqs.length; i++) |
907 | { | |
908 | 0 | idset[0][i] = seqs[i].getName(); |
909 | 0 | idset[1][i] = seqs[i].getSequenceAsString(); |
910 | } | |
911 | 0 | return idset; |
912 | } | |
913 | ||
914 | 0 | public void setLabel(String newlabel) |
915 | { | |
916 | 0 | this.label = newlabel; |
917 | } | |
918 | ||
919 | } |