Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
UnicodePunct | 20 | 1 | 3 | ||
UnicodeWhite | 30 | 1 | 3 | ||
NUnicodePunct | 42 | 1 | 3 | ||
NUnicodeWhite | 54 | 1 | 3 | ||
UnicodeW | 64 | 4 | 5 | ||
NUnicodeW | 80 | 4 | 5 | ||
UnicodeDigit | 97 | 1 | 3 | ||
NUnicodeDigit | 108 | 1 | 3 | ||
UnicodeMath | 119 | 1 | 3 | ||
NUnicodeMath | 129 | 1 | 3 | ||
UnicodeCurrency | 139 | 1 | 3 | ||
NUnicodeCurrency | 149 | 1 | 3 | ||
UnicodeAlpha | 159 | 1 | 3 | ||
NUnicodeAlpha | 169 | 1 | 3 | ||
UnicodeUpper | 180 | 2 | 4 | ||
UnicodeLower | 195 | 2 | 4 | ||
Regex | 309 | 658 | 321 |
1 | // | |
2 | // This software is now distributed according to | |
3 | // the Lesser Gnu Public License. Please see | |
4 | // http://www.gnu.org/copyleft/lesser.txt for | |
5 | // the details. | |
6 | // -- Happy Computing! | |
7 | // | |
8 | package com.stevesoft.pat; | |
9 | ||
10 | import jalview.util.MessageManager; | |
11 | ||
12 | import java.io.File; | |
13 | import java.io.FilenameFilter; | |
14 | import java.util.BitSet; | |
15 | import java.util.Hashtable; | |
16 | ||
17 | import com.stevesoft.pat.wrap.StringWrap; | |
18 | ||
19 | /** Matches a Unicode punctuation character. */ | |
20 | class UnicodePunct extends UniValidator | |
21 | { | |
22 | 0 | @Override |
23 | public int validate(StringLike s, int from, int to) | |
24 | { | |
25 | 0 | return from < s.length() && Prop.isPunct(s.charAt(from)) ? to : -1; |
26 | } | |
27 | } | |
28 | ||
29 | /** Matches a Unicode white space character. */ | |
30 | class UnicodeWhite extends UniValidator | |
31 | { | |
32 | 0 | @Override |
33 | public int validate(StringLike s, int from, int to) | |
34 | { | |
35 | 0 | return from < s.length() && Prop.isWhite(s.charAt(from)) ? to : -1; |
36 | } | |
37 | } | |
38 | ||
39 | /** | |
40 | * Matches a character that is not a Unicode punctuation character. | |
41 | */ | |
42 | class NUnicodePunct extends UniValidator | |
43 | { | |
44 | 0 | @Override |
45 | public int validate(StringLike s, int from, int to) | |
46 | { | |
47 | 0 | return from < s.length() && !Prop.isPunct(s.charAt(from)) ? to : -1; |
48 | } | |
49 | } | |
50 | ||
51 | /** | |
52 | * Matches a character that is not a Unicode white space character. | |
53 | */ | |
54 | class NUnicodeWhite extends UniValidator | |
55 | { | |
56 | 0 | @Override |
57 | public int validate(StringLike s, int from, int to) | |
58 | { | |
59 | 0 | return from < s.length() && !Prop.isWhite(s.charAt(from)) ? to : -1; |
60 | } | |
61 | } | |
62 | ||
63 | /** Matches a Unicode word character: an alphanumeric or underscore. */ | |
64 | class UnicodeW extends UniValidator | |
65 | { | |
66 | 0 | @Override |
67 | public int validate(StringLike s, int from, int to) | |
68 | { | |
69 | 0 | if (from >= s.length()) |
70 | { | |
71 | 0 | return -1; |
72 | } | |
73 | 0 | char c = s.charAt(from); |
74 | 0 | return (Prop.isAlphabetic(c) || Prop.isDecimalDigit(c) || c == '_') ? to |
75 | : -1; | |
76 | } | |
77 | } | |
78 | ||
79 | /** Matches a character that is not a Unicode alphanumeric or underscore. */ | |
80 | class NUnicodeW extends UniValidator | |
81 | { | |
82 | 0 | @Override |
83 | public int validate(StringLike s, int from, int to) | |
84 | { | |
85 | 0 | if (from >= s.length()) |
86 | { | |
87 | 0 | return -1; |
88 | } | |
89 | 0 | char c = s.charAt(from); |
90 | 0 | return !(Prop.isAlphabetic(c) || Prop.isDecimalDigit(c) || c == '_') |
91 | ? to | |
92 | : -1; | |
93 | } | |
94 | } | |
95 | ||
96 | /** Matches a Unicode decimal digit. */ | |
97 | class UnicodeDigit extends UniValidator | |
98 | { | |
99 | 0 | @Override |
100 | public int validate(StringLike s, int from, int to) | |
101 | { | |
102 | 0 | return from < s.length() && Prop.isDecimalDigit(s.charAt(from)) ? to |
103 | : -1; | |
104 | } | |
105 | } | |
106 | ||
107 | /** Matches a character that is not a Unicode digit. */ | |
108 | class NUnicodeDigit extends UniValidator | |
109 | { | |
110 | 0 | @Override |
111 | public int validate(StringLike s, int from, int to) | |
112 | { | |
113 | 0 | return from < s.length() && !Prop.isDecimalDigit(s.charAt(from)) ? to |
114 | : -1; | |
115 | } | |
116 | } | |
117 | ||
118 | /** Matches a Unicode math character. */ | |
119 | class UnicodeMath extends UniValidator | |
120 | { | |
121 | 0 | @Override |
122 | public int validate(StringLike s, int from, int to) | |
123 | { | |
124 | 0 | return from < s.length() && Prop.isMath(s.charAt(from)) ? to : -1; |
125 | } | |
126 | } | |
127 | ||
128 | /** Matches a non-math Unicode character. */ | |
129 | class NUnicodeMath extends UniValidator | |
130 | { | |
131 | 0 | @Override |
132 | public int validate(StringLike s, int from, int to) | |
133 | { | |
134 | 0 | return from < s.length() && !Prop.isMath(s.charAt(from)) ? to : -1; |
135 | } | |
136 | } | |
137 | ||
138 | /** Matches a Unicode currency symbol. */ | |
139 | class UnicodeCurrency extends UniValidator | |
140 | { | |
141 | 0 | @Override |
142 | public int validate(StringLike s, int from, int to) | |
143 | { | |
144 | 0 | return from < s.length() && Prop.isCurrency(s.charAt(from)) ? to : -1; |
145 | } | |
146 | } | |
147 | ||
148 | /** Matches a non-currency symbol Unicode character. */ | |
149 | class NUnicodeCurrency extends UniValidator | |
150 | { | |
151 | 0 | @Override |
152 | public int validate(StringLike s, int from, int to) | |
153 | { | |
154 | 0 | return from < s.length() && !Prop.isCurrency(s.charAt(from)) ? to : -1; |
155 | } | |
156 | } | |
157 | ||
158 | /** Matches a Unicode alphabetic character. */ | |
159 | class UnicodeAlpha extends UniValidator | |
160 | { | |
161 | 0 | @Override |
162 | public int validate(StringLike s, int from, int to) | |
163 | { | |
164 | 0 | return from < s.length() && Prop.isAlphabetic(s.charAt(from)) ? to : -1; |
165 | } | |
166 | } | |
167 | ||
168 | /** Matches a non-alphabetic Unicode character. */ | |
169 | class NUnicodeAlpha extends UniValidator | |
170 | { | |
171 | 0 | @Override |
172 | public int validate(StringLike s, int from, int to) | |
173 | { | |
174 | 0 | return from < s.length() && !Prop.isAlphabetic(s.charAt(from)) ? to |
175 | : -1; | |
176 | } | |
177 | } | |
178 | ||
179 | /** Matches an upper case Unicode character. */ | |
180 | class UnicodeUpper extends UniValidator | |
181 | { | |
182 | 0 | @Override |
183 | public int validate(StringLike s, int from, int to) | |
184 | { | |
185 | 0 | return from < s.length() && isUpper(s.charAt(from)) ? to : -1; |
186 | } | |
187 | ||
188 | 0 | final boolean isUpper(char c) |
189 | { | |
190 | 0 | return c == CaseMgr.toUpperCase(c) && c != CaseMgr.toLowerCase(c); |
191 | } | |
192 | } | |
193 | ||
194 | /** Matches an upper case Unicode character. */ | |
195 | class UnicodeLower extends UniValidator | |
196 | { | |
197 | 0 | @Override |
198 | public int validate(StringLike s, int from, int to) | |
199 | { | |
200 | 0 | return from < s.length() && isLower(s.charAt(from)) ? to : -1; |
201 | } | |
202 | ||
203 | 0 | final boolean isLower(char c) |
204 | { | |
205 | 0 | return c != CaseMgr.toUpperCase(c) && c == CaseMgr.toLowerCase(c); |
206 | } | |
207 | } | |
208 | ||
209 | /** | |
210 | * Regex provides the parser which constructs the linked list of Pattern classes | |
211 | * from a String. | |
212 | * <p> | |
213 | * For the purpose of this documentation, the fact that java interprets the | |
214 | * backslash will be ignored. In practice, however, you will need a double | |
215 | * backslash to obtain a string that contains a single backslash character. | |
216 | * Thus, the example pattern "\b" should really be typed as "\\b" inside java | |
217 | * code. | |
218 | * <p> | |
219 | * Note that Regex is part of package "com.stevesoft.pat". To use it, simply | |
220 | * import com.stevesoft.pat.Regex at the top of your file. | |
221 | * <p> | |
222 | * Regex is made with a constructor that takes a String that defines the regular | |
223 | * expression. Thus, for example | |
224 | * | |
225 | * <pre> | |
226 | * Regex r = new Regex("[a-c]*"); | |
227 | * </pre> | |
228 | * | |
229 | * matches any number of characters so long as they are 'a', 'b', or 'c'. | |
230 | * <p> | |
231 | * To attempt to match the Pattern to a given string, you can use either the | |
232 | * search(String) member function, or the matchAt(String,int position) member | |
233 | * function. These functions return a boolean which tells you whether or not the | |
234 | * thing worked, and sets the methods "charsMatched()" and "matchedFrom()" in | |
235 | * the Regex object appropriately. | |
236 | * <p> | |
237 | * The portion of the string before the match can be obtained by the left() | |
238 | * member, and the portion after the match can be obtained by the right() | |
239 | * member. | |
240 | * <p> | |
241 | * Essentially, this package implements a syntax that is very much like the perl | |
242 | * 5 regular expression syntax. | |
243 | * | |
244 | * Longer example: | |
245 | * | |
246 | * <pre> | |
247 | * Regex r = new Regex("x(a|b)y"); | |
248 | * r.matchAt("xay", 0); | |
249 | * System.out.println("sub = " + r.stringMatched(1)); | |
250 | * </pre> | |
251 | * | |
252 | * The above would print "sub = a". | |
253 | * | |
254 | * <pre> | |
255 | * r.left() // would return "x" | |
256 | * r.right() // would return "y" | |
257 | * </pre> | |
258 | * | |
259 | * <p> | |
260 | * Differences between this package and perl5:<br> | |
261 | * The extended Pattern for setting flags, is now supported, but the flags are | |
262 | * different. "(?i)" tells the pattern to ignore case, "(?Q)" sets the | |
263 | * "dontMatchInQuotes" flag, and "(?iQ)" sets them both. You can change the | |
264 | * escape character. The pattern | |
265 | * | |
266 | * <pre> | |
267 | * (?e=#)#d+ | |
268 | * </pre> | |
269 | * | |
270 | * is the same as | |
271 | * | |
272 | * <pre> | |
273 | * \d+ | |
274 | * </pre> | |
275 | * | |
276 | * , but note that the sequence | |
277 | * | |
278 | * <pre> | |
279 | * (?e=#) | |
280 | * </pre> | |
281 | * | |
282 | * <b>must</b> occur at the very beginning of the pattern. There may be other | |
283 | * small differences as well. I will either make my package conform or note them | |
284 | * as I become aware of them. | |
285 | * <p> | |
286 | * This package supports additional patterns not in perl5: <center> | |
287 | * <table border=1> | |
288 | * <tr> | |
289 | * <td>(?@())</td> | |
290 | * <td>Group</td> | |
291 | * <td>This matches all characters between the '(' character and the balancing | |
292 | * ')' character. Thus, it will match "()" as well as "(())". The balancing | |
293 | * characters are arbitrary, thus (?@{}) matches on "{}" and "{{}}".</td> | |
294 | * <tr> | |
295 | * <td>(?<1)</td> | |
296 | * <td>Backup</td> | |
297 | * <td>Moves the pointer backwards within the text. This allows you to make a | |
298 | * "look behind." It fails if it attempts to move to a position before the | |
299 | * beginning of the string. "x(?<1)" is equivalent to "(?=x)". The number, 1 | |
300 | * in this example, is the number of characters to move backwards.</td> | |
301 | * </table> | |
302 | * </center> | |
303 | * </dl> | |
304 | * | |
305 | * @author Steven R. Brandt | |
306 | * @version package com.stevesoft.pat, release 1.5.3 | |
307 | * @see Pattern | |
308 | */ | |
309 | public class Regex extends RegRes implements FilenameFilter | |
310 | { | |
311 | /** | |
312 | * BackRefOffset gives the identity number of the first pattern. Version 1.0 | |
313 | * used zero, version 1.1 uses 1 to be more compatible with perl. | |
314 | */ | |
315 | static int BackRefOffset = 1; | |
316 | ||
317 | private static Pattern none = new NoPattern(); | |
318 | ||
319 | Pattern thePattern = none; | |
320 | ||
321 | patInt minMatch = new patInt(0); | |
322 | ||
323 | static Hashtable validators = new Hashtable(); | |
324 | 10 | static |
325 | { | |
326 | 10 | define("p", "(?>1)", new UnicodePunct()); |
327 | 10 | define("P", "(?>1)", new NUnicodePunct()); |
328 | 10 | define("s", "(?>1)", new UnicodeWhite()); |
329 | 10 | define("S", "(?>1)", new NUnicodeWhite()); |
330 | 10 | define("w", "(?>1)", new UnicodeW()); |
331 | 10 | define("W", "(?>1)", new NUnicodeW()); |
332 | 10 | define("d", "(?>1)", new UnicodeDigit()); |
333 | 10 | define("D", "(?>1)", new NUnicodeDigit()); |
334 | 10 | define("m", "(?>1)", new UnicodeMath()); |
335 | 10 | define("M", "(?>1)", new NUnicodeMath()); |
336 | 10 | define("c", "(?>1)", new UnicodeCurrency()); |
337 | 10 | define("C", "(?>1)", new NUnicodeCurrency()); |
338 | 10 | define("a", "(?>1)", new UnicodeAlpha()); |
339 | 10 | define("A", "(?>1)", new NUnicodeAlpha()); |
340 | 10 | define("uc", "(?>1)", new UnicodeUpper()); |
341 | 10 | define("lc", "(?>1)", new UnicodeLower()); |
342 | } | |
343 | ||
344 | /** Set the dontMatch in quotes flag. */ | |
345 | 0 | public void setDontMatchInQuotes(boolean b) |
346 | { | |
347 | 0 | dontMatchInQuotes = b; |
348 | } | |
349 | ||
350 | /** Find out if the dontMatchInQuotes flag is enabled. */ | |
351 | 0 | public boolean getDontMatchInQuotes() |
352 | { | |
353 | 0 | return dontMatchInQuotes; |
354 | } | |
355 | ||
356 | boolean dontMatchInQuotes = false; | |
357 | ||
358 | /** | |
359 | * Set the state of the ignoreCase flag. If set to true, then the pattern | |
360 | * matcher will ignore case when searching for a match. | |
361 | */ | |
362 | 230 | public void setIgnoreCase(boolean b) |
363 | { | |
364 | 230 | ignoreCase = b; |
365 | } | |
366 | ||
367 | /** | |
368 | * Get the state of the ignoreCase flag. Returns true if we are ignoring the | |
369 | * case of the pattern, false otherwise. | |
370 | */ | |
371 | 0 | public boolean getIgnoreCase() |
372 | { | |
373 | 0 | return ignoreCase; |
374 | } | |
375 | ||
376 | boolean ignoreCase = false; | |
377 | ||
378 | static boolean defaultMFlag = false; | |
379 | ||
380 | /** | |
381 | * Set the default value of the m flag. If it is set to true, then the MFlag | |
382 | * will be on for any regex search executed. | |
383 | */ | |
384 | 0 | public static void setDefaultMFlag(boolean mFlag) |
385 | { | |
386 | 0 | defaultMFlag = mFlag; |
387 | } | |
388 | ||
389 | /** | |
390 | * Get the default value of the m flag. If it is set to true, then the MFlag | |
391 | * will be on for any regex search executed. | |
392 | */ | |
393 | 0 | public static boolean getDefaultMFlag() |
394 | { | |
395 | 0 | return defaultMFlag; |
396 | } | |
397 | ||
/**
 * Creates a Regex with no pattern. Call compile(String) afterwards to supply
 * one.
 *
 * @see com.stevesoft.pat.Regex#compile(java.lang.String)
 */
public Regex()
{
  super();
}
407 | ||
/**
 * Creates a Regex and compiles the given pattern without ever throwing: a
 * RegSyntax raised by compile is silently discarded. Use this constructor
 * only when the pattern is known to be valid; to be told about syntax errors,
 * create the object with the no-argument constructor and call compile
 * yourself.
 *
 * @param s
 *          the pattern to compile
 * @see com.stevesoft.pat.Regex#search(java.lang.String)
 * @see com.stevesoft.pat.Regex#compile(java.lang.String)
 */
public Regex(String s)
{
  try
  {
    compile(s);
  } catch (RegSyntax ignored)
  {
    // Deliberately swallowed: this constructor promises not to throw.
  }
}
427 | ||
428 | ReplaceRule rep = null; | |
429 | ||
/**
 * Creates a Regex from pattern {@code s} and a ReplaceRule built from
 * {@code rp} via ReplaceRule.perlCode.
 *
 * @param s
 *          the pattern to compile
 * @param rp
 *          the replacement specification handed to ReplaceRule.perlCode
 * @see com.stevesoft.pat.ReplaceRule
 * @see com.stevesoft.pat.Regex#compile(java.lang.String)
 */
public Regex(String s, String rp)
{
  this(s);
  this.rep = ReplaceRule.perlCode(rp);
}
441 | ||
/**
 * Creates a Regex from pattern {@code s} and installs the supplied
 * ReplaceRule as-is, giving the caller finer control over replacement.
 *
 * @param s
 *          the pattern to compile
 * @param rp
 *          the rule to use for replacements
 * @see com.stevesoft.pat.ReplaceRule
 * @see com.stevesoft.pat.Regex#compile(java.lang.String)
 */
public Regex(String s, ReplaceRule rp)
{
  this(s);
  this.rep = rp;
}
455 | ||
456 | /** | |
457 | * Change the ReplaceRule of this Regex by compiling a new one using String | |
458 | * rp. | |
459 | */ | |
460 | 0 | public void setReplaceRule(String rp) |
461 | { | |
462 | 0 | rep = ReplaceRule.perlCode(rp); |
463 | 0 | repr = null; // Clear Replacer history |
464 | } | |
465 | ||
466 | /** Change the ReplaceRule of this Regex to rp. */ | |
467 | 109 | public void setReplaceRule(ReplaceRule rp) |
468 | { | |
469 | 109 | rep = rp; |
470 | } | |
471 | ||
472 | /** | |
473 | * Test to see if a custom defined rule exists. | |
474 | * | |
475 | * @see com.stevesoft.pat#define(java.lang.String,java.lang.String,Validator) | |
476 | */ | |
477 | 0 | public static boolean isDefined(String nm) |
478 | { | |
479 | 0 | return validators.get(nm) != null; |
480 | } | |
481 | ||
482 | /** | |
483 | * Removes a custom defined rule. | |
484 | * | |
485 | * @see com.stevesoft.pat#define(java.lang.String,java.lang.String,Validator) | |
486 | */ | |
487 | 0 | public static void undefine(String nm) |
488 | { | |
489 | 0 | validators.remove(nm); |
490 | } | |
491 | ||
492 | /** | |
493 | * Defines a method to create a new rule. See test/deriv2.java and | |
494 | * test/deriv3.java for examples of how to use it. | |
495 | */ | |
496 | 160 | public static void define(String nm, String pat, Validator v) |
497 | { | |
498 | 160 | v.pattern = pat; |
499 | 160 | validators.put(nm, v); |
500 | } | |
501 | ||
502 | /** | |
503 | * Defines a shorthand for a pattern. The pattern will be invoked by a string | |
504 | * that has the form "(??"+nm+")". | |
505 | */ | |
506 | 0 | public static void define(String nm, String pat) |
507 | { | |
508 | 0 | validators.put(nm, pat); |
509 | } | |
510 | ||
511 | /** Get the current ReplaceRule. */ | |
512 | 3 | public ReplaceRule getReplaceRule() |
513 | { | |
514 | 3 | return rep; |
515 | } | |
516 | ||
517 | Replacer repr = null; | |
518 | ||
519 | 152 | final Replacer _getReplacer() |
520 | { | |
521 | 152 | return repr == null ? repr = new Replacer() : repr; |
522 | } | |
523 | ||
524 | 0 | public Replacer getReplacer() |
525 | { | |
526 | 0 | if (repr == null) |
527 | { | |
528 | 0 | repr = new Replacer(); |
529 | } | |
530 | 0 | repr.rh.me = this; |
531 | 0 | repr.rh.prev = null; |
532 | 0 | return repr; |
533 | } | |
534 | ||
535 | /** | |
536 | * Replace the first occurence of this pattern in String s according to the | |
537 | * ReplaceRule. | |
538 | * | |
539 | * @see com.stevesoft.pat.ReplaceRule | |
540 | * @see com.stevesoft.pat.Regex#getReplaceRule() | |
541 | */ | |
542 | 0 | public String replaceFirst(String s) |
543 | { | |
544 | 0 | return _getReplacer().replaceFirstRegion(s, this, 0, s.length()) |
545 | .toString(); | |
546 | } | |
547 | ||
548 | /** | |
549 | * Replace the first occurence of this pattern in String s beginning with | |
550 | * position pos according to the ReplaceRule. | |
551 | * | |
552 | * @see com.stevesoft.pat.ReplaceRule | |
553 | * @see com.stevesoft.pat.Regex#getReplaceRule() | |
554 | */ | |
555 | 0 | public String replaceFirstFrom(String s, int pos) |
556 | { | |
557 | 0 | return _getReplacer().replaceFirstRegion(s, this, pos, s.length()) |
558 | .toString(); | |
559 | } | |
560 | ||
561 | /** | |
562 | * Replace the first occurence of this pattern in String s beginning with | |
563 | * position start and ending with end according to the ReplaceRule. | |
564 | * | |
565 | * @see com.stevesoft.pat.ReplaceRule | |
566 | * @see com.stevesoft.pat.Regex#getReplaceRule() | |
567 | */ | |
568 | 0 | public String replaceFirstRegion(String s, int start, int end) |
569 | { | |
570 | 0 | return _getReplacer().replaceFirstRegion(s, this, start, end) |
571 | .toString(); | |
572 | } | |
573 | ||
574 | /** | |
575 | * Replace all occurences of this pattern in String s according to the | |
576 | * ReplaceRule. | |
577 | * | |
578 | * @see com.stevesoft.pat.ReplaceRule | |
579 | * @see com.stevesoft.pat.Regex#getReplaceRule() | |
580 | */ | |
581 | 152 | public String replaceAll(String s) |
582 | { | |
583 | 152 | return _getReplacer().replaceAllRegion(s, this, 0, s.length()) |
584 | .toString(); | |
585 | } | |
586 | ||
587 | 0 | public StringLike replaceAll(StringLike s) |
588 | { | |
589 | 0 | return _getReplacer().replaceAllRegion(s, this, 0, s.length()); |
590 | } | |
591 | ||
592 | /** | |
593 | * Replace all occurences of this pattern in String s beginning with position | |
594 | * pos according to the ReplaceRule. | |
595 | * | |
596 | * @see com.stevesoft.pat.ReplaceRule | |
597 | * @see com.stevesoft.pat.Regex#getReplaceRule() | |
598 | */ | |
599 | 0 | public String replaceAllFrom(String s, int pos) |
600 | { | |
601 | 0 | return _getReplacer().replaceAllRegion(s, this, pos, s.length()) |
602 | .toString(); | |
603 | } | |
604 | ||
605 | /** | |
606 | * Replace all occurences of this pattern in String s beginning with position | |
607 | * start and ending with end according to the ReplaceRule. | |
608 | * | |
609 | * @see com.stevesoft.pat.ReplaceRule | |
610 | * @see com.stevesoft.pat.Regex#getReplaceRule() | |
611 | */ | |
612 | 0 | public String replaceAllRegion(String s, int start, int end) |
613 | { | |
614 | 0 | return _getReplacer().replaceAllRegion(s, this, start, end).toString(); |
615 | } | |
616 | ||
/**
 * Copy constructor. Copies the flags and the escape character, clones the
 * ReplaceRule (when present) and the compiled pattern, and shares
 * {@code minMatch} and {@code skipper} with the original by reference.
 *
 * @param r
 *          the Regex to copy
 */
public Regex(Regex r)
{
  super(r);
  this.dontMatchInQuotes = r.dontMatchInQuotes;
  this.esc = r.esc;
  this.ignoreCase = r.ignoreCase;
  this.gFlag = r.gFlag;
  this.rep = (r.rep == null) ? null : (ReplaceRule) r.rep.clone();
  // The compiled pattern is cloned rather than recompiled from r.toString().
  this.thePattern = r.thePattern.clone(new Hashtable());
  this.minMatch = r.minMatch;
  this.skipper = r.skipper;
}
640 | ||
641 | /** | |
642 | * By default, the escape character is the backslash, but you can make it | |
643 | * anything you want by setting this variable. | |
644 | */ | |
645 | public char esc = Pattern.ESC; | |
646 | ||
647 | /** | |
648 | * This method compiles a regular expression, making it possible to call the | |
649 | * search or matchAt methods. | |
650 | * | |
651 | * @exception com.stevesoft.pat.RegSyntax | |
652 | * is thrown if a syntax error is encountered in the pattern. For | |
653 | * example, "x{3,1}" or "*a" are not valid patterns. | |
654 | * @see com.stevesoft.pat.Regex#search | |
655 | * @see com.stevesoft.pat.Regex#matchAt | |
656 | */ | |
657 | 2226 | public void compile(String prepat) throws RegSyntax |
658 | { | |
659 | 2226 | String postpat = parsePerl.codify(prepat, true); |
660 | 2226 | String pat = postpat == null ? prepat : postpat; |
661 | 2226 | minMatch = null; |
662 | 2226 | ignoreCase = false; |
663 | 2226 | dontMatchInQuotes = false; |
664 | 2226 | Rthings mk = new Rthings(this); |
665 | 2226 | int offset = mk.val; |
666 | 2226 | String newpat = pat; |
667 | 2226 | thePattern = none; |
668 | 2226 | p = null; |
669 | 2226 | or = null; |
670 | 2226 | minMatch = new patInt(0); |
671 | 2226 | StrPos sp = new StrPos(pat, 0); |
672 | 2226 | if (sp.incMatch("(?e=")) |
673 | { | |
674 | 0 | char newEsc = sp.c; |
675 | 0 | sp.inc(); |
676 | 0 | if (sp.match(')')) |
677 | { | |
678 | 0 | newpat = reEscape(pat.substring(6), newEsc, Pattern.ESC); |
679 | } | |
680 | } | |
681 | 2226 | else if (esc != Pattern.ESC) |
682 | { | |
683 | 0 | newpat = reEscape(pat, esc, Pattern.ESC); |
684 | } | |
685 | 2226 | thePattern = _compile(newpat, mk); |
686 | 2225 | numSubs_ = mk.val - offset; |
687 | 2225 | mk.set(this); |
688 | } | |
689 | ||
690 | /* | |
691 | * If a Regex is compared against a Regex, a check is done to see that the | |
692 | * patterns are equal as well as the most recent match. If a Regex is compare | |
693 | * with a RegRes, only the result of the most recent match is compared. | |
694 | */ | |
695 | 0 | @Override |
696 | public boolean equals(Object o) | |
697 | { | |
698 | 0 | if (o instanceof Regex) |
699 | { | |
700 | 0 | if (toString().equals(o.toString())) |
701 | { | |
702 | 0 | return super.equals(o); |
703 | } | |
704 | else | |
705 | { | |
706 | 0 | return false; |
707 | } | |
708 | } | |
709 | else | |
710 | { | |
711 | 0 | return super.equals(o); |
712 | } | |
713 | } | |
714 | ||
715 | /** A clone by any other name would smell as sweet. */ | |
716 | 196 | @Override |
717 | public Object clone() | |
718 | { | |
719 | 196 | return new Regex(this); |
720 | } | |
721 | ||
722 | /** Return a clone of the underlying RegRes object. */ | |
723 | 0 | public RegRes result() |
724 | { | |
725 | 0 | return (RegRes) super.clone(); |
726 | } | |
727 | ||
728 | // prep sets global variables of class | |
729 | // Pattern so that it can access them | |
730 | // during an attempt at a match | |
731 | Pthings pt = new Pthings(); | |
732 | ||
733 | 11616 | final Pthings prep(StringLike s) |
734 | { | |
735 | // if(gFlag) | |
736 | 11616 | pt.lastPos = matchedTo(); |
737 | 11616 | if (pt.lastPos < 0) |
738 | { | |
739 | 6467 | pt.lastPos = 0; |
740 | } | |
741 | 11616 | if ((s == null ? null : s.unwrap()) != (src == null ? null |
742 | : s.unwrap())) | |
743 | { | |
744 | 1883 | pt.lastPos = 0; |
745 | } | |
746 | 11616 | src = s; |
747 | 11616 | pt.dotDoesntMatchCR = dotDoesntMatchCR && (!sFlag); |
748 | 11616 | pt.mFlag = (mFlag | defaultMFlag); |
749 | 11616 | pt.ignoreCase = ignoreCase; |
750 | 11616 | pt.no_check = false; |
751 | 11616 | if (pt.marks != null) |
752 | { | |
753 | 36923 | for (int i = 0; i < pt.marks.length; i++) |
754 | { | |
755 | 32274 | pt.marks[i] = -1; |
756 | } | |
757 | } | |
758 | 11616 | pt.marks = null; |
759 | 11616 | pt.nMarks = numSubs_; |
760 | 11616 | pt.src = s; |
761 | 11616 | if (dontMatchInQuotes) |
762 | { | |
763 | 0 | setCbits(s, pt); |
764 | } | |
765 | else | |
766 | { | |
767 | 11616 | pt.cbits = null; |
768 | } | |
769 | 11616 | return pt; |
770 | } | |
771 | ||
772 | /** | |
773 | * Attempt to match a Pattern beginning at a specified location within the | |
774 | * string. | |
775 | * | |
776 | * @see com.stevesoft.pat.Regex#search | |
777 | */ | |
778 | 0 | public boolean matchAt(String s, int start_pos) |
779 | { | |
780 | 0 | return _search(s, start_pos, start_pos); |
781 | } | |
782 | ||
783 | /** | |
784 | * Attempt to match a Pattern beginning at a specified location within the | |
785 | * StringLike. | |
786 | * | |
787 | * @see com.stevesoft.pat.Regex#search | |
788 | */ | |
789 | 0 | public boolean matchAt(StringLike s, int start_pos) |
790 | { | |
791 | 0 | return _search(s, start_pos, start_pos); |
792 | } | |
793 | ||
794 | /** | |
795 | * Search through a String for the first occurrence of a match. | |
796 | * | |
797 | * @see com.stevesoft.pat.Regex#searchFrom | |
798 | * @see com.stevesoft.pat.Regex#matchAt | |
799 | */ | |
800 | 10388 | public boolean search(String s) |
801 | { | |
802 | 10388 | if (s == null) |
803 | { | |
804 | 0 | throw new NullPointerException(MessageManager |
805 | .getString("exception.null_string_given_to_regex_search")); | |
806 | } | |
807 | 10388 | return _search(s, 0, s.length()); |
808 | } | |
809 | ||
810 | 0 | public boolean search(StringLike sl) |
811 | { | |
812 | 0 | if (sl == null) |
813 | { | |
814 | 0 | throw new NullPointerException(MessageManager.getString( |
815 | "exception.null_string_like_given_to_regex_search")); | |
816 | } | |
817 | 0 | return _search(sl, 0, sl.length()); |
818 | } | |
819 | ||
820 | 0 | public boolean reverseSearch(String s) |
821 | { | |
822 | 0 | if (s == null) |
823 | { | |
824 | 0 | throw new NullPointerException(MessageManager.getString( |
825 | "exception.null_string_given_to_regex_reverse_search")); | |
826 | } | |
827 | 0 | return _reverseSearch(s, 0, s.length()); |
828 | } | |
829 | ||
830 | 0 | public boolean reverseSearch(StringLike sl) |
831 | { | |
832 | 0 | if (sl == null) |
833 | { | |
834 | 0 | throw new NullPointerException(MessageManager.getString( |
835 | "exception.null_string_like_given_to_regex_reverse_search")); | |
836 | } | |
837 | 0 | return _reverseSearch(sl, 0, sl.length()); |
838 | } | |
839 | ||
840 | /** | |
841 | * Search through a String for the first occurence of a match, but start at | |
842 | * position | |
843 | * | |
844 | * <pre> | |
845 | * start | |
846 | * </pre> | |
847 | */ | |
848 | 1073 | public boolean searchFrom(String s, int start) |
849 | { | |
850 | 1073 | if (s == null) |
851 | { | |
852 | 0 | throw new NullPointerException(MessageManager.getString( |
853 | "exception.null_string_like_given_to_regex_search_from")); | |
854 | } | |
855 | 1073 | return _search(s, start, s.length()); |
856 | } | |
857 | ||
858 | 0 | public boolean searchFrom(StringLike s, int start) |
859 | { | |
860 | 0 | if (s == null) |
861 | { | |
862 | 0 | throw new NullPointerException(MessageManager.getString( |
863 | "exception.null_string_like_given_to_regex_search_from")); | |
864 | } | |
865 | 0 | return _search(s, start, s.length()); |
866 | } | |
867 | ||
868 | /** | |
869 | * Search through a region of a String for the first occurence of a match. | |
870 | */ | |
871 | 0 | public boolean searchRegion(String s, int start, int end) |
872 | { | |
873 | 0 | if (s == null) |
874 | { | |
875 | 0 | throw new NullPointerException(MessageManager.getString( |
876 | "exception.null_string_like_given_to_regex_search_region")); | |
877 | } | |
878 | 0 | return _search(s, start, end); |
879 | } | |
880 | ||
881 | /** | |
882 | * Set this to change the default behavior of the "." pattern. By default it | |
883 | * now matches perl's behavior and fails to match the '\n' character. | |
884 | */ | |
885 | public static boolean dotDoesntMatchCR = true; | |
886 | ||
887 | StringLike gFlags; | |
888 | ||
889 | int gFlagto = 0; | |
890 | ||
891 | boolean gFlag = false; | |
892 | ||
893 | /** Set the 'g' flag */ | |
894 | 0 | public void setGFlag(boolean b) |
895 | { | |
896 | 0 | gFlag = b; |
897 | } | |
898 | ||
899 | /** Get the state of the 'g' flag. */ | |
900 | 0 | public boolean getGFlag() |
901 | { | |
902 | 0 | return gFlag; |
903 | } | |
904 | ||
905 | boolean sFlag = false; | |
906 | ||
907 | /** Get the state of the sFlag */ | |
908 | 0 | public boolean getSFlag() |
909 | { | |
910 | 0 | return sFlag; |
911 | } | |
912 | ||
913 | boolean mFlag = false; | |
914 | ||
915 | /** Get the state of the sFlag */ | |
916 | 0 | public boolean getMFlag() |
917 | { | |
918 | 0 | return mFlag; |
919 | } | |
920 | ||
921 | 11461 | final boolean _search(String s, int start, int end) |
922 | { | |
923 | 11461 | return _search(new StringWrap(s), start, end); |
924 | } | |
925 | ||
926 | 11616 | final boolean _search(StringLike s, int start, int end) |
927 | { | |
928 | 11616 | if (gFlag && gFlagto > 0 && gFlags != null |
929 | && s.unwrap() == gFlags.unwrap()) | |
930 | { | |
931 | 0 | start = gFlagto; |
932 | } | |
933 | 11616 | gFlags = null; |
934 | ||
935 | 11616 | Pthings pt = prep(s); |
936 | ||
937 | 11616 | int up = (minMatch == null ? end : end - minMatch.i); |
938 | ||
939 | 11616 | if (up < start && end >= start) |
940 | { | |
941 | 0 | up = start; |
942 | } | |
943 | ||
944 | 11616 | if (skipper == null) |
945 | { | |
946 | 263340 | for (int i = start; i <= up; i++) |
947 | { | |
948 | 259002 | charsMatched_ = thePattern.matchAt(s, i, pt); |
949 | 259002 | if (charsMatched_ >= 0) |
950 | { | |
951 | 4817 | matchFrom_ = thePattern.mfrom; |
952 | 4817 | marks = pt.marks; |
953 | 4817 | gFlagto = matchFrom_ + charsMatched_; |
954 | 4817 | gFlags = s; |
955 | 4817 | return didMatch_ = true; |
956 | } | |
957 | } | |
958 | } | |
959 | else | |
960 | { | |
961 | 2461 | pt.no_check = true; |
962 | 127535 | for (int i = start; i <= up; i++) |
963 | { | |
964 | 127535 | i = skipper.find(src, i, up); |
965 | 127535 | if (i < 0) |
966 | { | |
967 | 917 | charsMatched_ = matchFrom_ = -1; |
968 | 917 | return didMatch_ = false; |
969 | } | |
970 | 126618 | charsMatched_ = thePattern.matchAt(s, i, pt); |
971 | 126618 | if (charsMatched_ >= 0) |
972 | { | |
973 | 1544 | matchFrom_ = thePattern.mfrom; |
974 | 1544 | marks = pt.marks; |
975 | 1544 | gFlagto = matchFrom_ + charsMatched_; |
976 | 1544 | gFlags = s; |
977 | 1544 | return didMatch_ = true; |
978 | } | |
979 | } | |
980 | } | |
981 | 4338 | return didMatch_ = false; |
982 | } | |
983 | ||
984 | /* | |
985 | * final boolean _search(LongStringLike s,long start,long end) { if(gFlag && | |
986 | * gFlagto > 0 && s==gFlags) start = gFlagto; gFlags = null; | |
987 | * | |
988 | * Pthings pt=prep(s); | |
989 | * | |
990 | * int up = end;//(minMatch == null ? end : end-minMatch.i); | |
991 | * | |
992 | * if(up < start && end >= start) up = start; | |
993 | * | |
994 | * if(skipper == null) { for(long i=start;i<=up;i++) { charsMatched_ = | |
995 | * thePattern.matchAt(s,i,pt); if(charsMatched_ >= 0) { matchFrom_ = | |
996 | * thePattern.mfrom; marks = pt.marks; gFlagto = matchFrom_+charsMatched_; | |
997 | * return didMatch_=true; } } } else { pt.no_check = true; for(long | |
998 | * i=start;i<=up;i++) { i = skipper.find(src,i,up); if(i<0) { charsMatched_ = | |
999 | * matchFrom_ = -1; return didMatch_ = false; } charsMatched_ = | |
1000 | * thePattern.matchAt(s,i,pt); if(charsMatched_ >= 0) { matchFrom_ = | |
1001 | * thePattern.mfrom; marks = pt.marks; gFlagto = matchFrom_+charsMatched_; | |
1002 | * gFlags = s; return didMatch_=true; } else { i = s.adjustIndex(i); up = | |
1003 | * s.adjustEnd(i); } } } return didMatch_=false; } | |
1004 | */ | |
1005 | ||
1006 | 0 | boolean _reverseSearch(String s, int start, int end) |
1007 | { | |
1008 | 0 | return _reverseSearch(new StringWrap(s), start, end); |
1009 | } | |
1010 | ||
1011 | 0 | boolean _reverseSearch(StringLike s, int start, int end) |
1012 | { | |
1013 | 0 | if (gFlag && gFlagto > 0 && s.unwrap() == gFlags.unwrap()) |
1014 | { | |
1015 | 0 | end = gFlagto; |
1016 | } | |
1017 | 0 | gFlags = null; |
1018 | 0 | Pthings pt = prep(s); |
1019 | 0 | for (int i = end; i >= start; i--) |
1020 | { | |
1021 | 0 | charsMatched_ = thePattern.matchAt(s, i, pt); |
1022 | 0 | if (charsMatched_ >= 0) |
1023 | { | |
1024 | 0 | matchFrom_ = thePattern.mfrom; |
1025 | 0 | marks = pt.marks; |
1026 | 0 | gFlagto = matchFrom_ - 1; |
1027 | 0 | gFlags = s; |
1028 | 0 | return didMatch_ = true; |
1029 | } | |
1030 | } | |
1031 | 0 | return didMatch_ = false; |
1032 | } | |
1033 | ||
1034 | // This routine sets the cbits variable | |
1035 | // of the supplied Pthings. Cbits is true for | |
1036 | // the bit corresponding to a character inside | |
1037 | // a set of quotes. | |
1038 | static StringLike lasts = null; | |
1039 | ||
1040 | static BitSet lastbs = null; | |
1041 | ||
1042 | 0 | static void setCbits(StringLike s, Pthings pt) |
1043 | { | |
1044 | 0 | if (s == lasts) |
1045 | { | |
1046 | 0 | pt.cbits = lastbs; |
1047 | 0 | return; |
1048 | } | |
1049 | 0 | BitSet bs = new BitSet(s.length()); |
1050 | 0 | char qc = ' '; |
1051 | 0 | boolean setBit = false; |
1052 | 0 | for (int i = 0; i < s.length(); i++) |
1053 | { | |
1054 | 0 | if (setBit) |
1055 | { | |
1056 | 0 | bs.set(i); |
1057 | } | |
1058 | 0 | char c = s.charAt(i); |
1059 | 0 | if (!setBit && c == '"') |
1060 | { | |
1061 | 0 | qc = c; |
1062 | 0 | setBit = true; |
1063 | 0 | bs.set(i); |
1064 | } | |
1065 | 0 | else if (!setBit && c == '\'') |
1066 | { | |
1067 | 0 | qc = c; |
1068 | 0 | setBit = true; |
1069 | 0 | bs.set(i); |
1070 | } | |
1071 | 0 | else if (setBit && c == qc) |
1072 | { | |
1073 | 0 | setBit = false; |
1074 | } | |
1075 | 0 | else if (setBit && c == '\\' && i + 1 < s.length()) |
1076 | { | |
1077 | 0 | i++; |
1078 | 0 | if (setBit) |
1079 | { | |
1080 | 0 | bs.set(i); |
1081 | } | |
1082 | } | |
1083 | } | |
1084 | 0 | pt.cbits = lastbs = bs; |
1085 | 0 | lasts = s; |
1086 | } | |
1087 | ||
1088 | // Wanted user to over-ride this in alpha version, | |
1089 | // but it wasn't really necessary because of this trick: | |
1090 | 2605 | Regex newRegex() |
1091 | { | |
1092 | 2605 | try |
1093 | { | |
1094 | 2605 | return getClass().getDeclaredConstructor().newInstance(); |
1095 | } catch (InstantiationException ie) | |
1096 | { | |
1097 | 0 | return null; |
1098 | } catch (IllegalAccessException iae) | |
1099 | { | |
1100 | 0 | return null; |
1101 | } catch (ReflectiveOperationException roe) | |
1102 | { | |
1103 | 0 | return null; |
1104 | } | |
1105 | } | |
1106 | ||
1107 | /** | |
1108 | * Only needed for creating your own extensions of Regex. This method adds the | |
1109 | * next Pattern in the chain of patterns or sets the Pattern if it is the | |
1110 | * first call. | |
1111 | */ | |
1112 | 11015 | protected void add(Pattern p2) |
1113 | { | |
1114 | 11015 | if (p == null) |
1115 | { | |
1116 | 4989 | p = p2; |
1117 | } | |
1118 | else | |
1119 | { | |
1120 | 6026 | p.add(p2); |
1121 | 6026 | p2 = p; |
1122 | } | |
1123 | } | |
1124 | ||
1125 | /** | |
1126 | * You only need to use this method if you are creating your own extentions to | |
1127 | * Regex. compile1 compiles one Pattern element, it can be over-ridden to | |
1128 | * allow the Regex compiler to understand new syntax. See deriv.java for an | |
1129 | * example. This routine is the heart of class Regex. Rthings has one integer | |
1130 | * member called intValue, it is used to keep track of the number of ()'s in | |
1131 | * the Pattern. | |
1132 | * | |
1133 | * @exception com.stevesoft.pat.RegSyntax | |
1134 | * is thrown when a nonsensensical pattern is supplied. For | |
1135 | * example, a pattern beginning with *. | |
1136 | */ | |
1137 | 15396 | protected void compile1(StrPos sp, Rthings mk) throws RegSyntax |
1138 | { | |
1139 | 15396 | if (sp.match('[')) |
1140 | { | |
1141 | 2173 | sp.inc(); |
1142 | 2173 | add(matchBracket(sp)); |
1143 | } | |
1144 | 13223 | else if (sp.match('|')) |
1145 | { | |
1146 | 173 | if (or == null) |
1147 | { | |
1148 | 15 | or = new Or(); |
1149 | } | |
1150 | 173 | if (p == null) |
1151 | { | |
1152 | 0 | p = new NullPattern(); |
1153 | } | |
1154 | 173 | or.addOr(p); |
1155 | 173 | p = null; |
1156 | } | |
1157 | 13050 | else if (sp.incMatch("(?<")) |
1158 | { | |
1159 | 0 | patInt i = sp.getPatInt(); |
1160 | 0 | if (i == null) |
1161 | { | |
1162 | 0 | RegSyntaxError.endItAll("No int after (?<"); |
1163 | } | |
1164 | 0 | add(new Backup(i.intValue())); |
1165 | 0 | if (!sp.match(')')) |
1166 | { | |
1167 | 0 | RegSyntaxError.endItAll("No ) after (?<"); |
1168 | } | |
1169 | } | |
1170 | 13050 | else if (sp.incMatch("(?>")) |
1171 | { | |
1172 | 0 | patInt i = sp.getPatInt(); |
1173 | 0 | if (i == null) |
1174 | { | |
1175 | 0 | RegSyntaxError.endItAll("No int after (?>"); |
1176 | } | |
1177 | 0 | add(new Backup(-i.intValue())); |
1178 | 0 | if (!sp.match(')')) |
1179 | { | |
1180 | 0 | RegSyntaxError.endItAll("No ) after (?<"); |
1181 | } | |
1182 | } | |
1183 | 13050 | else if (sp.incMatch("(?@")) |
1184 | { | |
1185 | 0 | char op = sp.c; |
1186 | 0 | sp.inc(); |
1187 | 0 | char cl = sp.c; |
1188 | 0 | sp.inc(); |
1189 | 0 | if (!sp.match(')')) |
1190 | { | |
1191 | 0 | RegSyntaxError.endItAll("(?@ does not have closing paren"); |
1192 | } | |
1193 | 0 | add(new Group(op, cl)); |
1194 | } | |
1195 | 13050 | else if (sp.incMatch("(?#")) |
1196 | { | |
1197 | 0 | while (!sp.match(')')) |
1198 | { | |
1199 | 0 | sp.inc(); |
1200 | } | |
1201 | } | |
1202 | 13050 | else if (sp.dontMatch && sp.c == 'w') |
1203 | { | |
1204 | // Regex r = new Regex(); | |
1205 | // r._compile("[a-zA-Z0-9_]",mk); | |
1206 | // add(new Goop("\\w",r.thePattern)); | |
1207 | 155 | Bracket b = new Bracket(false); |
1208 | 155 | b.addOr(new Range('a', 'z')); |
1209 | 155 | b.addOr(new Range('A', 'Z')); |
1210 | 155 | b.addOr(new Range('0', '9')); |
1211 | 155 | b.addOr(new oneChar('_')); |
1212 | 155 | add(b); |
1213 | } | |
1214 | 12895 | else if (sp.dontMatch && sp.c == 'G') |
1215 | { | |
1216 | 0 | add(new BackG()); |
1217 | } | |
1218 | 12895 | else if (sp.dontMatch && sp.c == 's') |
1219 | { | |
1220 | // Regex r = new Regex(); | |
1221 | // r._compile("[ \t\n\r\b]",mk); | |
1222 | // add(new Goop("\\s",r.thePattern)); | |
1223 | 1250 | Bracket b = new Bracket(false); |
1224 | 1250 | b.addOr(new oneChar((char) 32)); |
1225 | 1250 | b.addOr(new Range((char) 8, (char) 10)); |
1226 | 1250 | b.addOr(new oneChar((char) 13)); |
1227 | 1250 | add(b); |
1228 | } | |
1229 | 11645 | else if (sp.dontMatch && sp.c == 'd') |
1230 | { | |
1231 | // Regex r = new Regex(); | |
1232 | // r._compile("[0-9]",mk); | |
1233 | // add(new Goop("\\d",r.thePattern)); | |
1234 | 110 | Range digit = new Range('0', '9'); |
1235 | 110 | digit.printBrackets = true; |
1236 | 110 | add(digit); |
1237 | } | |
1238 | 11535 | else if (sp.dontMatch && sp.c == 'W') |
1239 | { | |
1240 | // Regex r = new Regex(); | |
1241 | // r._compile("[^a-zA-Z0-9_]",mk); | |
1242 | // add(new Goop("\\W",r.thePattern)); | |
1243 | 1 | Bracket b = new Bracket(true); |
1244 | 1 | b.addOr(new Range('a', 'z')); |
1245 | 1 | b.addOr(new Range('A', 'Z')); |
1246 | 1 | b.addOr(new Range('0', '9')); |
1247 | 1 | b.addOr(new oneChar('_')); |
1248 | 1 | add(b); |
1249 | } | |
1250 | 11534 | else if (sp.dontMatch && sp.c == 'S') |
1251 | { | |
1252 | // Regex r = new Regex(); | |
1253 | // r._compile("[^ \t\n\r\b]",mk); | |
1254 | // add(new Goop("\\S",r.thePattern)); | |
1255 | 225 | Bracket b = new Bracket(true); |
1256 | 225 | b.addOr(new oneChar((char) 32)); |
1257 | 225 | b.addOr(new Range((char) 8, (char) 10)); |
1258 | 225 | b.addOr(new oneChar((char) 13)); |
1259 | 225 | add(b); |
1260 | } | |
1261 | 11309 | else if (sp.dontMatch && sp.c == 'D') |
1262 | { | |
1263 | // Regex r = new Regex(); | |
1264 | // r._compile("[^0-9]",mk); | |
1265 | // add(new Goop("\\D",r.thePattern)); | |
1266 | 0 | Bracket b = new Bracket(true); |
1267 | 0 | b.addOr(new Range('0', '9')); |
1268 | 0 | add(b); |
1269 | } | |
1270 | 11309 | else if (sp.dontMatch && sp.c == 'B') |
1271 | { | |
1272 | 0 | Regex r = new Regex(); |
1273 | 0 | r._compile("(?!" + back_slash + "b)", mk); |
1274 | 0 | add(r.thePattern); |
1275 | } | |
1276 | 11309 | else if (isOctalString(sp)) |
1277 | { | |
1278 | 0 | int d = sp.c - '0'; |
1279 | 0 | sp.inc(); |
1280 | 0 | d = 8 * d + sp.c - '0'; |
1281 | 0 | StrPos sp2 = new StrPos(sp); |
1282 | 0 | sp2.inc(); |
1283 | 0 | if (isOctalDigit(sp2, false)) |
1284 | { | |
1285 | 0 | sp.inc(); |
1286 | 0 | d = 8 * d + sp.c - '0'; |
1287 | } | |
1288 | 0 | add(new oneChar((char) d)); |
1289 | } | |
1290 | 11309 | else if (sp.dontMatch && sp.c >= '1' && sp.c <= '9') |
1291 | { | |
1292 | 0 | int iv = sp.c - '0'; |
1293 | 0 | StrPos s2 = new StrPos(sp); |
1294 | 0 | s2.inc(); |
1295 | 0 | if (!s2.dontMatch && s2.c >= '0' && s2.c <= '9') |
1296 | { | |
1297 | 0 | iv = 10 * iv + (s2.c - '0'); |
1298 | 0 | sp.inc(); |
1299 | } | |
1300 | 0 | add(new BackMatch(iv)); |
1301 | } | |
1302 | 11309 | else if (sp.dontMatch && sp.c == 'b') |
1303 | { | |
1304 | 396 | add(new Boundary()); |
1305 | } | |
1306 | 10913 | else if (sp.match('\b')) |
1307 | { | |
1308 | 0 | add(new Boundary()); |
1309 | } | |
1310 | 10913 | else if (sp.match('$')) |
1311 | { | |
1312 | 31 | add(new End(true)); |
1313 | } | |
1314 | 10882 | else if (sp.dontMatch && sp.c == 'Z') |
1315 | { | |
1316 | 0 | add(new End(false)); |
1317 | } | |
1318 | 10882 | else if (sp.match('.')) |
1319 | { | |
1320 | 375 | add(new Any()); |
1321 | } | |
1322 | 10507 | else if (sp.incMatch("(??")) |
1323 | { | |
1324 | 0 | StringBuffer sb = new StringBuffer(); |
1325 | 0 | StringBuffer sb2 = new StringBuffer(); |
1326 | 0 | while (!sp.match(')') && !sp.match(':')) |
1327 | { | |
1328 | 0 | sb.append(sp.c); |
1329 | 0 | sp.inc(); |
1330 | } | |
1331 | 0 | if (sp.incMatch(":")) |
1332 | { | |
1333 | 0 | while (!sp.match(')')) |
1334 | { | |
1335 | 0 | sb2.append(sp.c); |
1336 | 0 | sp.inc(); |
1337 | } | |
1338 | } | |
1339 | 0 | String sbs = sb.toString(); |
1340 | 0 | if (validators.get(sbs) instanceof String) |
1341 | { | |
1342 | 0 | String pat = (String) validators.get(sbs); |
1343 | 0 | Regex r = newRegex(); |
1344 | 0 | Rthings rth = new Rthings(this); |
1345 | 0 | rth.noBackRefs = true; |
1346 | 0 | r._compile(pat, rth); |
1347 | 0 | add(r.thePattern); |
1348 | } | |
1349 | else | |
1350 | { | |
1351 | 0 | Custom cm = new Custom(sb.toString()); |
1352 | 0 | if (cm.v != null) |
1353 | { | |
1354 | 0 | Validator v2 = cm.v.arg(sb2.toString()); |
1355 | 0 | if (v2 != null) |
1356 | { | |
1357 | 0 | v2.argsave = sb2.toString(); |
1358 | 0 | String p = cm.v.pattern; |
1359 | 0 | cm.v = v2; |
1360 | 0 | v2.pattern = p; |
1361 | } | |
1362 | 0 | Regex r = newRegex(); |
1363 | 0 | Rthings rth = new Rthings(this); |
1364 | 0 | rth.noBackRefs = true; |
1365 | 0 | r._compile(cm.v.pattern, rth); |
1366 | 0 | cm.sub = r.thePattern; |
1367 | 0 | cm.sub.add(new CustomEndpoint(cm)); |
1368 | 0 | cm.sub.setParent(cm); |
1369 | 0 | add(cm); |
1370 | } | |
1371 | } | |
1372 | } | |
1373 | 10507 | else if (sp.match('(')) |
1374 | { | |
1375 | 2605 | mk.parenLevel++; |
1376 | 2605 | Regex r = newRegex(); |
1377 | // r.or = new Or(); | |
1378 | 2605 | sp.inc(); |
1379 | 2605 | if (sp.incMatch("?:")) |
1380 | { | |
1381 | 42 | r.or = new Or(); |
1382 | } | |
1383 | 2563 | else if (sp.incMatch("?=")) |
1384 | { | |
1385 | 0 | r.or = new lookAhead(false); |
1386 | } | |
1387 | 2563 | else if (sp.incMatch("?!")) |
1388 | { | |
1389 | 0 | r.or = new lookAhead(true); |
1390 | } | |
1391 | 2563 | else if (sp.match('?')) |
1392 | { | |
1393 | 0 | sp.inc(); |
1394 | 0 | do |
1395 | { | |
1396 | 0 | if (sp.c == 'i') |
1397 | { | |
1398 | 0 | mk.ignoreCase = true; |
1399 | } | |
1400 | 0 | if (sp.c == 'Q') |
1401 | { | |
1402 | 0 | mk.dontMatchInQuotes = true; |
1403 | } | |
1404 | 0 | if (sp.c == 'o') |
1405 | { | |
1406 | 0 | mk.optimizeMe = true; |
1407 | } | |
1408 | 0 | if (sp.c == 'g') |
1409 | { | |
1410 | 0 | mk.gFlag = true; |
1411 | } | |
1412 | 0 | if (sp.c == 's') |
1413 | { | |
1414 | 0 | mk.sFlag = true; |
1415 | } | |
1416 | 0 | if (sp.c == 'm') |
1417 | { | |
1418 | 0 | mk.mFlag = true; |
1419 | } | |
1420 | 0 | sp.inc(); |
1421 | 0 | } while (!sp.match(')') && !sp.eos); |
1422 | 0 | r = null; |
1423 | 0 | mk.parenLevel--; |
1424 | 0 | if (sp.eos) // throw new RegSyntax |
1425 | { | |
1426 | 0 | RegSyntaxError.endItAll("Unclosed ()"); |
1427 | } | |
1428 | } | |
1429 | else | |
1430 | { // just ordinary parenthesis | |
1431 | 2563 | r.or = mk.noBackRefs ? new Or() : new OrMark(mk.val++); |
1432 | } | |
1433 | 2605 | if (r != null) |
1434 | { | |
1435 | 2605 | add(r._compile(sp, mk)); |
1436 | } | |
1437 | } | |
1438 | 7902 | else if (sp.match('^')) |
1439 | { | |
1440 | 65 | add(new Start(true)); |
1441 | } | |
1442 | 7837 | else if (sp.dontMatch && sp.c == 'A') |
1443 | { | |
1444 | 0 | add(new Start(false)); |
1445 | } | |
1446 | 7837 | else if (sp.match('*')) |
1447 | { | |
1448 | 1460 | addMulti(new patInt(0), new patInf()); |
1449 | } | |
1450 | 6377 | else if (sp.match('+')) |
1451 | { | |
1452 | 2487 | addMulti(new patInt(1), new patInf()); |
1453 | } | |
1454 | 3890 | else if (sp.match('?')) |
1455 | { | |
1456 | 89 | addMulti(new patInt(0), new patInt(1)); |
1457 | } | |
1458 | 3801 | else if (sp.match('{')) |
1459 | { | |
1460 | 172 | boolean bad = false; |
1461 | 172 | StrPos sp2 = new StrPos(sp); |
1462 | // StringBuffer sb = new StringBuffer(); | |
1463 | 172 | sp.inc(); |
1464 | 172 | patInt i1 = sp.getPatInt(); |
1465 | 172 | patInt i2 = null; |
1466 | 172 | if (sp.match('}')) |
1467 | { | |
1468 | 157 | i2 = i1; |
1469 | } | |
1470 | else | |
1471 | { | |
1472 | 15 | if (!sp.match(',')) |
1473 | { | |
1474 | /* | |
1475 | * RegSyntaxError.endItAll( "String \"{"+i2+ "\" should be followed | |
1476 | * with , or }"); | |
1477 | */ | |
1478 | 0 | bad = true; |
1479 | } | |
1480 | 15 | sp.inc(); |
1481 | 15 | if (sp.match('}')) |
1482 | { | |
1483 | 15 | i2 = new patInf(); |
1484 | } | |
1485 | else | |
1486 | { | |
1487 | 0 | i2 = sp.getPatInt(); |
1488 | } | |
1489 | } | |
1490 | 172 | if (i1 == null || i2 == null) |
1491 | { | |
1492 | /* | |
1493 | * throw new RegSyntax("Badly formatted Multi: " +"{"+i1+","+i2+"}"); | |
1494 | */ | |
1495 | 0 | bad = true; |
1496 | } | |
1497 | 172 | if (bad) |
1498 | { | |
1499 | 0 | sp.dup(sp2); |
1500 | 0 | add(new oneChar(sp.c)); |
1501 | } | |
1502 | else | |
1503 | { | |
1504 | 172 | addMulti(i1, i2); |
1505 | } | |
1506 | } | |
1507 | 3629 | else if (sp.escMatch('x') && next2Hex(sp)) |
1508 | { | |
1509 | 0 | sp.inc(); |
1510 | 0 | int d = getHexDigit(sp); |
1511 | 0 | sp.inc(); |
1512 | 0 | d = 16 * d + getHexDigit(sp); |
1513 | 0 | add(new oneChar((char) d)); |
1514 | } | |
1515 | 3629 | else if (sp.escMatch('c')) |
1516 | { | |
1517 | 0 | sp.inc(); |
1518 | 0 | if (sp.c < Ctrl.cmap.length) |
1519 | { | |
1520 | 0 | add(new oneChar(Ctrl.cmap[sp.c])); |
1521 | } | |
1522 | else | |
1523 | { | |
1524 | 0 | add(new oneChar(sp.c)); |
1525 | } | |
1526 | } | |
1527 | 3629 | else if (sp.escMatch('f')) |
1528 | { | |
1529 | 0 | add(new oneChar((char) 12)); |
1530 | } | |
1531 | 3629 | else if (sp.escMatch('a')) |
1532 | { | |
1533 | 0 | add(new oneChar((char) 7)); |
1534 | } | |
1535 | 3629 | else if (sp.escMatch('t')) |
1536 | { | |
1537 | 0 | add(new oneChar('\t')); |
1538 | } | |
1539 | 3629 | else if (sp.escMatch('n')) |
1540 | { | |
1541 | 0 | add(new oneChar('\n')); |
1542 | } | |
1543 | 3629 | else if (sp.escMatch('r')) |
1544 | { | |
1545 | 0 | add(new oneChar('\r')); |
1546 | } | |
1547 | 3629 | else if (sp.escMatch('b')) |
1548 | { | |
1549 | 0 | add(new oneChar('\b')); |
1550 | } | |
1551 | 3629 | else if (sp.escMatch('e')) |
1552 | { | |
1553 | 0 | add(new oneChar((char) 27)); |
1554 | } | |
1555 | else | |
1556 | { | |
1557 | 3629 | add(new oneChar(sp.c)); |
1558 | 3629 | if (sp.match(')')) |
1559 | { | |
1560 | 0 | RegSyntaxError.endItAll("Unmatched right paren in pattern"); |
1561 | } | |
1562 | } | |
1563 | } | |
1564 | ||
1565 | // compiles all Pattern elements, internal method | |
1566 | 2226 | private Pattern _compile(String pat, Rthings mk) throws RegSyntax |
1567 | { | |
1568 | 2226 | minMatch = null; |
1569 | 2226 | sFlag = mFlag = ignoreCase = gFlag = false; |
1570 | 2226 | StrPos sp = new StrPos(pat, 0); |
1571 | 2226 | thePattern = _compile(sp, mk); |
1572 | 2225 | pt.marks = null; |
1573 | 2225 | return thePattern; |
1574 | } | |
1575 | ||
1576 | Pattern p = null; | |
1577 | ||
1578 | Or or = null; | |
1579 | ||
1580 | 4831 | Pattern _compile(StrPos sp, Rthings mk) throws RegSyntax |
1581 | { | |
1582 | 20226 | while (!(sp.eos || (or != null && sp.match(')')))) |
1583 | { | |
1584 | 15396 | compile1(sp, mk); |
1585 | 15395 | sp.inc(); |
1586 | } | |
1587 | 4830 | if (sp.match(')')) |
1588 | { | |
1589 | 2605 | mk.parenLevel--; |
1590 | } | |
1591 | 2225 | else if (sp.eos && mk.parenLevel != 0) |
1592 | { | |
1593 | 0 | RegSyntaxError.endItAll("Unclosed Parenthesis! lvl=" + mk.parenLevel); |
1594 | } | |
1595 | 4830 | if (or != null) |
1596 | { | |
1597 | 2620 | if (p == null) |
1598 | { | |
1599 | 15 | p = new NullPattern(); |
1600 | } | |
1601 | 2620 | or.addOr(p); |
1602 | 2620 | return or; |
1603 | } | |
1604 | 2210 | return p == null ? new NullPattern() : p; |
1605 | } | |
1606 | ||
1607 | // add a multi object to the end of the chain | |
1608 | // which applies to the last object | |
1609 | 4208 | void addMulti(patInt i1, patInt i2) throws RegSyntax |
1610 | { | |
1611 | 4208 | Pattern last, last2; |
1612 | 8372 | for (last = p; last != null && last.next != null; last = last.next) |
1613 | { | |
1614 | 4164 | ; |
1615 | } | |
1616 | 4208 | if (last == null || last == p) |
1617 | { | |
1618 | 2588 | last2 = null; |
1619 | } | |
1620 | else | |
1621 | { | |
1622 | 4164 | for (last2 = p; last2.next != last; last2 = last2.next) |
1623 | { | |
1624 | 2544 | ; |
1625 | } | |
1626 | } | |
1627 | 4208 | if (last instanceof Multi && i1.intValue() == 0 && i2.intValue() == 1) |
1628 | { | |
1629 | 0 | ((Multi) last).matchFewest = true; |
1630 | } | |
1631 | 4208 | else if (last instanceof FastMulti && i1.intValue() == 0 |
1632 | && i2.intValue() == 1) | |
1633 | { | |
1634 | 0 | ((FastMulti) last).matchFewest = true; |
1635 | } | |
1636 | 4208 | else if (last instanceof DotMulti && i1.intValue() == 0 |
1637 | && i2.intValue() == 1) | |
1638 | { | |
1639 | 0 | ((DotMulti) last).matchFewest = true; |
1640 | } | |
1641 | 4208 | else if (last instanceof Multi || last instanceof DotMulti |
1642 | || last instanceof FastMulti) | |
1643 | { | |
1644 | 1 | throw new RegSyntax("Syntax error."); |
1645 | } | |
1646 | 4207 | else if (last2 == null) |
1647 | { | |
1648 | 2587 | p = mkMulti(i1, i2, p); |
1649 | } | |
1650 | else | |
1651 | { | |
1652 | 1620 | last2.next = mkMulti(i1, i2, last); |
1653 | } | |
1654 | } | |
1655 | ||
1656 | 4207 | final static Pattern mkMulti(patInt lo, patInt hi, Pattern p) |
1657 | throws RegSyntax | |
1658 | { | |
1659 | 4207 | if (p instanceof Any && p.next == null) |
1660 | { | |
1661 | 371 | return new DotMulti(lo, hi); |
1662 | } | |
1663 | 3836 | return RegOpt.safe4fm(p) ? (Pattern) new FastMulti(lo, hi, p) |
1664 | : (Pattern) new Multi(lo, hi, p); | |
1665 | } | |
1666 | ||
1667 | // process the bracket operator | |
1668 | 2173 | Pattern matchBracket(StrPos sp) throws RegSyntax |
1669 | { | |
1670 | 2173 | Bracket ret; |
1671 | 2173 | if (sp.match('^')) |
1672 | { | |
1673 | 419 | ret = new Bracket(true); |
1674 | 419 | sp.inc(); |
1675 | } | |
1676 | else | |
1677 | { | |
1678 | 1754 | ret = new Bracket(false); |
1679 | } | |
1680 | 2173 | if (sp.match(']')) |
1681 | { | |
1682 | // throw new RegSyntax | |
1683 | 0 | RegSyntaxError.endItAll("Unmatched []"); |
1684 | } | |
1685 | ||
1686 | 10278 | while (!sp.eos && !sp.match(']')) |
1687 | { | |
1688 | 8105 | StrPos s1 = new StrPos(sp); |
1689 | 8105 | s1.inc(); |
1690 | 8105 | StrPos s1_ = new StrPos(s1); |
1691 | 8105 | s1_.inc(); |
1692 | 8105 | if (s1.match('-') && !s1_.match(']')) |
1693 | { | |
1694 | 1836 | StrPos s2 = new StrPos(s1); |
1695 | 1836 | s2.inc(); |
1696 | 1836 | if (!s2.eos) |
1697 | { | |
1698 | 1836 | ret.addOr(new Range(sp.c, s2.c)); |
1699 | } | |
1700 | 1836 | sp.inc(); |
1701 | 1836 | sp.inc(); |
1702 | } | |
1703 | 6269 | else if (sp.escMatch('Q')) |
1704 | { | |
1705 | 0 | sp.inc(); |
1706 | 0 | while (!sp.escMatch('E')) |
1707 | { | |
1708 | 0 | ret.addOr(new oneChar(sp.c)); |
1709 | 0 | sp.inc(); |
1710 | } | |
1711 | } | |
1712 | 6269 | else if (sp.escMatch('d')) |
1713 | { | |
1714 | 45 | ret.addOr(new Range('0', '9')); |
1715 | } | |
1716 | 6224 | else if (sp.escMatch('s')) |
1717 | { | |
1718 | 0 | ret.addOr(new oneChar((char) 32)); |
1719 | 0 | ret.addOr(new Range((char) 8, (char) 10)); |
1720 | 0 | ret.addOr(new oneChar((char) 13)); |
1721 | } | |
1722 | 6224 | else if (sp.escMatch('w')) |
1723 | { | |
1724 | 0 | ret.addOr(new Range('a', 'z')); |
1725 | 0 | ret.addOr(new Range('A', 'Z')); |
1726 | 0 | ret.addOr(new Range('0', '9')); |
1727 | 0 | ret.addOr(new oneChar('_')); |
1728 | } | |
1729 | 6224 | else if (sp.escMatch('D')) |
1730 | { | |
1731 | 0 | ret.addOr(new Range((char) 0, (char) 47)); |
1732 | 0 | ret.addOr(new Range((char) 58, (char) 65535)); |
1733 | } | |
1734 | 6224 | else if (sp.escMatch('S')) |
1735 | { | |
1736 | 0 | ret.addOr(new Range((char) 0, (char) 7)); |
1737 | 0 | ret.addOr(new Range((char) 11, (char) 12)); |
1738 | 0 | ret.addOr(new Range((char) 14, (char) 31)); |
1739 | 0 | ret.addOr(new Range((char) 33, (char) 65535)); |
1740 | } | |
1741 | 6224 | else if (sp.escMatch('W')) |
1742 | { | |
1743 | 0 | ret.addOr(new Range((char) 0, (char) 64)); |
1744 | 0 | ret.addOr(new Range((char) 91, (char) 94)); |
1745 | 0 | ret.addOr(new oneChar((char) 96)); |
1746 | 0 | ret.addOr(new Range((char) 123, (char) 65535)); |
1747 | } | |
1748 | 6224 | else if (sp.escMatch('x') && next2Hex(sp)) |
1749 | { | |
1750 | 0 | sp.inc(); |
1751 | 0 | int d = getHexDigit(sp); |
1752 | 0 | sp.inc(); |
1753 | 0 | d = 16 * d + getHexDigit(sp); |
1754 | 0 | ret.addOr(new oneChar((char) d)); |
1755 | } | |
1756 | 6224 | else if (sp.escMatch('a')) |
1757 | { | |
1758 | 0 | ret.addOr(new oneChar((char) 7)); |
1759 | } | |
1760 | 6224 | else if (sp.escMatch('f')) |
1761 | { | |
1762 | 0 | ret.addOr(new oneChar((char) 12)); |
1763 | } | |
1764 | 6224 | else if (sp.escMatch('e')) |
1765 | { | |
1766 | 0 | ret.addOr(new oneChar((char) 27)); |
1767 | } | |
1768 | 6224 | else if (sp.escMatch('n')) |
1769 | { | |
1770 | 0 | ret.addOr(new oneChar('\n')); |
1771 | } | |
1772 | 6224 | else if (sp.escMatch('t')) |
1773 | { | |
1774 | 0 | ret.addOr(new oneChar('\t')); |
1775 | } | |
1776 | 6224 | else if (sp.escMatch('r')) |
1777 | { | |
1778 | 0 | ret.addOr(new oneChar('\r')); |
1779 | } | |
1780 | 6224 | else if (sp.escMatch('c')) |
1781 | { | |
1782 | 0 | sp.inc(); |
1783 | 0 | if (sp.c < Ctrl.cmap.length) |
1784 | { | |
1785 | 0 | ret.addOr(new oneChar(Ctrl.cmap[sp.c])); |
1786 | } | |
1787 | else | |
1788 | { | |
1789 | 0 | ret.addOr(new oneChar(sp.c)); |
1790 | } | |
1791 | } | |
1792 | 6224 | else if (isOctalString(sp)) |
1793 | { | |
1794 | 0 | int d = sp.c - '0'; |
1795 | 0 | sp.inc(); |
1796 | 0 | d = 8 * d + sp.c - '0'; |
1797 | 0 | StrPos sp2 = new StrPos(sp); |
1798 | 0 | sp2.inc(); |
1799 | 0 | if (isOctalDigit(sp2, false)) |
1800 | { | |
1801 | 0 | sp.inc(); |
1802 | 0 | d = 8 * d + sp.c - '0'; |
1803 | } | |
1804 | 0 | ret.addOr(new oneChar((char) d)); |
1805 | } | |
1806 | else | |
1807 | { | |
1808 | 6224 | ret.addOr(new oneChar(sp.c)); |
1809 | } | |
1810 | 8105 | sp.inc(); |
1811 | } | |
1812 | 2173 | return ret; |
1813 | } | |
1814 | ||
1815 | /** | |
1816 | * Converts the stored Pattern to a String -- this is a decompile. Note that | |
1817 | * \t and \n will really print out here, Not just the two character | |
1818 | * representations. Also be prepared to see some strange output if your | |
1819 | * characters are not printable. | |
1820 | */ | |
1821 | 0 | @Override |
1822 | public String toString() | |
1823 | { | |
1824 | 0 | if (false && thePattern == null) |
1825 | { | |
1826 | 0 | return ""; |
1827 | } | |
1828 | else | |
1829 | { | |
1830 | 0 | StringBuffer sb = new StringBuffer(); |
1831 | 0 | if (esc != Pattern.ESC) |
1832 | { | |
1833 | 0 | sb.append("(?e="); |
1834 | 0 | sb.append(esc); |
1835 | 0 | sb.append(")"); |
1836 | } | |
1837 | 0 | if (gFlag || mFlag || !dotDoesntMatchCR || sFlag || ignoreCase |
1838 | || dontMatchInQuotes || optimized()) | |
1839 | { | |
1840 | 0 | sb.append("(?"); |
1841 | 0 | if (ignoreCase) |
1842 | { | |
1843 | 0 | sb.append("i"); |
1844 | } | |
1845 | 0 | if (mFlag) |
1846 | { | |
1847 | 0 | sb.append("m"); |
1848 | } | |
1849 | 0 | if (sFlag || !dotDoesntMatchCR) |
1850 | { | |
1851 | 0 | sb.append("s"); |
1852 | } | |
1853 | 0 | if (dontMatchInQuotes) |
1854 | { | |
1855 | 0 | sb.append("Q"); |
1856 | } | |
1857 | 0 | if (optimized()) |
1858 | { | |
1859 | 0 | sb.append("o"); |
1860 | } | |
1861 | 0 | if (gFlag) |
1862 | { | |
1863 | 0 | sb.append("g"); |
1864 | } | |
1865 | 0 | sb.append(")"); |
1866 | } | |
1867 | 0 | String patstr = thePattern.toString(); |
1868 | 0 | if (esc != Pattern.ESC) |
1869 | { | |
1870 | 0 | patstr = reEscape(patstr, Pattern.ESC, esc); |
1871 | } | |
1872 | 0 | sb.append(patstr); |
1873 | 0 | return sb.toString(); |
1874 | } | |
1875 | } | |
1876 | ||
1877 | // Re-escape Pattern, allows us to use a different escape | |
1878 | // character. | |
1879 | 0 | static String reEscape(String s, char oldEsc, char newEsc) |
1880 | { | |
1881 | 0 | if (oldEsc == newEsc) |
1882 | { | |
1883 | 0 | return s; |
1884 | } | |
1885 | 0 | int i; |
1886 | 0 | StringBuffer sb = new StringBuffer(); |
1887 | 0 | for (i = 0; i < s.length(); i++) |
1888 | { | |
1889 | 0 | if (s.charAt(i) == oldEsc && i + 1 < s.length()) |
1890 | { | |
1891 | 0 | if (s.charAt(i + 1) == oldEsc) |
1892 | { | |
1893 | 0 | sb.append(oldEsc); |
1894 | } | |
1895 | else | |
1896 | { | |
1897 | 0 | sb.append(newEsc); |
1898 | 0 | sb.append(s.charAt(i + 1)); |
1899 | } | |
1900 | 0 | i++; |
1901 | } | |
1902 | 0 | else if (s.charAt(i) == newEsc) |
1903 | { | |
1904 | 0 | sb.append(newEsc); |
1905 | 0 | sb.append(newEsc); |
1906 | } | |
1907 | else | |
1908 | { | |
1909 | 0 | sb.append(s.charAt(i)); |
1910 | } | |
1911 | } | |
1912 | 0 | return sb.toString(); |
1913 | } | |
1914 | ||
1915 | /** | |
1916 | * Implements FilenameFilter so that a Regex can be passed to File.list: a | |
1917 | * name is accepted when search(s) succeeds on it; the dir argument is not | |
1918 | * used. There is a FileRegex now that does this better. | |
1919 | * | |
1920 | * @see com.stevesoft.pat.FileRegex | |
1921 | */ | |
1922 | 0 | @Override |
1923 | public boolean accept(File dir, String s) | |
1924 | { | |
1925 | 0 | return search(s); |
1926 | } | |
1927 | ||
1928 | /** The version of this package */ | |
1929 | 0 | final static public String version() |
1930 | { | |
1931 | 0 | return "lgpl release 1.5.3"; |
1932 | } | |
1933 | ||
1934 | /** | |
1935 | * Once this method is called, the state of variables ignoreCase and | |
1936 | * dontMatchInQuotes should not be changed as the results will be | |
1937 | * unpredictable. However, search and matchAt will run more quickly. Note that | |
1938 | * you can check to see if the pattern has been optimized by calling the | |
1939 | * optimized() method. | |
1940 | * <p> | |
1941 | * This method will attempt to rewrite your pattern in a way that makes it | |
1942 | * faster (not all patterns execute at the same speed). In general, "(?: ... | |
1943 | * )" will be faster than "( ... )" so if you don't need the backreference, | |
1944 | * you should group using the former pattern. | |
1945 | * <p> | |
1946 | * It will also introduce new pattern elements that you can't get to | |
1947 | * otherwise, for example if you have a large table of strings, i.e. the | |
1948 | * months of the year "(January|February|...)" optimize() will make a | |
1949 | * Hashtable that takes it to the next appropriate pattern element -- | |
1950 | * eliminating the need for a linear search. | |
1951 | * | |
1952 | * @see com.stevesoft.pat.Regex#optimized | |
1953 | * @see com.stevesoft.pat.Regex#ignoreCase | |
1954 | * @see com.stevesoft.pat.Regex#dontMatchInQuotes | |
1955 | * @see com.stevesoft.pat.Regex#matchAt | |
1956 | * @see com.stevesoft.pat.Regex#search | |
1957 | */ | |
1958 | 318 | public void optimize() |
1959 | { | |
1960 | 318 | if (optimized() || thePattern == null) |
1961 | { | |
1962 | 0 | return; |
1963 | } | |
1964 | 318 | minMatch = new patInt(0); // thePattern.countMinChars(); |
1965 | 318 | thePattern = RegOpt.opt(thePattern, ignoreCase, dontMatchInQuotes); |
1966 | 318 | skipper = Skip.findSkip(this); |
1967 | // RegOpt.setParents(this); | |
1968 | 318 | return; |
1969 | } | |
1970 | ||
1971 | Skip skipper; | |
1972 | ||
1973 | /** | |
1974 | * Returns true if the optimize method has been called, i.e. minMatch has been set; compiling a new pattern string resets it to null. | |
1975 | */ | |
1976 | 318 | public boolean optimized() |
1977 | { | |
1978 | 318 | return minMatch != null; |
1979 | } | |
1980 | ||
1981 | /** | |
1982 | * A bit of syntactic sugar for those who want to make their code look more | |
1983 | * perl-like. To use this initialize your Regex object by saying: | |
1984 | * | |
1985 | * <pre> | |
1986 | * Regex r1 = Regex.perlCode("s/hello/goodbye/"); | |
1987 | * Regex r2 = Regex.perlCode("s'fish'frog'i"); | |
1988 | * Regex r3 = Regex.perlCode("m'hello'"); | |
1989 | * </pre> | |
1990 | * | |
1991 | * The i for ignoreCase is supported in this syntax, as well as m, s, and x. | |
1992 | * The g flag is a bit of a special case. | |
1993 | * <p> | |
1994 | * If you wish to replace all occurrences of a pattern, you do not put a 'g' in | |
1995 | * the perlCode, but call Regex's replaceAll method. | |
1996 | * <p> | |
1997 | * If you wish to simply and only do a search for r2's pattern, you can do | |
1998 | * this by calling the searchFrom method repeatedly, or by calling | |
1999 | * search repeatedly if the g flag is set. | |
2000 | * <p> | |
2001 | * Note: Currently perlCode does <em>not</em> support the (?e=#) syntax for | |
2002 | * changing the escape character. | |
2003 | */ | |
2004 | ||
2005 | 110 | public static Regex perlCode(String s) |
2006 | { | |
2007 | // The parsing itself lives in parsePerl.java to keep this file smaller; | |
2008 | // this method only delegates to parsePerl.parse. | |
2009 | 110 | return parsePerl.parse(s); |
2010 | } | |
2011 | ||
2012 | static final char back_slash = '\\'; | |
2013 | ||
2014 | /** | |
2015 | * Checks to see if there are only literal and no special pattern elements in | |
2016 | * this Regex. | |
2017 | */ | |
2018 | 0 | public boolean isLiteral() |
2019 | { | |
2020 | 0 | Pattern x = thePattern; |
2021 | 0 | while (x != null) |
2022 | { | |
2023 | 0 | if (x instanceof oneChar) |
2024 | { | |
2025 | 0 | ; |
2026 | } | |
2027 | 0 | else if (x instanceof Skipped) |
2028 | { | |
2029 | 0 | ; |
2030 | } | |
2031 | else | |
2032 | { | |
2033 | 0 | return false; |
2034 | } | |
2035 | 0 | x = x.next; |
2036 | } | |
2037 | 0 | return true; |
2038 | } | |
2039 | ||
2040 | /** | |
2041 | * You only need to know about this if you are inventing your own pattern | |
2042 | * elements. | |
2043 | */ | |
2044 | 0 | public patInt countMinChars() |
2045 | { | |
2046 | 0 | return thePattern.countMinChars(); |
2047 | } | |
2048 | ||
2049 | /** | |
2050 | * You only need to know about this if you are inventing your own pattern | |
2051 | * elements. | |
2052 | */ | |
2053 | 0 | public patInt countMaxChars() |
2054 | { | |
2055 | 0 | return thePattern.countMaxChars(); |
2056 | } | |
2057 | ||
2058 | 0 | boolean isHexDigit(StrPos sp) |
2059 | { | |
2060 | 0 | boolean r = !sp.eos && !sp.dontMatch |
2061 | && ((sp.c >= '0' && sp.c <= '9') || (sp.c >= 'a' && sp.c <= 'f') | |
2062 | || (sp.c >= 'A' && sp.c <= 'F')); | |
2063 | 0 | return r; |
2064 | } | |
2065 | ||
2066 | 17533 | boolean isOctalDigit(StrPos sp, boolean first) |
2067 | { | |
2068 | 17533 | boolean r = !sp.eos && !(first ^ sp.dontMatch) && sp.c >= '0' |
2069 | && sp.c <= '7'; | |
2070 | 17533 | return r; |
2071 | } | |
2072 | ||
2073 | 0 | int getHexDigit(StrPos sp) |
2074 | { | |
2075 | 0 | if (sp.c >= '0' && sp.c <= '9') |
2076 | { | |
2077 | 0 | return sp.c - '0'; |
2078 | } | |
2079 | 0 | if (sp.c >= 'a' && sp.c <= 'f') |
2080 | { | |
2081 | 0 | return sp.c - 'a' + 10; |
2082 | } | |
2083 | 0 | return sp.c - 'A' + 10; |
2084 | } | |
2085 | ||
2086 | 0 | boolean next2Hex(StrPos sp) |
2087 | { | |
2088 | 0 | StrPos sp2 = new StrPos(sp); |
2089 | 0 | sp2.inc(); |
2090 | 0 | if (!isHexDigit(sp2)) |
2091 | { | |
2092 | 0 | return false; |
2093 | } | |
2094 | 0 | sp2.inc(); |
2095 | 0 | if (!isHexDigit(sp2)) |
2096 | { | |
2097 | 0 | return false; |
2098 | } | |
2099 | 0 | return true; |
2100 | } | |
2101 | ||
2102 | 17533 | boolean isOctalString(StrPos sp) |
2103 | { | |
2104 | 17533 | if (!isOctalDigit(sp, true)) |
2105 | { | |
2106 | 17533 | return false; |
2107 | } | |
2108 | 0 | StrPos sp2 = new StrPos(sp); |
2109 | 0 | sp2.inc(); |
2110 | 0 | if (!isOctalDigit(sp2, false)) |
2111 | { | |
2112 | 0 | return false; |
2113 | } | |
2114 | 0 | return true; |
2115 | } | |
2116 | } |