Clover icon

Coverage Report

  1. Project Clover database Thu Aug 13 2020 12:04:21 BST
  2. Package com.stevesoft.pat

File Regex.java

 

Coverage histogram

../../../img/srcFileCovDistChart5.png
39% of files have more coverage

Code metrics

404
682
92
17
2,121
1,529
375
0.55
7.41
5.41
4.08

Classes

Class Line # Actions
UnicodePunct 20 1 3
0.00%
UnicodeWhite 30 1 3
0.00%
NUnicodePunct 42 1 3
0.00%
NUnicodeWhite 54 1 3
0.00%
UnicodeW 64 4 5
0.00%
NUnicodeW 80 4 5
0.00%
UnicodeDigit 96 1 3
0.00%
NUnicodeDigit 107 1 3
0.00%
UnicodeMath 118 1 3
0.00%
NUnicodeMath 128 1 3
0.00%
UnicodeCurrency 138 1 3
0.00%
NUnicodeCurrency 148 1 3
0.00%
UnicodeAlpha 158 1 3
0.00%
NUnicodeAlpha 168 1 3
0.00%
UnicodeUpper 179 2 4
0.00%
UnicodeLower 194 2 4
0.00%
Regex 307 658 321
45.1%
 

Contributing tests

This file is covered by 44 tests.

Source view

1    //
2    // This software is now distributed according to
3    // the Lesser Gnu Public License. Please see
4    // http://www.gnu.org/copyleft/lesser.txt for
5    // the details.
6    // -- Happy Computing!
7    //
8    package com.stevesoft.pat;
9   
10    import jalview.util.MessageManager;
11   
12    import java.io.File;
13    import java.io.FilenameFilter;
14    import java.util.BitSet;
15    import java.util.Hashtable;
16   
17    import com.stevesoft.pat.wrap.StringWrap;
18   
19    /** Matches a Unicode punctuation character. */
 
20    class UnicodePunct extends UniValidator
21    {
 
22  0 toggle @Override
23    public int validate(StringLike s, int from, int to)
24    {
25  0 return from < s.length() && Prop.isPunct(s.charAt(from)) ? to : -1;
26    }
27    }
28   
29    /** Matches a Unicode white space character. */
 
30    class UnicodeWhite extends UniValidator
31    {
 
32  0 toggle @Override
33    public int validate(StringLike s, int from, int to)
34    {
35  0 return from < s.length() && Prop.isWhite(s.charAt(from)) ? to : -1;
36    }
37    }
38   
39    /**
40    * Matches a character that is not a Unicode punctuation character.
41    */
 
42    class NUnicodePunct extends UniValidator
43    {
 
44  0 toggle @Override
45    public int validate(StringLike s, int from, int to)
46    {
47  0 return from < s.length() && !Prop.isPunct(s.charAt(from)) ? to : -1;
48    }
49    }
50   
51    /**
52    * Matches a character that is not a Unicode white space character.
53    */
 
54    class NUnicodeWhite extends UniValidator
55    {
 
56  0 toggle @Override
57    public int validate(StringLike s, int from, int to)
58    {
59  0 return from < s.length() && !Prop.isWhite(s.charAt(from)) ? to : -1;
60    }
61    }
62   
63    /** Matches a Unicode word character: an alphanumeric or underscore. */
 
64    class UnicodeW extends UniValidator
65    {
 
66  0 toggle @Override
67    public int validate(StringLike s, int from, int to)
68    {
69  0 if (from >= s.length())
70    {
71  0 return -1;
72    }
73  0 char c = s.charAt(from);
74  0 return (Prop.isAlphabetic(c) || Prop.isDecimalDigit(c) || c == '_') ? to
75    : -1;
76    }
77    }
78   
79    /** Matches a character that is not a Unicode alphanumeric or underscore. */
 
80    class NUnicodeW extends UniValidator
81    {
 
82  0 toggle @Override
83    public int validate(StringLike s, int from, int to)
84    {
85  0 if (from >= s.length())
86    {
87  0 return -1;
88    }
89  0 char c = s.charAt(from);
90  0 return !(Prop.isAlphabetic(c) || Prop.isDecimalDigit(c) || c == '_') ? to
91    : -1;
92    }
93    }
94   
95    /** Matches a Unicode decimal digit. */
 
96    class UnicodeDigit extends UniValidator
97    {
 
98  0 toggle @Override
99    public int validate(StringLike s, int from, int to)
100    {
101  0 return from < s.length() && Prop.isDecimalDigit(s.charAt(from)) ? to
102    : -1;
103    }
104    }
105   
106    /** Matches a character that is not a Unicode digit. */
 
107    class NUnicodeDigit extends UniValidator
108    {
 
109  0 toggle @Override
110    public int validate(StringLike s, int from, int to)
111    {
112  0 return from < s.length() && !Prop.isDecimalDigit(s.charAt(from)) ? to
113    : -1;
114    }
115    }
116   
117    /** Matches a Unicode math character. */
 
118    class UnicodeMath extends UniValidator
119    {
 
120  0 toggle @Override
121    public int validate(StringLike s, int from, int to)
122    {
123  0 return from < s.length() && Prop.isMath(s.charAt(from)) ? to : -1;
124    }
125    }
126   
127    /** Matches a non-math Unicode character. */
 
128    class NUnicodeMath extends UniValidator
129    {
 
130  0 toggle @Override
131    public int validate(StringLike s, int from, int to)
132    {
133  0 return from < s.length() && !Prop.isMath(s.charAt(from)) ? to : -1;
134    }
135    }
136   
137    /** Matches a Unicode currency symbol. */
 
138    class UnicodeCurrency extends UniValidator
139    {
 
140  0 toggle @Override
141    public int validate(StringLike s, int from, int to)
142    {
143  0 return from < s.length() && Prop.isCurrency(s.charAt(from)) ? to : -1;
144    }
145    }
146   
147    /** Matches a non-currency symbol Unicode character. */
 
148    class NUnicodeCurrency extends UniValidator
149    {
 
150  0 toggle @Override
151    public int validate(StringLike s, int from, int to)
152    {
153  0 return from < s.length() && !Prop.isCurrency(s.charAt(from)) ? to : -1;
154    }
155    }
156   
157    /** Matches a Unicode alphabetic character. */
 
158    class UnicodeAlpha extends UniValidator
159    {
 
160  0 toggle @Override
161    public int validate(StringLike s, int from, int to)
162    {
163  0 return from < s.length() && Prop.isAlphabetic(s.charAt(from)) ? to : -1;
164    }
165    }
166   
167    /** Matches a non-alphabetic Unicode character. */
 
168    class NUnicodeAlpha extends UniValidator
169    {
 
170  0 toggle @Override
171    public int validate(StringLike s, int from, int to)
172    {
173  0 return from < s.length() && !Prop.isAlphabetic(s.charAt(from)) ? to
174    : -1;
175    }
176    }
177   
178    /** Matches an upper case Unicode character. */
 
179    class UnicodeUpper extends UniValidator
180    {
 
181  0 toggle @Override
182    public int validate(StringLike s, int from, int to)
183    {
184  0 return from < s.length() && isUpper(s.charAt(from)) ? to : -1;
185    }
186   
 
187  0 toggle final boolean isUpper(char c)
188    {
189  0 return c == CaseMgr.toUpperCase(c) && c != CaseMgr.toLowerCase(c);
190    }
191    }
192   
193    /** Matches a lower case Unicode character. */
 
194    class UnicodeLower extends UniValidator
195    {
 
196  0 toggle @Override
197    public int validate(StringLike s, int from, int to)
198    {
199  0 return from < s.length() && isLower(s.charAt(from)) ? to : -1;
200    }
201   
 
202  0 toggle final boolean isLower(char c)
203    {
204  0 return c != CaseMgr.toUpperCase(c) && c == CaseMgr.toLowerCase(c);
205    }
206    }
207   
208    /**
209    * Regex provides the parser which constructs the linked list of Pattern classes
210    * from a String.
211    * <p>
212    * For the purpose of this documentation, the fact that java interprets the
213    * backslash will be ignored. In practice, however, you will need a double
214    * backslash to obtain a string that contains a single backslash character.
215    * Thus, the example pattern "\b" should really be typed as "\\b" inside java
216    * code.
217    * <p>
218    * Note that Regex is part of package "com.stevesoft.pat". To use it, simply
219    * import com.stevesoft.pat.Regex at the top of your file.
220    * <p>
221    * Regex is made with a constructor that takes a String that defines the regular
222    * expression. Thus, for example
223    *
224    * <pre>
225    * Regex r = new Regex(&quot;[a-c]*&quot;);
226    * </pre>
227    *
228    * matches any number of characters so long as they are 'a', 'b', or 'c'.
229    * <p>
230    * To attempt to match the Pattern to a given string, you can use either the
231    * search(String) member function, or the matchAt(String,int position) member
232    * function. These functions return a boolean which tells you whether or not the
233    * thing worked, and sets the methods "charsMatched()" and "matchedFrom()" in
234    * the Regex object appropriately.
235    * <p>
236    * The portion of the string before the match can be obtained by the left()
237    * member, and the portion after the match can be obtained by the right()
238    * member.
239    * <p>
240    * Essentially, this package implements a syntax that is very much like the perl
241    * 5 regular expression syntax.
242    *
243    * Longer example:
244    *
245    * <pre>
246    * Regex r = new Regex(&quot;x(a|b)y&quot;);
247    * r.matchAt(&quot;xay&quot;, 0);
248    * System.out.println(&quot;sub = &quot; + r.stringMatched(1));
249    * </pre>
250    *
251    * The above would print "sub = a".
252    *
253    * <pre>
254    * r.left() // would return &quot;x&quot;
255    * r.right() // would return &quot;y&quot;
256    * </pre>
257    *
258    * <p>
259    * Differences between this package and perl5:<br>
260    * The extended Pattern for setting flags is now supported, but the flags are
261    * different. "(?i)" tells the pattern to ignore case, "(?Q)" sets the
262    * "dontMatchInQuotes" flag, and "(?iQ)" sets them both. You can change the
263    * escape character. The pattern
264    *
265    * <pre>
266    * (?e=#)#d+
267    * </pre>
268    *
269    * is the same as
270    *
271    * <pre>
272    * \d+
273    * </pre>
274    *
275    * , but note that the sequence
276    *
277    * <pre>
278    * (?e=#)
279    * </pre>
280    *
281    * <b>must</b> occur at the very beginning of the pattern. There may be other
282    * small differences as well. I will either make my package conform or note them
283    * as I become aware of them.
284    * <p>
285    * This package supports additional patterns not in perl5: <center>
286    * <table border=1>
287    * <tr>
288    * <td>(?@())</td>
289    * <td>Group</td>
290    * <td>This matches all characters between the '(' character and the balancing
291    * ')' character. Thus, it will match "()" as well as "(())". The balancing
292    * characters are arbitrary, thus (?@{}) matches on "{}" and "{{}}".</td>
293    * <tr>
294    * <td>(?&lt;1)</td>
295    * <td>Backup</td>
296    * <td>Moves the pointer backwards within the text. This allows you to make a
297    * "look behind." It fails if it attempts to move to a position before the
298    * beginning of the string. "x(?&lt;1)" is equivalent to "(?=x)". The number, 1
299    * in this example, is the number of characters to move backwards.</td>
300    * </table>
301    * </center>
302    *
303    * @author Steven R. Brandt
304    * @version package com.stevesoft.pat, release 1.5.3
305    * @see Pattern
306    */
 
307    public class Regex extends RegRes implements FilenameFilter
308    {
309    /**
310    * BackRefOffset gives the identity number of the first pattern. Version 1.0
311    * used zero, version 1.1 uses 1 to be more compatible with perl.
312    */
313    static int BackRefOffset = 1;
314   
315    private static Pattern none = new NoPattern();
316   
317    Pattern thePattern = none;
318   
319    patInt minMatch = new patInt(0);
320   
321    static Hashtable validators = new Hashtable();
 
322  2 toggle static
323    {
324  2 define("p", "(?>1)", new UnicodePunct());
325  2 define("P", "(?>1)", new NUnicodePunct());
326  2 define("s", "(?>1)", new UnicodeWhite());
327  2 define("S", "(?>1)", new NUnicodeWhite());
328  2 define("w", "(?>1)", new UnicodeW());
329  2 define("W", "(?>1)", new NUnicodeW());
330  2 define("d", "(?>1)", new UnicodeDigit());
331  2 define("D", "(?>1)", new NUnicodeDigit());
332  2 define("m", "(?>1)", new UnicodeMath());
333  2 define("M", "(?>1)", new NUnicodeMath());
334  2 define("c", "(?>1)", new UnicodeCurrency());
335  2 define("C", "(?>1)", new NUnicodeCurrency());
336  2 define("a", "(?>1)", new UnicodeAlpha());
337  2 define("A", "(?>1)", new NUnicodeAlpha());
338  2 define("uc", "(?>1)", new UnicodeUpper());
339  2 define("lc", "(?>1)", new UnicodeLower());
340    }
341   
342    /** Set the dontMatch in quotes flag. */
 
343  0 toggle public void setDontMatchInQuotes(boolean b)
344    {
345  0 dontMatchInQuotes = b;
346    }
347   
348    /** Find out if the dontMatchInQuotes flag is enabled. */
 
349  0 toggle public boolean getDontMatchInQuotes()
350    {
351  0 return dontMatchInQuotes;
352    }
353   
354    boolean dontMatchInQuotes = false;
355   
356    /**
357    * Set the state of the ignoreCase flag. If set to true, then the pattern
358    * matcher will ignore case when searching for a match.
359    */
 
360  40 toggle public void setIgnoreCase(boolean b)
361    {
362  40 ignoreCase = b;
363    }
364   
365    /**
366    * Get the state of the ignoreCase flag. Returns true if we are ignoring the
367    * case of the pattern, false otherwise.
368    */
 
369  0 toggle public boolean getIgnoreCase()
370    {
371  0 return ignoreCase;
372    }
373   
374    boolean ignoreCase = false;
375   
376    static boolean defaultMFlag = false;
377   
378    /**
379    * Set the default value of the m flag. If it is set to true, then the MFlag
380    * will be on for any regex search executed.
381    */
 
382  0 toggle public static void setDefaultMFlag(boolean mFlag)
383    {
384  0 defaultMFlag = mFlag;
385    }
386   
387    /**
388    * Get the default value of the m flag. If it is set to true, then the MFlag
389    * will be on for any regex search executed.
390    */
 
391  0 toggle public static boolean getDefaultMFlag()
392    {
393  0 return defaultMFlag;
394    }
395   
396    /**
397    * Initializes the object without a Pattern. To supply a Pattern use
398    * compile(String s).
399    *
400    * @see com.stevesoft.pat.Regex#compile(java.lang.String)
401    */
 
402  2040 toggle public Regex()
403    {
404    }
405   
406    /**
407    * Create and compile a Regex, but do not throw any exceptions. If you wish to
408    * have exceptions thrown for syntax errors, you must use the Regex(void)
409    * constructor to create the Regex object, and then call the compile method.
410    * Therefore, you should only call this method when you know your pattern is
411    * right. I will probably become more like
412    *
413    * @see com.stevesoft.pat.Regex#search(java.lang.String)
414    * @see com.stevesoft.pat.Regex#compile(java.lang.String)
415    */
 
416  1428 toggle public Regex(String s)
417    {
418  1428 try
419    {
420  1428 compile(s);
421    } catch (RegSyntax rs)
422    {
423    }
424    }
425   
426    ReplaceRule rep = null;
427   
428    /**
429    * Create and compile both a Regex and a ReplaceRule.
430    *
431    * @see com.stevesoft.pat.ReplaceRule
432    * @see com.stevesoft.pat.Regex#compile(java.lang.String)
433    */
 
434  84 toggle public Regex(String s, String rp)
435    {
436  84 this(s);
437  84 rep = ReplaceRule.perlCode(rp);
438    }
439   
440    /**
441    * Create and compile a Regex, but give it the ReplaceRule specified. This
442    * allows the user finer control of the Replacement process, if that is
443    * desired.
444    *
445    * @see com.stevesoft.pat.ReplaceRule
446    * @see com.stevesoft.pat.Regex#compile(java.lang.String)
447    */
 
448  0 toggle public Regex(String s, ReplaceRule rp)
449    {
450  0 this(s);
451  0 rep = rp;
452    }
453   
454    /**
455    * Change the ReplaceRule of this Regex by compiling a new one using String
456    * rp.
457    */
 
458  0 toggle public void setReplaceRule(String rp)
459    {
460  0 rep = ReplaceRule.perlCode(rp);
461  0 repr = null; // Clear Replacer history
462    }
463   
464    /** Change the ReplaceRule of this Regex to rp. */
 
465  79 toggle public void setReplaceRule(ReplaceRule rp)
466    {
467  79 rep = rp;
468    }
469   
470    /**
471    * Test to see if a custom defined rule exists.
472    *
473    * @see com.stevesoft.pat.Regex#define(java.lang.String,java.lang.String,Validator)
474    */
 
475  0 toggle public static boolean isDefined(String nm)
476    {
477  0 return validators.get(nm) != null;
478    }
479   
480    /**
481    * Removes a custom defined rule.
482    *
483    * @see com.stevesoft.pat.Regex#define(java.lang.String,java.lang.String,Validator)
484    */
 
485  0 toggle public static void undefine(String nm)
486    {
487  0 validators.remove(nm);
488    }
489   
490    /**
491    * Defines a method to create a new rule. See test/deriv2.java and
492    * test/deriv3.java for examples of how to use it.
493    */
 
494  32 toggle public static void define(String nm, String pat, Validator v)
495    {
496  32 v.pattern = pat;
497  32 validators.put(nm, v);
498    }
499   
500    /**
501    * Defines a shorthand for a pattern. The pattern will be invoked by a string
502    * that has the form "(??"+nm+")".
503    */
 
504  0 toggle public static void define(String nm, String pat)
505    {
506  0 validators.put(nm, pat);
507    }
508   
509    /** Get the current ReplaceRule. */
 
510  3 toggle public ReplaceRule getReplaceRule()
511    {
512  3 return rep;
513    }
514   
515    Replacer repr = null;
516   
 
517  47 toggle final Replacer _getReplacer()
518    {
519  47 return repr == null ? repr = new Replacer() : repr;
520    }
521   
 
522  0 toggle public Replacer getReplacer()
523    {
524  0 if (repr == null)
525    {
526  0 repr = new Replacer();
527    }
528  0 repr.rh.me = this;
529  0 repr.rh.prev = null;
530  0 return repr;
531    }
532   
533    /**
534    * Replace the first occurrence of this pattern in String s according to the
535    * ReplaceRule.
536    *
537    * @see com.stevesoft.pat.ReplaceRule
538    * @see com.stevesoft.pat.Regex#getReplaceRule()
539    */
 
540  0 toggle public String replaceFirst(String s)
541    {
542  0 return _getReplacer().replaceFirstRegion(s, this, 0, s.length())
543    .toString();
544    }
545   
546    /**
547    * Replace the first occurrence of this pattern in String s beginning with
548    * position pos according to the ReplaceRule.
549    *
550    * @see com.stevesoft.pat.ReplaceRule
551    * @see com.stevesoft.pat.Regex#getReplaceRule()
552    */
 
553  0 toggle public String replaceFirstFrom(String s, int pos)
554    {
555  0 return _getReplacer().replaceFirstRegion(s, this, pos, s.length())
556    .toString();
557    }
558   
559    /**
560    * Replace the first occurrence of this pattern in String s beginning with
561    * position start and ending with end according to the ReplaceRule.
562    *
563    * @see com.stevesoft.pat.ReplaceRule
564    * @see com.stevesoft.pat.Regex#getReplaceRule()
565    */
 
566  0 toggle public String replaceFirstRegion(String s, int start, int end)
567    {
568  0 return _getReplacer().replaceFirstRegion(s, this, start, end)
569    .toString();
570    }
571   
572    /**
573    * Replace all occurrences of this pattern in String s according to the
574    * ReplaceRule.
575    *
576    * @see com.stevesoft.pat.ReplaceRule
577    * @see com.stevesoft.pat.Regex#getReplaceRule()
578    */
 
579  47 toggle public String replaceAll(String s)
580    {
581  47 return _getReplacer().replaceAllRegion(s, this, 0, s.length())
582    .toString();
583    }
584   
 
585  0 toggle public StringLike replaceAll(StringLike s)
586    {
587  0 return _getReplacer().replaceAllRegion(s, this, 0, s.length());
588    }
589   
590    /**
591    * Replace all occurrences of this pattern in String s beginning with position
592    * pos according to the ReplaceRule.
593    *
594    * @see com.stevesoft.pat.ReplaceRule
595    * @see com.stevesoft.pat.Regex#getReplaceRule()
596    */
 
597  0 toggle public String replaceAllFrom(String s, int pos)
598    {
599  0 return _getReplacer().replaceAllRegion(s, this, pos, s.length())
600    .toString();
601    }
602   
603    /**
604    * Replace all occurrences of this pattern in String s beginning with position
605    * start and ending with end according to the ReplaceRule.
606    *
607    * @see com.stevesoft.pat.ReplaceRule
608    * @see com.stevesoft.pat.Regex#getReplaceRule()
609    */
 
610  0 toggle public String replaceAllRegion(String s, int start, int end)
611    {
612  0 return _getReplacer().replaceAllRegion(s, this, start, end).toString();
613    }
614   
615    /** Essentially clones the Regex object */
 
616  161 toggle public Regex(Regex r)
617    {
618  161 super(r);
619  161 dontMatchInQuotes = r.dontMatchInQuotes;
620  161 esc = r.esc;
621  161 ignoreCase = r.ignoreCase;
622  161 gFlag = r.gFlag;
623  161 if (r.rep == null)
624    {
625  161 rep = null;
626    }
627    else
628    {
629  0 rep = (ReplaceRule) r.rep.clone();
630    }
631    /*
632    * try { compile(r.toString()); } catch(RegSyntax r_) {}
633    */
634  161 thePattern = r.thePattern.clone(new Hashtable());
635  161 minMatch = r.minMatch;
636  161 skipper = r.skipper;
637    }
638   
639    /**
640    * By default, the escape character is the backslash, but you can make it
641    * anything you want by setting this variable.
642    */
643    public char esc = Pattern.ESC;
644   
645    /**
646    * This method compiles a regular expression, making it possible to call the
647    * search or matchAt methods.
648    *
649    * @exception com.stevesoft.pat.RegSyntax
650    * is thrown if a syntax error is encountered in the pattern. For
651    * example, "x{3,1}" or "*a" are not valid patterns.
652    * @see com.stevesoft.pat.Regex#search
653    * @see com.stevesoft.pat.Regex#matchAt
654    */
 
655  1508 toggle public void compile(String prepat) throws RegSyntax
656    {
657  1508 String postpat = parsePerl.codify(prepat, true);
658  1508 String pat = postpat == null ? prepat : postpat;
659  1508 minMatch = null;
660  1508 ignoreCase = false;
661  1508 dontMatchInQuotes = false;
662  1508 Rthings mk = new Rthings(this);
663  1508 int offset = mk.val;
664  1508 String newpat = pat;
665  1508 thePattern = none;
666  1508 p = null;
667  1508 or = null;
668  1508 minMatch = new patInt(0);
669  1508 StrPos sp = new StrPos(pat, 0);
670  1508 if (sp.incMatch("(?e="))
671    {
672  0 char newEsc = sp.c;
673  0 sp.inc();
674  0 if (sp.match(')'))
675    {
676  0 newpat = reEscape(pat.substring(6), newEsc, Pattern.ESC);
677    }
678    }
679  1508 else if (esc != Pattern.ESC)
680    {
681  0 newpat = reEscape(pat, esc, Pattern.ESC);
682    }
683  1508 thePattern = _compile(newpat, mk);
684  1507 numSubs_ = mk.val - offset;
685  1507 mk.set(this);
686    }
687   
688    /*
689    * If a Regex is compared against a Regex, a check is done to see that the
690    * patterns are equal as well as the most recent match. If a Regex is compared
691    * with a RegRes, only the result of the most recent match is compared.
692    */
 
693  0 toggle @Override
694    public boolean equals(Object o)
695    {
696  0 if (o instanceof Regex)
697    {
698  0 if (toString().equals(o.toString()))
699    {
700  0 return super.equals(o);
701    }
702    else
703    {
704  0 return false;
705    }
706    }
707    else
708    {
709  0 return super.equals(o);
710    }
711    }
712   
713    /** A clone by any other name would smell as sweet. */
 
714  161 toggle @Override
715    public Object clone()
716    {
717  161 return new Regex(this);
718    }
719   
720    /** Return a clone of the underlying RegRes object. */
 
721  0 toggle public RegRes result()
722    {
723  0 return (RegRes) super.clone();
724    }
725   
726    // prep sets global variables of class
727    // Pattern so that it can access them
728    // during an attempt at a match
729    Pthings pt = new Pthings();
730   
 
731  10357 toggle final Pthings prep(StringLike s)
732    {
733    // if(gFlag)
734  10357 pt.lastPos = matchedTo();
735  10357 if (pt.lastPos < 0)
736    {
737  5542 pt.lastPos = 0;
738    }
739  10357 if ((s == null ? null : s.unwrap()) != (src == null ? null : s.unwrap()))
740    {
741  1282 pt.lastPos = 0;
742    }
743  10357 src = s;
744  10357 pt.dotDoesntMatchCR = dotDoesntMatchCR && (!sFlag);
745  10357 pt.mFlag = (mFlag | defaultMFlag);
746  10357 pt.ignoreCase = ignoreCase;
747  10357 pt.no_check = false;
748  10357 if (pt.marks != null)
749    {
750  36446 for (int i = 0; i < pt.marks.length; i++)
751    {
752  31858 pt.marks[i] = -1;
753    }
754    }
755  10357 pt.marks = null;
756  10357 pt.nMarks = numSubs_;
757  10357 pt.src = s;
758  10357 if (dontMatchInQuotes)
759    {
760  0 setCbits(s, pt);
761    }
762    else
763    {
764  10357 pt.cbits = null;
765    }
766  10357 return pt;
767    }
768   
769    /**
770    * Attempt to match a Pattern beginning at a specified location within the
771    * string.
772    *
773    * @see com.stevesoft.pat.Regex#search
774    */
 
775  0 toggle public boolean matchAt(String s, int start_pos)
776    {
777  0 return _search(s, start_pos, start_pos);
778    }
779   
780    /**
781    * Attempt to match a Pattern beginning at a specified location within the
782    * StringLike.
783    *
784    * @see com.stevesoft.pat.Regex#search
785    */
 
786  0 toggle public boolean matchAt(StringLike s, int start_pos)
787    {
788  0 return _search(s, start_pos, start_pos);
789    }
790   
791    /**
792    * Search through a String for the first occurrence of a match.
793    *
794    * @see com.stevesoft.pat.Regex#searchFrom
795    * @see com.stevesoft.pat.Regex#matchAt
796    */
 
797  9705 toggle public boolean search(String s)
798    {
799  9705 if (s == null)
800    {
801  0 throw new NullPointerException(
802    MessageManager
803    .getString("exception.null_string_given_to_regex_search"));
804    }
805  9705 return _search(s, 0, s.length());
806    }
807   
 
808  0 toggle public boolean search(StringLike sl)
809    {
810  0 if (sl == null)
811    {
812  0 throw new NullPointerException(
813    MessageManager
814    .getString("exception.null_string_like_given_to_regex_search"));
815    }
816  0 return _search(sl, 0, sl.length());
817    }
818   
 
819  0 toggle public boolean reverseSearch(String s)
820    {
821  0 if (s == null)
822    {
823  0 throw new NullPointerException(
824    MessageManager
825    .getString("exception.null_string_given_to_regex_reverse_search"));
826    }
827  0 return _reverseSearch(s, 0, s.length());
828    }
829   
 
830  0 toggle public boolean reverseSearch(StringLike sl)
831    {
832  0 if (sl == null)
833    {
834  0 throw new NullPointerException(
835    MessageManager
836    .getString("exception.null_string_like_given_to_regex_reverse_search"));
837    }
838  0 return _reverseSearch(sl, 0, sl.length());
839    }
840   
841    /**
842    * Search through a String for the first occurrence of a match, but start at
843    * position
844    *
845    * <pre>
846    * start
847    * </pre>
848    */
 
849  602 toggle public boolean searchFrom(String s, int start)
850    {
851  602 if (s == null)
852    {
853  0 throw new NullPointerException(
854    MessageManager
855    .getString("exception.null_string_like_given_to_regex_search_from"));
856    }
857  602 return _search(s, start, s.length());
858    }
859   
 
860  0 toggle public boolean searchFrom(StringLike s, int start)
861    {
862  0 if (s == null)
863    {
864  0 throw new NullPointerException(
865    MessageManager
866    .getString("exception.null_string_like_given_to_regex_search_from"));
867    }
868  0 return _search(s, start, s.length());
869    }
870   
871    /**
872    * Search through a region of a String for the first occurrence of a match.
873    */
 
874  0 toggle public boolean searchRegion(String s, int start, int end)
875    {
876  0 if (s == null)
877    {
878  0 throw new NullPointerException(
879    MessageManager
880    .getString("exception.null_string_like_given_to_regex_search_region"));
881    }
882  0 return _search(s, start, end);
883    }
884   
885    /**
886    * Set this to change the default behavior of the "." pattern. By default it
887    * now matches perl's behavior and fails to match the '\n' character.
888    */
889    public static boolean dotDoesntMatchCR = true;
890   
891    StringLike gFlags;
892   
893    int gFlagto = 0;
894   
895    boolean gFlag = false;
896   
897    /** Set the 'g' flag */
 
898  0 toggle public void setGFlag(boolean b)
899    {
900  0 gFlag = b;
901    }
902   
903    /** Get the state of the 'g' flag. */
 
904  0 toggle public boolean getGFlag()
905    {
906  0 return gFlag;
907    }
908   
909    boolean sFlag = false;
910   
911    /** Get the state of the sFlag */
 
912  0 toggle public boolean getSFlag()
913    {
914  0 return sFlag;
915    }
916   
917    boolean mFlag = false;
918   
919    /** Get the state of the mFlag */
 
920  0 toggle public boolean getMFlag()
921    {
922  0 return mFlag;
923    }
924   
 
925  10307 toggle final boolean _search(String s, int start, int end)
926    {
927  10307 return _search(new StringWrap(s), start, end);
928    }
929   
 
930  10357 toggle final boolean _search(StringLike s, int start, int end)
931    {
932  10357 if (gFlag && gFlagto > 0 && gFlags != null
933    && s.unwrap() == gFlags.unwrap())
934    {
935  0 start = gFlagto;
936    }
937  10357 gFlags = null;
938   
939  10357 Pthings pt = prep(s);
940   
941  10357 int up = (minMatch == null ? end : end - minMatch.i);
942   
943  10357 if (up < start && end >= start)
944    {
945  0 up = start;
946    }
947   
948  10357 if (skipper == null)
949    {
950  258134 for (int i = start; i <= up; i++)
951    {
952  254395 charsMatched_ = thePattern.matchAt(s, i, pt);
953  254395 if (charsMatched_ >= 0)
954    {
955  4192 matchFrom_ = thePattern.mfrom;
956  4192 marks = pt.marks;
957  4192 gFlagto = matchFrom_ + charsMatched_;
958  4192 gFlags = s;
959  4192 return didMatch_ = true;
960    }
961    }
962    }
963    else
964    {
965  2426 pt.no_check = true;
966  127202 for (int i = start; i <= up; i++)
967    {
968  127202 i = skipper.find(src, i, up);
969  127202 if (i < 0)
970    {
971  910 charsMatched_ = matchFrom_ = -1;
972  910 return didMatch_ = false;
973    }
974  126292 charsMatched_ = thePattern.matchAt(s, i, pt);
975  126292 if (charsMatched_ >= 0)
976    {
977  1516 matchFrom_ = thePattern.mfrom;
978  1516 marks = pt.marks;
979  1516 gFlagto = matchFrom_ + charsMatched_;
980  1516 gFlags = s;
981  1516 return didMatch_ = true;
982    }
983    }
984    }
985  3739 return didMatch_ = false;
986    }
987   
988    /*
989    * final boolean _search(LongStringLike s,long start,long end) { if(gFlag &&
990    * gFlagto > 0 && s==gFlags) start = gFlagto; gFlags = null;
991    *
992    * Pthings pt=prep(s);
993    *
994    * int up = end;//(minMatch == null ? end : end-minMatch.i);
995    *
996    * if(up < start && end >= start) up = start;
997    *
998    * if(skipper == null) { for(long i=start;i<=up;i++) { charsMatched_ =
999    * thePattern.matchAt(s,i,pt); if(charsMatched_ >= 0) { matchFrom_ =
1000    * thePattern.mfrom; marks = pt.marks; gFlagto = matchFrom_+charsMatched_;
1001    * return didMatch_=true; } } } else { pt.no_check = true; for(long
1002    * i=start;i<=up;i++) { i = skipper.find(src,i,up); if(i<0) { charsMatched_ =
1003    * matchFrom_ = -1; return didMatch_ = false; } charsMatched_ =
1004    * thePattern.matchAt(s,i,pt); if(charsMatched_ >= 0) { matchFrom_ =
1005    * thePattern.mfrom; marks = pt.marks; gFlagto = matchFrom_+charsMatched_;
1006    * gFlags = s; return didMatch_=true; } else { i = s.adjustIndex(i); up =
1007    * s.adjustEnd(i); } } } return didMatch_=false; }
1008    */
1009   
 
1010  0 toggle boolean _reverseSearch(String s, int start, int end)
1011    {
1012  0 return _reverseSearch(new StringWrap(s), start, end);
1013    }
1014   
 
1015  0 toggle boolean _reverseSearch(StringLike s, int start, int end)
1016    {
1017  0 if (gFlag && gFlagto > 0 && s.unwrap() == gFlags.unwrap())
1018    {
1019  0 end = gFlagto;
1020    }
1021  0 gFlags = null;
1022  0 Pthings pt = prep(s);
1023  0 for (int i = end; i >= start; i--)
1024    {
1025  0 charsMatched_ = thePattern.matchAt(s, i, pt);
1026  0 if (charsMatched_ >= 0)
1027    {
1028  0 matchFrom_ = thePattern.mfrom;
1029  0 marks = pt.marks;
1030  0 gFlagto = matchFrom_ - 1;
1031  0 gFlags = s;
1032  0 return didMatch_ = true;
1033    }
1034    }
1035  0 return didMatch_ = false;
1036    }
1037   
1038    // This routine sets the cbits variable
1039    // of class Pattern. Cbits is true for
1040    // the bit corresponding to a character inside
1041    // a set of quotes.
1042    static StringLike lasts = null;
1043   
1044    static BitSet lastbs = null;
1045   
 
1046  0 toggle static void setCbits(StringLike s, Pthings pt)
1047    {
1048  0 if (s == lasts)
1049    {
1050  0 pt.cbits = lastbs;
1051  0 return;
1052    }
1053  0 BitSet bs = new BitSet(s.length());
1054  0 char qc = ' ';
1055  0 boolean setBit = false;
1056  0 for (int i = 0; i < s.length(); i++)
1057    {
1058  0 if (setBit)
1059    {
1060  0 bs.set(i);
1061    }
1062  0 char c = s.charAt(i);
1063  0 if (!setBit && c == '"')
1064    {
1065  0 qc = c;
1066  0 setBit = true;
1067  0 bs.set(i);
1068    }
1069  0 else if (!setBit && c == '\'')
1070    {
1071  0 qc = c;
1072  0 setBit = true;
1073  0 bs.set(i);
1074    }
1075  0 else if (setBit && c == qc)
1076    {
1077  0 setBit = false;
1078    }
1079  0 else if (setBit && c == '\\' && i + 1 < s.length())
1080    {
1081  0 i++;
1082  0 if (setBit)
1083    {
1084  0 bs.set(i);
1085    }
1086    }
1087    }
1088  0 pt.cbits = lastbs = bs;
1089  0 lasts = s;
1090    }
1091   
1092    // Wanted user to over-ride this in alpha version,
1093    // but it wasn't really necessary because of this trick:
 
1094  1900 toggle Regex newRegex()
1095    {
1096  1900 try
1097    {
1098  1900 return getClass().getDeclaredConstructor().newInstance();
1099    } catch (InstantiationException ie)
1100    {
1101  0 return null;
1102    } catch (IllegalAccessException iae)
1103    {
1104  0 return null;
1105    } catch (ReflectiveOperationException roe)
1106    {
1107  0 return null;
1108    }
1109    }
1110   
1111    /**
1112    * Only needed for creating your own extensions of Regex. This method adds the
1113    * next Pattern in the chain of patterns or sets the Pattern if it is the
1114    * first call.
1115    */
 
1116  7111 toggle protected void add(Pattern p2)
1117    {
1118  7111 if (p == null)
1119    {
1120  3539 p = p2;
1121    }
1122    else
1123    {
1124  3572 p.add(p2);
1125  3572 p2 = p;
1126    }
1127    }
1128   
1129    /**
1130    * You only need to use this method if you are creating your own extentions to
1131    * Regex. compile1 compiles one Pattern element, it can be over-ridden to
1132    * allow the Regex compiler to understand new syntax. See deriv.java for an
1133    * example. This routine is the heart of class Regex. Rthings has one integer
1134    * member called intValue, it is used to keep track of the number of ()'s in
1135    * the Pattern.
1136    *
1137    * @exception com.stevesoft.pat.RegSyntax
1138    * is thrown when a nonsensensical pattern is supplied. For
1139    * example, a pattern beginning with *.
1140    */
 
1141  10069 toggle protected void compile1(StrPos sp, Rthings mk) throws RegSyntax
1142    {
1143  10069 if (sp.match('['))
1144    {
1145  1117 sp.inc();
1146  1117 add(matchBracket(sp));
1147    }
1148  8952 else if (sp.match('|'))
1149    {
1150  134 if (or == null)
1151    {
1152  3 or = new Or();
1153    }
1154  134 if (p == null)
1155    {
1156  0 p = new NullPattern();
1157    }
1158  134 or.addOr(p);
1159  134 p = null;
1160    }
1161  8818 else if (sp.incMatch("(?<"))
1162    {
1163  0 patInt i = sp.getPatInt();
1164  0 if (i == null)
1165    {
1166  0 RegSyntaxError.endItAll("No int after (?<");
1167    }
1168  0 add(new Backup(i.intValue()));
1169  0 if (!sp.match(')'))
1170    {
1171  0 RegSyntaxError.endItAll("No ) after (?<");
1172    }
1173    }
1174  8818 else if (sp.incMatch("(?>"))
1175    {
1176  0 patInt i = sp.getPatInt();
1177  0 if (i == null)
1178    {
1179  0 RegSyntaxError.endItAll("No int after (?>");
1180    }
1181  0 add(new Backup(-i.intValue()));
1182  0 if (!sp.match(')'))
1183    {
1184  0 RegSyntaxError.endItAll("No ) after (?<");
1185    }
1186    }
1187  8818 else if (sp.incMatch("(?@"))
1188    {
1189  0 char op = sp.c;
1190  0 sp.inc();
1191  0 char cl = sp.c;
1192  0 sp.inc();
1193  0 if (!sp.match(')'))
1194    {
1195  0 RegSyntaxError.endItAll("(?@ does not have closing paren");
1196    }
1197  0 add(new Group(op, cl));
1198    }
1199  8818 else if (sp.incMatch("(?#"))
1200    {
1201  0 while (!sp.match(')'))
1202    {
1203  0 sp.inc();
1204    }
1205    }
1206  8818 else if (sp.dontMatch && sp.c == 'w')
1207    {
1208    // Regex r = new Regex();
1209    // r._compile("[a-zA-Z0-9_]",mk);
1210    // add(new Goop("\\w",r.thePattern));
1211  154 Bracket b = new Bracket(false);
1212  154 b.addOr(new Range('a', 'z'));
1213  154 b.addOr(new Range('A', 'Z'));
1214  154 b.addOr(new Range('0', '9'));
1215  154 b.addOr(new oneChar('_'));
1216  154 add(b);
1217    }
1218  8664 else if (sp.dontMatch && sp.c == 'G')
1219    {
1220  0 add(new BackG());
1221    }
1222  8664 else if (sp.dontMatch && sp.c == 's')
1223    {
1224    // Regex r = new Regex();
1225    // r._compile("[ \t\n\r\b]",mk);
1226    // add(new Goop("\\s",r.thePattern));
1227  945 Bracket b = new Bracket(false);
1228  945 b.addOr(new oneChar((char) 32));
1229  945 b.addOr(new Range((char) 8, (char) 10));
1230  945 b.addOr(new oneChar((char) 13));
1231  945 add(b);
1232    }
1233  7719 else if (sp.dontMatch && sp.c == 'd')
1234    {
1235    // Regex r = new Regex();
1236    // r._compile("[0-9]",mk);
1237    // add(new Goop("\\d",r.thePattern));
1238  101 Range digit = new Range('0', '9');
1239  101 digit.printBrackets = true;
1240  101 add(digit);
1241    }
1242  7618 else if (sp.dontMatch && sp.c == 'W')
1243    {
1244    // Regex r = new Regex();
1245    // r._compile("[^a-zA-Z0-9_]",mk);
1246    // add(new Goop("\\W",r.thePattern));
1247  1 Bracket b = new Bracket(true);
1248  1 b.addOr(new Range('a', 'z'));
1249  1 b.addOr(new Range('A', 'Z'));
1250  1 b.addOr(new Range('0', '9'));
1251  1 b.addOr(new oneChar('_'));
1252  1 add(b);
1253    }
1254  7617 else if (sp.dontMatch && sp.c == 'S')
1255    {
1256    // Regex r = new Regex();
1257    // r._compile("[^ \t\n\r\b]",mk);
1258    // add(new Goop("\\S",r.thePattern));
1259  210 Bracket b = new Bracket(true);
1260  210 b.addOr(new oneChar((char) 32));
1261  210 b.addOr(new Range((char) 8, (char) 10));
1262  210 b.addOr(new oneChar((char) 13));
1263  210 add(b);
1264    }
1265  7407 else if (sp.dontMatch && sp.c == 'D')
1266    {
1267    // Regex r = new Regex();
1268    // r._compile("[^0-9]",mk);
1269    // add(new Goop("\\D",r.thePattern));
1270  0 Bracket b = new Bracket(true);
1271  0 b.addOr(new Range('0', '9'));
1272  0 add(b);
1273    }
1274  7407 else if (sp.dontMatch && sp.c == 'B')
1275    {
1276  0 Regex r = new Regex();
1277  0 r._compile("(?!" + back_slash + "b)", mk);
1278  0 add(r.thePattern);
1279    }
1280  7407 else if (isOctalString(sp))
1281    {
1282  0 int d = sp.c - '0';
1283  0 sp.inc();
1284  0 d = 8 * d + sp.c - '0';
1285  0 StrPos sp2 = new StrPos(sp);
1286  0 sp2.inc();
1287  0 if (isOctalDigit(sp2, false))
1288    {
1289  0 sp.inc();
1290  0 d = 8 * d + sp.c - '0';
1291    }
1292  0 add(new oneChar((char) d));
1293    }
1294  7407 else if (sp.dontMatch && sp.c >= '1' && sp.c <= '9')
1295    {
1296  0 int iv = sp.c - '0';
1297  0 StrPos s2 = new StrPos(sp);
1298  0 s2.inc();
1299  0 if (!s2.dontMatch && s2.c >= '0' && s2.c <= '9')
1300    {
1301  0 iv = 10 * iv + (s2.c - '0');
1302  0 sp.inc();
1303    }
1304  0 add(new BackMatch(iv));
1305    }
1306  7407 else if (sp.dontMatch && sp.c == 'b')
1307    {
1308  251 add(new Boundary());
1309    }
1310  7156 else if (sp.match('\b'))
1311    {
1312  0 add(new Boundary());
1313    }
1314  7156 else if (sp.match('$'))
1315    {
1316  7 add(new End(true));
1317    }
1318  7149 else if (sp.dontMatch && sp.c == 'Z')
1319    {
1320  0 add(new End(false));
1321    }
1322  7149 else if (sp.match('.'))
1323    {
1324  354 add(new Any());
1325    }
1326  6795 else if (sp.incMatch("(??"))
1327    {
1328  0 StringBuffer sb = new StringBuffer();
1329  0 StringBuffer sb2 = new StringBuffer();
1330  0 while (!sp.match(')') && !sp.match(':'))
1331    {
1332  0 sb.append(sp.c);
1333  0 sp.inc();
1334    }
1335  0 if (sp.incMatch(":"))
1336    {
1337  0 while (!sp.match(')'))
1338    {
1339  0 sb2.append(sp.c);
1340  0 sp.inc();
1341    }
1342    }
1343  0 String sbs = sb.toString();
1344  0 if (validators.get(sbs) instanceof String)
1345    {
1346  0 String pat = (String) validators.get(sbs);
1347  0 Regex r = newRegex();
1348  0 Rthings rth = new Rthings(this);
1349  0 rth.noBackRefs = true;
1350  0 r._compile(pat, rth);
1351  0 add(r.thePattern);
1352    }
1353    else
1354    {
1355  0 Custom cm = new Custom(sb.toString());
1356  0 if (cm.v != null)
1357    {
1358  0 Validator v2 = cm.v.arg(sb2.toString());
1359  0 if (v2 != null)
1360    {
1361  0 v2.argsave = sb2.toString();
1362  0 String p = cm.v.pattern;
1363  0 cm.v = v2;
1364  0 v2.pattern = p;
1365    }
1366  0 Regex r = newRegex();
1367  0 Rthings rth = new Rthings(this);
1368  0 rth.noBackRefs = true;
1369  0 r._compile(cm.v.pattern, rth);
1370  0 cm.sub = r.thePattern;
1371  0 cm.sub.add(new CustomEndpoint(cm));
1372  0 cm.sub.setParent(cm);
1373  0 add(cm);
1374    }
1375    }
1376    }
1377  6795 else if (sp.match('('))
1378    {
1379  1900 mk.parenLevel++;
1380  1900 Regex r = newRegex();
1381    // r.or = new Or();
1382  1900 sp.inc();
1383  1900 if (sp.incMatch("?:"))
1384    {
1385  39 r.or = new Or();
1386    }
1387  1861 else if (sp.incMatch("?="))
1388    {
1389  0 r.or = new lookAhead(false);
1390    }
1391  1861 else if (sp.incMatch("?!"))
1392    {
1393  0 r.or = new lookAhead(true);
1394    }
1395  1861 else if (sp.match('?'))
1396    {
1397  0 sp.inc();
1398  0 do
1399    {
1400  0 if (sp.c == 'i')
1401    {
1402  0 mk.ignoreCase = true;
1403    }
1404  0 if (sp.c == 'Q')
1405    {
1406  0 mk.dontMatchInQuotes = true;
1407    }
1408  0 if (sp.c == 'o')
1409    {
1410  0 mk.optimizeMe = true;
1411    }
1412  0 if (sp.c == 'g')
1413    {
1414  0 mk.gFlag = true;
1415    }
1416  0 if (sp.c == 's')
1417    {
1418  0 mk.sFlag = true;
1419    }
1420  0 if (sp.c == 'm')
1421    {
1422  0 mk.mFlag = true;
1423    }
1424  0 sp.inc();
1425  0 } while (!sp.match(')') && !sp.eos);
1426  0 r = null;
1427  0 mk.parenLevel--;
1428  0 if (sp.eos) // throw new RegSyntax
1429    {
1430  0 RegSyntaxError.endItAll("Unclosed ()");
1431    }
1432    }
1433    else
1434    { // just ordinary parenthesis
1435  1861 r.or = mk.noBackRefs ? new Or() : new OrMark(mk.val++);
1436    }
1437  1900 if (r != null)
1438    {
1439  1900 add(r._compile(sp, mk));
1440    }
1441    }
1442  4895 else if (sp.match('^'))
1443    {
1444  54 add(new Start(true));
1445    }
1446  4841 else if (sp.dontMatch && sp.c == 'A')
1447    {
1448  0 add(new Start(false));
1449    }
1450  4841 else if (sp.match('*'))
1451    {
1452  1145 addMulti(new patInt(0), new patInf());
1453    }
1454  3696 else if (sp.match('+'))
1455    {
1456  1472 addMulti(new patInt(1), new patInf());
1457    }
1458  2224 else if (sp.match('?'))
1459    {
1460  84 addMulti(new patInt(0), new patInt(1));
1461    }
1462  2140 else if (sp.match('{'))
1463    {
1464  123 boolean bad = false;
1465  123 StrPos sp2 = new StrPos(sp);
1466    // StringBuffer sb = new StringBuffer();
1467  123 sp.inc();
1468  123 patInt i1 = sp.getPatInt();
1469  123 patInt i2 = null;
1470  123 if (sp.match('}'))
1471    {
1472  120 i2 = i1;
1473    }
1474    else
1475    {
1476  3 if (!sp.match(','))
1477    {
1478    /*
1479    * RegSyntaxError.endItAll( "String \"{"+i2+ "\" should be followed
1480    * with , or }");
1481    */
1482  0 bad = true;
1483    }
1484  3 sp.inc();
1485  3 if (sp.match('}'))
1486    {
1487  3 i2 = new patInf();
1488    }
1489    else
1490    {
1491  0 i2 = sp.getPatInt();
1492    }
1493    }
1494  123 if (i1 == null || i2 == null)
1495    {
1496    /*
1497    * throw new RegSyntax("Badly formatted Multi: " +"{"+i1+","+i2+"}");
1498    */
1499  0 bad = true;
1500    }
1501  123 if (bad)
1502    {
1503  0 sp.dup(sp2);
1504  0 add(new oneChar(sp.c));
1505    }
1506    else
1507    {
1508  123 addMulti(i1, i2);
1509    }
1510    }
1511  2017 else if (sp.escMatch('x') && next2Hex(sp))
1512    {
1513  0 sp.inc();
1514  0 int d = getHexDigit(sp);
1515  0 sp.inc();
1516  0 d = 16 * d + getHexDigit(sp);
1517  0 add(new oneChar((char) d));
1518    }
1519  2017 else if (sp.escMatch('c'))
1520    {
1521  0 sp.inc();
1522  0 if (sp.c < Ctrl.cmap.length)
1523    {
1524  0 add(new oneChar(Ctrl.cmap[sp.c]));
1525    }
1526    else
1527    {
1528  0 add(new oneChar(sp.c));
1529    }
1530    }
1531  2017 else if (sp.escMatch('f'))
1532    {
1533  0 add(new oneChar((char) 12));
1534    }
1535  2017 else if (sp.escMatch('a'))
1536    {
1537  0 add(new oneChar((char) 7));
1538    }
1539  2017 else if (sp.escMatch('t'))
1540    {
1541  0 add(new oneChar('\t'));
1542    }
1543  2017 else if (sp.escMatch('n'))
1544    {
1545  0 add(new oneChar('\n'));
1546    }
1547  2017 else if (sp.escMatch('r'))
1548    {
1549  0 add(new oneChar('\r'));
1550    }
1551  2017 else if (sp.escMatch('b'))
1552    {
1553  0 add(new oneChar('\b'));
1554    }
1555  2017 else if (sp.escMatch('e'))
1556    {
1557  0 add(new oneChar((char) 27));
1558    }
1559    else
1560    {
1561  2017 add(new oneChar(sp.c));
1562  2017 if (sp.match(')'))
1563    {
1564  0 RegSyntaxError.endItAll("Unmatched right paren in pattern");
1565    }
1566    }
1567    }
1568   
1569    // compiles all Pattern elements, internal method
 
1570  1508 toggle private Pattern _compile(String pat, Rthings mk) throws RegSyntax
1571    {
1572  1508 minMatch = null;
1573  1508 sFlag = mFlag = ignoreCase = gFlag = false;
1574  1508 StrPos sp = new StrPos(pat, 0);
1575  1508 thePattern = _compile(sp, mk);
1576  1507 pt.marks = null;
1577  1507 return thePattern;
1578    }
1579   
1580    Pattern p = null;
1581   
1582    Or or = null;
1583   
 
1584  3408 toggle Pattern _compile(StrPos sp, Rthings mk) throws RegSyntax
1585    {
1586  13476 while (!(sp.eos || (or != null && sp.match(')'))))
1587    {
1588  10069 compile1(sp, mk);
1589  10068 sp.inc();
1590    }
1591  3407 if (sp.match(')'))
1592    {
1593  1900 mk.parenLevel--;
1594    }
1595  1507 else if (sp.eos && mk.parenLevel != 0)
1596    {
1597  0 RegSyntaxError.endItAll("Unclosed Parenthesis! lvl=" + mk.parenLevel);
1598    }
1599  3407 if (or != null)
1600    {
1601  1903 if (p == null)
1602    {
1603  3 p = new NullPattern();
1604    }
1605  1903 or.addOr(p);
1606  1903 return or;
1607    }
1608  1504 return p == null ? new NullPattern() : p;
1609    }
1610   
1611    // add a multi object to the end of the chain
1612    // which applies to the last object
 
1613  2824 toggle void addMulti(patInt i1, patInt i2) throws RegSyntax
1614    {
1615  2824 Pattern last, last2;
1616  4397 for (last = p; last != null && last.next != null; last = last.next)
1617    {
1618  1573 ;
1619    }
1620  2824 if (last == null || last == p)
1621    {
1622  1946 last2 = null;
1623    }
1624    else
1625    {
1626  1573 for (last2 = p; last2.next != last; last2 = last2.next)
1627    {
1628  695 ;
1629    }
1630    }
1631  2824 if (last instanceof Multi && i1.intValue() == 0 && i2.intValue() == 1)
1632    {
1633  0 ((Multi) last).matchFewest = true;
1634    }
1635  2824 else if (last instanceof FastMulti && i1.intValue() == 0
1636    && i2.intValue() == 1)
1637    {
1638  0 ((FastMulti) last).matchFewest = true;
1639    }
1640  2824 else if (last instanceof DotMulti && i1.intValue() == 0
1641    && i2.intValue() == 1)
1642    {
1643  0 ((DotMulti) last).matchFewest = true;
1644    }
1645  2824 else if (last instanceof Multi || last instanceof DotMulti
1646    || last instanceof FastMulti)
1647    {
1648  1 throw new RegSyntax("Syntax error.");
1649    }
1650  2823 else if (last2 == null)
1651    {
1652  1945 p = mkMulti(i1, i2, p);
1653    }
1654    else
1655    {
1656  878 last2.next = mkMulti(i1, i2, last);
1657    }
1658    }
1659   
 
1660  2823 toggle final static Pattern mkMulti(patInt lo, patInt hi, Pattern p)
1661    throws RegSyntax
1662    {
1663  2823 if (p instanceof Any && p.next == null)
1664    {
1665  352 return new DotMulti(lo, hi);
1666    }
1667  2471 return RegOpt.safe4fm(p) ? (Pattern) new FastMulti(lo, hi, p)
1668    : (Pattern) new Multi(lo, hi, p);
1669    }
1670   
1671    // process the bracket operator
 
1672  1117 toggle Pattern matchBracket(StrPos sp) throws RegSyntax
1673    {
1674  1117 Bracket ret;
1675  1117 if (sp.match('^'))
1676    {
1677  273 ret = new Bracket(true);
1678  273 sp.inc();
1679    }
1680    else
1681    {
1682  844 ret = new Bracket(false);
1683    }
1684  1117 if (sp.match(']'))
1685    {
1686    // throw new RegSyntax
1687  0 RegSyntaxError.endItAll("Unmatched []");
1688    }
1689   
1690  5957 while (!sp.eos && !sp.match(']'))
1691    {
1692  4840 StrPos s1 = new StrPos(sp);
1693  4840 s1.inc();
1694  4840 StrPos s1_ = new StrPos(s1);
1695  4840 s1_.inc();
1696  4840 if (s1.match('-') && !s1_.match(']'))
1697    {
1698  782 StrPos s2 = new StrPos(s1);
1699  782 s2.inc();
1700  782 if (!s2.eos)
1701    {
1702  782 ret.addOr(new Range(sp.c, s2.c));
1703    }
1704  782 sp.inc();
1705  782 sp.inc();
1706    }
1707  4058 else if (sp.escMatch('Q'))
1708    {
1709  0 sp.inc();
1710  0 while (!sp.escMatch('E'))
1711    {
1712  0 ret.addOr(new oneChar(sp.c));
1713  0 sp.inc();
1714    }
1715    }
1716  4058 else if (sp.escMatch('d'))
1717    {
1718  42 ret.addOr(new Range('0', '9'));
1719    }
1720  4016 else if (sp.escMatch('s'))
1721    {
1722  0 ret.addOr(new oneChar((char) 32));
1723  0 ret.addOr(new Range((char) 8, (char) 10));
1724  0 ret.addOr(new oneChar((char) 13));
1725    }
1726  4016 else if (sp.escMatch('w'))
1727    {
1728  0 ret.addOr(new Range('a', 'z'));
1729  0 ret.addOr(new Range('A', 'Z'));
1730  0 ret.addOr(new Range('0', '9'));
1731  0 ret.addOr(new oneChar('_'));
1732    }
1733  4016 else if (sp.escMatch('D'))
1734    {
1735  0 ret.addOr(new Range((char) 0, (char) 47));
1736  0 ret.addOr(new Range((char) 58, (char) 65535));
1737    }
1738  4016 else if (sp.escMatch('S'))
1739    {
1740  0 ret.addOr(new Range((char) 0, (char) 7));
1741  0 ret.addOr(new Range((char) 11, (char) 12));
1742  0 ret.addOr(new Range((char) 14, (char) 31));
1743  0 ret.addOr(new Range((char) 33, (char) 65535));
1744    }
1745  4016 else if (sp.escMatch('W'))
1746    {
1747  0 ret.addOr(new Range((char) 0, (char) 64));
1748  0 ret.addOr(new Range((char) 91, (char) 94));
1749  0 ret.addOr(new oneChar((char) 96));
1750  0 ret.addOr(new Range((char) 123, (char) 65535));
1751    }
1752  4016 else if (sp.escMatch('x') && next2Hex(sp))
1753    {
1754  0 sp.inc();
1755  0 int d = getHexDigit(sp);
1756  0 sp.inc();
1757  0 d = 16 * d + getHexDigit(sp);
1758  0 ret.addOr(new oneChar((char) d));
1759    }
1760  4016 else if (sp.escMatch('a'))
1761    {
1762  0 ret.addOr(new oneChar((char) 7));
1763    }
1764  4016 else if (sp.escMatch('f'))
1765    {
1766  0 ret.addOr(new oneChar((char) 12));
1767    }
1768  4016 else if (sp.escMatch('e'))
1769    {
1770  0 ret.addOr(new oneChar((char) 27));
1771    }
1772  4016 else if (sp.escMatch('n'))
1773    {
1774  0 ret.addOr(new oneChar('\n'));
1775    }
1776  4016 else if (sp.escMatch('t'))
1777    {
1778  0 ret.addOr(new oneChar('\t'));
1779    }
1780  4016 else if (sp.escMatch('r'))
1781    {
1782  0 ret.addOr(new oneChar('\r'));
1783    }
1784  4016 else if (sp.escMatch('c'))
1785    {
1786  0 sp.inc();
1787  0 if (sp.c < Ctrl.cmap.length)
1788    {
1789  0 ret.addOr(new oneChar(Ctrl.cmap[sp.c]));
1790    }
1791    else
1792    {
1793  0 ret.addOr(new oneChar(sp.c));
1794    }
1795    }
1796  4016 else if (isOctalString(sp))
1797    {
1798  0 int d = sp.c - '0';
1799  0 sp.inc();
1800  0 d = 8 * d + sp.c - '0';
1801  0 StrPos sp2 = new StrPos(sp);
1802  0 sp2.inc();
1803  0 if (isOctalDigit(sp2, false))
1804    {
1805  0 sp.inc();
1806  0 d = 8 * d + sp.c - '0';
1807    }
1808  0 ret.addOr(new oneChar((char) d));
1809    }
1810    else
1811    {
1812  4016 ret.addOr(new oneChar(sp.c));
1813    }
1814  4840 sp.inc();
1815    }
1816  1117 return ret;
1817    }
1818   
1819    /**
1820    * Converts the stored Pattern to a String -- this is a decompile. Note that
1821    * \t and \n will really print out here, Not just the two character
1822    * representations. Also be prepared to see some strange output if your
1823    * characters are not printable.
1824    */
 
1825  0 toggle @Override
1826    public String toString()
1827    {
1828  0 if (false && thePattern == null)
1829    {
1830  0 return "";
1831    }
1832    else
1833    {
1834  0 StringBuffer sb = new StringBuffer();
1835  0 if (esc != Pattern.ESC)
1836    {
1837  0 sb.append("(?e=");
1838  0 sb.append(esc);
1839  0 sb.append(")");
1840    }
1841  0 if (gFlag || mFlag || !dotDoesntMatchCR || sFlag || ignoreCase
1842    || dontMatchInQuotes || optimized())
1843    {
1844  0 sb.append("(?");
1845  0 if (ignoreCase)
1846    {
1847  0 sb.append("i");
1848    }
1849  0 if (mFlag)
1850    {
1851  0 sb.append("m");
1852    }
1853  0 if (sFlag || !dotDoesntMatchCR)
1854    {
1855  0 sb.append("s");
1856    }
1857  0 if (dontMatchInQuotes)
1858    {
1859  0 sb.append("Q");
1860    }
1861  0 if (optimized())
1862    {
1863  0 sb.append("o");
1864    }
1865  0 if (gFlag)
1866    {
1867  0 sb.append("g");
1868    }
1869  0 sb.append(")");
1870    }
1871  0 String patstr = thePattern.toString();
1872  0 if (esc != Pattern.ESC)
1873    {
1874  0 patstr = reEscape(patstr, Pattern.ESC, esc);
1875    }
1876  0 sb.append(patstr);
1877  0 return sb.toString();
1878    }
1879    }
1880   
1881    // Re-escape Pattern, allows us to use a different escape
1882    // character.
 
1883  0 toggle static String reEscape(String s, char oldEsc, char newEsc)
1884    {
1885  0 if (oldEsc == newEsc)
1886    {
1887  0 return s;
1888    }
1889  0 int i;
1890  0 StringBuffer sb = new StringBuffer();
1891  0 for (i = 0; i < s.length(); i++)
1892    {
1893  0 if (s.charAt(i) == oldEsc && i + 1 < s.length())
1894    {
1895  0 if (s.charAt(i + 1) == oldEsc)
1896    {
1897  0 sb.append(oldEsc);
1898    }
1899    else
1900    {
1901  0 sb.append(newEsc);
1902  0 sb.append(s.charAt(i + 1));
1903    }
1904  0 i++;
1905    }
1906  0 else if (s.charAt(i) == newEsc)
1907    {
1908  0 sb.append(newEsc);
1909  0 sb.append(newEsc);
1910    }
1911    else
1912    {
1913  0 sb.append(s.charAt(i));
1914    }
1915    }
1916  0 return sb.toString();
1917    }
1918   
1919    /**
1920    * This method implements FilenameFilter, allowing one to use a Regex to
1921    * search through a directory using File.list. There is a FileRegex now that
1922    * does this better.
1923    *
1924    * @see com.stevesoft.pat.FileRegex
1925    */
 
1926  0 toggle @Override
1927    public boolean accept(File dir, String s)
1928    {
1929  0 return search(s);
1930    }
1931   
1932    /** The version of this package */
 
1933  0 toggle final static public String version()
1934    {
1935  0 return "lgpl release 1.5.3";
1936    }
1937   
1938    /**
1939    * Once this method is called, the state of variables ignoreCase and
1940    * dontMatchInQuotes should not be changed as the results will be
1941    * unpredictable. However, search and matchAt will run more quickly. Note that
1942    * you can check to see if the pattern has been optimized by calling the
1943    * optimized() method.
1944    * <p>
1945    * This method will attempt to rewrite your pattern in a way that makes it
1946    * faster (not all patterns execute at the same speed). In general,
1947    * "(?: ... )" will be faster than "( ... )" so if you don't need the
1948    * backreference, you should group using the former pattern.
1949    * <p>
1950    * It will also introduce new pattern elements that you can't get to
1951    * otherwise, for example if you have a large table of strings, i.e. the
1952    * months of the year "(January|February|...)" optimize() will make a
1953    * Hashtable that takes it to the next appropriate pattern element --
1954    * eliminating the need for a linear search.
1955    *
1956    * @see com.stevesoft.pat.Regex#optimized
1957    * @see com.stevesoft.pat.Regex#ignoreCase
1958    * @see com.stevesoft.pat.Regex#dontMatchInQuotes
1959    * @see com.stevesoft.pat.Regex#matchAt
1960    * @see com.stevesoft.pat.Regex#search
1961    */
 
1962  296 toggle public void optimize()
1963    {
1964  296 if (optimized() || thePattern == null)
1965    {
1966  0 return;
1967    }
1968  296 minMatch = new patInt(0); // thePattern.countMinChars();
1969  296 thePattern = RegOpt.opt(thePattern, ignoreCase, dontMatchInQuotes);
1970  296 skipper = Skip.findSkip(this);
1971    // RegOpt.setParents(this);
1972  296 return;
1973    }
1974   
1975    Skip skipper;
1976   
1977    /**
1978    * This function returns true if the optimize method has been called.
1979    */
 
1980  296 toggle public boolean optimized()
1981    {
1982  296 return minMatch != null;
1983    }
1984   
1985    /**
1986    * A bit of syntactic surgar for those who want to make their code look more
1987    * perl-like. To use this initialize your Regex object by saying:
1988    *
1989    * <pre>
1990    * Regex r1 = Regex.perlCode(&quot;s/hello/goodbye/&quot;);
1991    * Regex r2 = Regex.perlCode(&quot;s'fish'frog'i&quot;);
1992    * Regex r3 = Regex.perlCode(&quot;m'hello');
1993    * </pre>
1994    *
1995    * The i for ignoreCase is supported in this syntax, as well as m, s, and x.
1996    * The g flat is a bit of a special case.
1997    * <p>
1998    * If you wish to replace all occurences of a pattern, you do not put a 'g' in
1999    * the perlCode, but call Regex's replaceAll method.
2000    * <p>
2001    * If you wish to simply and only do a search for r2's pattern, you can do
2002    * this by calling the searchFrom method method repeatedly, or by calling
2003    * search repeatedly if the g flag is set.
2004    * <p>
2005    * Note: Currently perlCode does <em>not</em> support the (?e=#) syntax for
2006    * changing the escape character.
2007    */
2008   
 
2009  80 toggle public static Regex perlCode(String s)
2010    {
2011    // this file is big enough, see parsePerl.java
2012    // for this function.
2013  80 return parsePerl.parse(s);
2014    }
2015   
2016    static final char back_slash = '\\';
2017   
2018    /**
2019    * Checks to see if there are only literal and no special pattern elements in
2020    * this Regex.
2021    */
 
2022  0 toggle public boolean isLiteral()
2023    {
2024  0 Pattern x = thePattern;
2025  0 while (x != null)
2026    {
2027  0 if (x instanceof oneChar)
2028    {
2029  0 ;
2030    }
2031  0 else if (x instanceof Skipped)
2032    {
2033  0 ;
2034    }
2035    else
2036    {
2037  0 return false;
2038    }
2039  0 x = x.next;
2040    }
2041  0 return true;
2042    }
2043   
2044    /**
2045    * You only need to know about this if you are inventing your own pattern
2046    * elements.
2047    */
 
2048  0 toggle public patInt countMinChars()
2049    {
2050  0 return thePattern.countMinChars();
2051    }
2052   
2053    /**
2054    * You only need to know about this if you are inventing your own pattern
2055    * elements.
2056    */
 
2057  0 toggle public patInt countMaxChars()
2058    {
2059  0 return thePattern.countMaxChars();
2060    }
2061   
 
2062  0 toggle boolean isHexDigit(StrPos sp)
2063    {
2064  0 boolean r = !sp.eos
2065    && !sp.dontMatch
2066    && ((sp.c >= '0' && sp.c <= '9')
2067    || (sp.c >= 'a' && sp.c <= 'f') || (sp.c >= 'A' && sp.c <= 'F'));
2068  0 return r;
2069    }
2070   
 
2071  11423 toggle boolean isOctalDigit(StrPos sp, boolean first)
2072    {
2073  11423 boolean r = !sp.eos && !(first ^ sp.dontMatch) && sp.c >= '0'
2074    && sp.c <= '7';
2075  11423 return r;
2076    }
2077   
 
2078  0 toggle int getHexDigit(StrPos sp)
2079    {
2080  0 if (sp.c >= '0' && sp.c <= '9')
2081    {
2082  0 return sp.c - '0';
2083    }
2084  0 if (sp.c >= 'a' && sp.c <= 'f')
2085    {
2086  0 return sp.c - 'a' + 10;
2087    }
2088  0 return sp.c - 'A' + 10;
2089    }
2090   
 
2091  0 toggle boolean next2Hex(StrPos sp)
2092    {
2093  0 StrPos sp2 = new StrPos(sp);
2094  0 sp2.inc();
2095  0 if (!isHexDigit(sp2))
2096    {
2097  0 return false;
2098    }
2099  0 sp2.inc();
2100  0 if (!isHexDigit(sp2))
2101    {
2102  0 return false;
2103    }
2104  0 return true;
2105    }
2106   
 
2107  11423 toggle boolean isOctalString(StrPos sp)
2108    {
2109  11423 if (!isOctalDigit(sp, true))
2110    {
2111  11423 return false;
2112    }
2113  0 StrPos sp2 = new StrPos(sp);
2114  0 sp2.inc();
2115  0 if (!isOctalDigit(sp2, false))
2116    {
2117  0 return false;
2118    }
2119  0 return true;
2120    }
2121    }