Clover icon

Coverage Report

  1. Project Clover database Mon Sep 2 2024 17:57:51 BST
  2. Package com.stevesoft.pat

File Regex.java

 

Coverage histogram

../../../img/srcFileCovDistChart5.png
43% of files have more coverage

Code metrics

404
682
92
17
2,116
1,523
375
0.55
7.41
5.41
4.08

Classes

Class Line # Actions
UnicodePunct 20 1 3
0.00%
UnicodeWhite 30 1 3
0.00%
NUnicodePunct 42 1 3
0.00%
NUnicodeWhite 54 1 3
0.00%
UnicodeW 64 4 5
0.00%
NUnicodeW 80 4 5
0.00%
UnicodeDigit 97 1 3
0.00%
NUnicodeDigit 108 1 3
0.00%
UnicodeMath 119 1 3
0.00%
NUnicodeMath 129 1 3
0.00%
UnicodeCurrency 139 1 3
0.00%
NUnicodeCurrency 149 1 3
0.00%
UnicodeAlpha 159 1 3
0.00%
NUnicodeAlpha 169 1 3
0.00%
UnicodeUpper 180 2 4
0.00%
UnicodeLower 195 2 4
0.00%
Regex 309 658 321
0.4509090845.1%
 

Contributing tests

This file is covered by 101 tests.

Source view

1    //
2    // This software is now distributed according to
3    // the Lesser Gnu Public License. Please see
4    // http://www.gnu.org/copyleft/lesser.txt for
5    // the details.
6    // -- Happy Computing!
7    //
8    package com.stevesoft.pat;
9   
10    import jalview.util.MessageManager;
11   
12    import java.io.File;
13    import java.io.FilenameFilter;
14    import java.util.BitSet;
15    import java.util.Hashtable;
16   
17    import com.stevesoft.pat.wrap.StringWrap;
18   
19    /** Matches a Unicode punctuation character. */
 
20    class UnicodePunct extends UniValidator
21    {
 
22  0 toggle @Override
23    public int validate(StringLike s, int from, int to)
24    {
25  0 return from < s.length() && Prop.isPunct(s.charAt(from)) ? to : -1;
26    }
27    }
28   
29    /** Matches a Unicode white space character. */
 
30    class UnicodeWhite extends UniValidator
31    {
 
32  0 toggle @Override
33    public int validate(StringLike s, int from, int to)
34    {
35  0 return from < s.length() && Prop.isWhite(s.charAt(from)) ? to : -1;
36    }
37    }
38   
39    /**
40    * Matches a character that is not a Unicode punctuation character.
41    */
 
42    class NUnicodePunct extends UniValidator
43    {
 
44  0 toggle @Override
45    public int validate(StringLike s, int from, int to)
46    {
47  0 return from < s.length() && !Prop.isPunct(s.charAt(from)) ? to : -1;
48    }
49    }
50   
51    /**
52    * Matches a character that is not a Unicode white space character.
53    */
 
54    class NUnicodeWhite extends UniValidator
55    {
 
56  0 toggle @Override
57    public int validate(StringLike s, int from, int to)
58    {
59  0 return from < s.length() && !Prop.isWhite(s.charAt(from)) ? to : -1;
60    }
61    }
62   
63    /** Matches a Unicode word character: an alphanumeric or underscore. */
 
64    class UnicodeW extends UniValidator
65    {
 
66  0 toggle @Override
67    public int validate(StringLike s, int from, int to)
68    {
69  0 if (from >= s.length())
70    {
71  0 return -1;
72    }
73  0 char c = s.charAt(from);
74  0 return (Prop.isAlphabetic(c) || Prop.isDecimalDigit(c) || c == '_') ? to
75    : -1;
76    }
77    }
78   
79    /** Matches a character that is not a Unicode alphanumeric or underscore. */
 
80    class NUnicodeW extends UniValidator
81    {
 
82  0 toggle @Override
83    public int validate(StringLike s, int from, int to)
84    {
85  0 if (from >= s.length())
86    {
87  0 return -1;
88    }
89  0 char c = s.charAt(from);
90  0 return !(Prop.isAlphabetic(c) || Prop.isDecimalDigit(c) || c == '_')
91    ? to
92    : -1;
93    }
94    }
95   
96    /** Matches a Unicode decimal digit. */
 
97    class UnicodeDigit extends UniValidator
98    {
 
99  0 toggle @Override
100    public int validate(StringLike s, int from, int to)
101    {
102  0 return from < s.length() && Prop.isDecimalDigit(s.charAt(from)) ? to
103    : -1;
104    }
105    }
106   
107    /** Matches a character that is not a Unicode digit. */
 
108    class NUnicodeDigit extends UniValidator
109    {
 
110  0 toggle @Override
111    public int validate(StringLike s, int from, int to)
112    {
113  0 return from < s.length() && !Prop.isDecimalDigit(s.charAt(from)) ? to
114    : -1;
115    }
116    }
117   
118    /** Matches a Unicode math character. */
 
119    class UnicodeMath extends UniValidator
120    {
 
121  0 toggle @Override
122    public int validate(StringLike s, int from, int to)
123    {
124  0 return from < s.length() && Prop.isMath(s.charAt(from)) ? to : -1;
125    }
126    }
127   
128    /** Matches a non-math Unicode character. */
 
129    class NUnicodeMath extends UniValidator
130    {
 
131  0 toggle @Override
132    public int validate(StringLike s, int from, int to)
133    {
134  0 return from < s.length() && !Prop.isMath(s.charAt(from)) ? to : -1;
135    }
136    }
137   
138    /** Matches a Unicode currency symbol. */
 
139    class UnicodeCurrency extends UniValidator
140    {
 
141  0 toggle @Override
142    public int validate(StringLike s, int from, int to)
143    {
144  0 return from < s.length() && Prop.isCurrency(s.charAt(from)) ? to : -1;
145    }
146    }
147   
148    /** Matches a non-currency symbol Unicode character. */
 
149    class NUnicodeCurrency extends UniValidator
150    {
 
151  0 toggle @Override
152    public int validate(StringLike s, int from, int to)
153    {
154  0 return from < s.length() && !Prop.isCurrency(s.charAt(from)) ? to : -1;
155    }
156    }
157   
158    /** Matches a Unicode alphabetic character. */
 
159    class UnicodeAlpha extends UniValidator
160    {
 
161  0 toggle @Override
162    public int validate(StringLike s, int from, int to)
163    {
164  0 return from < s.length() && Prop.isAlphabetic(s.charAt(from)) ? to : -1;
165    }
166    }
167   
168    /** Matches a non-alphabetic Unicode character. */
 
169    class NUnicodeAlpha extends UniValidator
170    {
 
171  0 toggle @Override
172    public int validate(StringLike s, int from, int to)
173    {
174  0 return from < s.length() && !Prop.isAlphabetic(s.charAt(from)) ? to
175    : -1;
176    }
177    }
178   
179    /** Matches an upper case Unicode character. */
 
180    class UnicodeUpper extends UniValidator
181    {
 
182  0 toggle @Override
183    public int validate(StringLike s, int from, int to)
184    {
185  0 return from < s.length() && isUpper(s.charAt(from)) ? to : -1;
186    }
187   
 
188  0 toggle final boolean isUpper(char c)
189    {
190  0 return c == CaseMgr.toUpperCase(c) && c != CaseMgr.toLowerCase(c);
191    }
192    }
193   
194    /** Matches a lower case Unicode character. */
 
195    class UnicodeLower extends UniValidator
196    {
 
197  0 toggle @Override
198    public int validate(StringLike s, int from, int to)
199    {
200  0 return from < s.length() && isLower(s.charAt(from)) ? to : -1;
201    }
202   
 
203  0 toggle final boolean isLower(char c)
204    {
205  0 return c != CaseMgr.toUpperCase(c) && c == CaseMgr.toLowerCase(c);
206    }
207    }
208   
209    /**
210    * Regex provides the parser which constructs the linked list of Pattern classes
211    * from a String.
212    * <p>
213    * For the purpose of this documentation, the fact that java interprets the
214    * backslash will be ignored. In practice, however, you will need a double
215    * backslash to obtain a string that contains a single backslash character.
216    * Thus, the example pattern "\b" should really be typed as "\\b" inside java
217    * code.
218    * <p>
219    * Note that Regex is part of package "com.stevesoft.pat". To use it, simply
220    * import com.stevesoft.pat.Regex at the top of your file.
221    * <p>
222    * Regex is made with a constructor that takes a String that defines the regular
223    * expression. Thus, for example
224    *
225    * <pre>
226    * Regex r = new Regex(&quot;[a-c]*&quot;);
227    * </pre>
228    *
229    * matches any number of characters so long as they are 'a', 'b', or 'c'.
230    * <p>
231    * To attempt to match the Pattern to a given string, you can use either the
232    * search(String) member function, or the matchAt(String,int position) member
233    * function. These functions return a boolean which tells you whether or not the
234    * thing worked, and sets the methods "charsMatched()" and "matchedFrom()" in
235    * the Regex object appropriately.
236    * <p>
237    * The portion of the string before the match can be obtained by the left()
238    * member, and the portion after the match can be obtained by the right()
239    * member.
240    * <p>
241    * Essentially, this package implements a syntax that is very much like the perl
242    * 5 regular expression syntax.
243    *
244    * Longer example:
245    *
246    * <pre>
247    * Regex r = new Regex(&quot;x(a|b)y&quot;);
248    * r.matchAt(&quot;xay&quot;, 0);
249    * System.out.println(&quot;sub = &quot; + r.stringMatched(1));
250    * </pre>
251    *
252    * The above would print "sub = a".
253    *
254    * <pre>
255    * r.left() // would return &quot;x&quot;
256    * r.right() // would return &quot;y&quot;
257    * </pre>
258    *
259    * <p>
260    * Differences between this package and perl5:<br>
261    * The extended Pattern for setting flags, is now supported, but the flags are
262    * different. "(?i)" tells the pattern to ignore case, "(?Q)" sets the
263    * "dontMatchInQuotes" flag, and "(?iQ)" sets them both. You can change the
264    * escape character. The pattern
265    *
266    * <pre>
267    * (?e=#)#d+
268    * </pre>
269    *
270    * is the same as
271    *
272    * <pre>
273    * \d+
274    * </pre>
275    *
276    * , but note that the sequence
277    *
278    * <pre>
279    * (?e=#)
280    * </pre>
281    *
282    * <b>must</b> occur at the very beginning of the pattern. There may be other
283    * small differences as well. I will either make my package conform or note them
284    * as I become aware of them.
285    * <p>
286    * This package supports additional patterns not in perl5: <center> <table
287    * border=1>
288    * <tr>
289    * <td>(?@())</td>
290    * <td>Group</td>
291    * <td>This matches all characters between the '(' character and the balancing
292    * ')' character. Thus, it will match "()" as well as "(())". The balancing
293    * characters are arbitrary, thus (?@{}) matches on "{}" and "{{}}".</td>
294    * <tr>
295    * <td>(?&lt;1)</td>
296    * <td>Backup</td>
297    * <td>Moves the pointer backwards within the text. This allows you to make a
298    * "look behind." It fails if it attempts to move to a position before the
299    * beginning of the string. "x(?&lt;1)" is equivalent to "(?=x)". The number, 1
300    * in this example, is the number of characters to move backwards.</td>
301    * </table>
302    * </center>
303    * </dl>
304    *
305    * @author Steven R. Brandt
306    * @version package com.stevesoft.pat, release 1.5.3
307    * @see Pattern
308    */
 
309    public class Regex extends RegRes implements FilenameFilter
310    {
311    /**
312    * BackRefOffset gives the identity number of the first pattern. Version 1.0
313    * used zero, version 1.1 uses 1 to be more compatible with perl.
314    */
315    static int BackRefOffset = 1;
316   
317    private static Pattern none = new NoPattern();
318   
319    Pattern thePattern = none;
320   
321    patInt minMatch = new patInt(0);
322   
323    static Hashtable validators = new Hashtable();
 
324  10 toggle static
325    {
326  10 define("p", "(?>1)", new UnicodePunct());
327  10 define("P", "(?>1)", new NUnicodePunct());
328  10 define("s", "(?>1)", new UnicodeWhite());
329  10 define("S", "(?>1)", new NUnicodeWhite());
330  10 define("w", "(?>1)", new UnicodeW());
331  10 define("W", "(?>1)", new NUnicodeW());
332  10 define("d", "(?>1)", new UnicodeDigit());
333  10 define("D", "(?>1)", new NUnicodeDigit());
334  10 define("m", "(?>1)", new UnicodeMath());
335  10 define("M", "(?>1)", new NUnicodeMath());
336  10 define("c", "(?>1)", new UnicodeCurrency());
337  10 define("C", "(?>1)", new NUnicodeCurrency());
338  10 define("a", "(?>1)", new UnicodeAlpha());
339  10 define("A", "(?>1)", new NUnicodeAlpha());
340  10 define("uc", "(?>1)", new UnicodeUpper());
341  10 define("lc", "(?>1)", new UnicodeLower());
342    }
343   
344    /** Set the dontMatchInQuotes flag. */
 
345  0 toggle public void setDontMatchInQuotes(boolean b)
346    {
347  0 dontMatchInQuotes = b;
348    }
349   
350    /** Find out if the dontMatchInQuotes flag is enabled. */
 
351  0 toggle public boolean getDontMatchInQuotes()
352    {
353  0 return dontMatchInQuotes;
354    }
355   
356    boolean dontMatchInQuotes = false;
357   
358    /**
359    * Set the state of the ignoreCase flag. If set to true, then the pattern
360    * matcher will ignore case when searching for a match.
361    */
 
362  230 toggle public void setIgnoreCase(boolean b)
363    {
364  230 ignoreCase = b;
365    }
366   
367    /**
368    * Get the state of the ignoreCase flag. Returns true if we are ignoring the
369    * case of the pattern, false otherwise.
370    */
 
371  0 toggle public boolean getIgnoreCase()
372    {
373  0 return ignoreCase;
374    }
375   
376    boolean ignoreCase = false;
377   
378    static boolean defaultMFlag = false;
379   
380    /**
381    * Set the default value of the m flag. If it is set to true, then the MFlag
382    * will be on for any regex search executed.
383    */
 
384  0 toggle public static void setDefaultMFlag(boolean mFlag)
385    {
386  0 defaultMFlag = mFlag;
387    }
388   
389    /**
390    * Get the default value of the m flag. If it is set to true, then the MFlag
391    * will be on for any regex search executed.
392    */
 
393  0 toggle public static boolean getDefaultMFlag()
394    {
395  0 return defaultMFlag;
396    }
397   
398    /**
399    * Initializes the object without a Pattern. To supply a Pattern use
400    * compile(String s).
401    *
402    * @see com.stevesoft.pat.Regex#compile(java.lang.String)
403    */
 
404  2805 toggle public Regex()
405    {
406    }
407   
408    /**
409    * Create and compile a Regex, but do not throw any exceptions. If you wish to
410    * have exceptions thrown for syntax errors, you must use the Regex(void)
411    * constructor to create the Regex object, and then call the compile method.
412    * Therefore, you should only call this method when you know your pattern is
413    * right. I will probably become more like
414    *
415    * @see com.stevesoft.pat.Regex#search(java.lang.String)
416    * @see com.stevesoft.pat.Regex#compile(java.lang.String)
417    */
 
418  2116 toggle public Regex(String s)
419    {
420  2116 try
421    {
422  2116 compile(s);
423    } catch (RegSyntax rs)
424    {
425    }
426    }
427   
428    ReplaceRule rep = null;
429   
430    /**
431    * Create and compile both a Regex and a ReplaceRule.
432    *
433    * @see com.stevesoft.pat.ReplaceRule
434    * @see com.stevesoft.pat.Regex#compile(java.lang.String)
435    */
 
436  90 toggle public Regex(String s, String rp)
437    {
438  90 this(s);
439  90 rep = ReplaceRule.perlCode(rp);
440    }
441   
442    /**
443    * Create and compile a Regex, but give it the ReplaceRule specified. This
444    * allows the user finer control of the Replacement process, if that is
445    * desired.
446    *
447    * @see com.stevesoft.pat.ReplaceRule
448    * @see com.stevesoft.pat.Regex#compile(java.lang.String)
449    */
 
450  0 toggle public Regex(String s, ReplaceRule rp)
451    {
452  0 this(s);
453  0 rep = rp;
454    }
455   
456    /**
457    * Change the ReplaceRule of this Regex by compiling a new one using String
458    * rp.
459    */
 
460  0 toggle public void setReplaceRule(String rp)
461    {
462  0 rep = ReplaceRule.perlCode(rp);
463  0 repr = null; // Clear Replacer history
464    }
465   
466    /** Change the ReplaceRule of this Regex to rp. */
 
467  109 toggle public void setReplaceRule(ReplaceRule rp)
468    {
469  109 rep = rp;
470    }
471   
472    /**
473    * Test to see if a custom defined rule exists.
474    *
475    * @see com.stevesoft.pat.Regex#define(java.lang.String,java.lang.String,Validator)
476    */
 
477  0 toggle public static boolean isDefined(String nm)
478    {
479  0 return validators.get(nm) != null;
480    }
481   
482    /**
483    * Removes a custom defined rule.
484    *
485    * @see com.stevesoft.pat.Regex#define(java.lang.String,java.lang.String,Validator)
486    */
 
487  0 toggle public static void undefine(String nm)
488    {
489  0 validators.remove(nm);
490    }
491   
492    /**
493    * Defines a method to create a new rule. See test/deriv2.java and
494    * test/deriv3.java for examples of how to use it.
495    */
 
496  160 toggle public static void define(String nm, String pat, Validator v)
497    {
498  160 v.pattern = pat;
499  160 validators.put(nm, v);
500    }
501   
502    /**
503    * Defines a shorthand for a pattern. The pattern will be invoked by a string
504    * that has the form "(??"+nm+")".
505    */
 
506  0 toggle public static void define(String nm, String pat)
507    {
508  0 validators.put(nm, pat);
509    }
510   
511    /** Get the current ReplaceRule. */
 
512  3 toggle public ReplaceRule getReplaceRule()
513    {
514  3 return rep;
515    }
516   
517    Replacer repr = null;
518   
 
519  152 toggle final Replacer _getReplacer()
520    {
521  152 return repr == null ? repr = new Replacer() : repr;
522    }
523   
 
524  0 toggle public Replacer getReplacer()
525    {
526  0 if (repr == null)
527    {
528  0 repr = new Replacer();
529    }
530  0 repr.rh.me = this;
531  0 repr.rh.prev = null;
532  0 return repr;
533    }
534   
535    /**
536    * Replace the first occurrence of this pattern in String s according to the
537    * ReplaceRule.
538    *
539    * @see com.stevesoft.pat.ReplaceRule
540    * @see com.stevesoft.pat.Regex#getReplaceRule()
541    */
 
542  0 toggle public String replaceFirst(String s)
543    {
544  0 return _getReplacer().replaceFirstRegion(s, this, 0, s.length())
545    .toString();
546    }
547   
548    /**
549    * Replace the first occurrence of this pattern in String s beginning with
550    * position pos according to the ReplaceRule.
551    *
552    * @see com.stevesoft.pat.ReplaceRule
553    * @see com.stevesoft.pat.Regex#getReplaceRule()
554    */
 
555  0 toggle public String replaceFirstFrom(String s, int pos)
556    {
557  0 return _getReplacer().replaceFirstRegion(s, this, pos, s.length())
558    .toString();
559    }
560   
561    /**
562    * Replace the first occurrence of this pattern in String s beginning with
563    * position start and ending with end according to the ReplaceRule.
564    *
565    * @see com.stevesoft.pat.ReplaceRule
566    * @see com.stevesoft.pat.Regex#getReplaceRule()
567    */
 
568  0 toggle public String replaceFirstRegion(String s, int start, int end)
569    {
570  0 return _getReplacer().replaceFirstRegion(s, this, start, end)
571    .toString();
572    }
573   
574    /**
575    * Replace all occurrences of this pattern in String s according to the
576    * ReplaceRule.
577    *
578    * @see com.stevesoft.pat.ReplaceRule
579    * @see com.stevesoft.pat.Regex#getReplaceRule()
580    */
 
581  152 toggle public String replaceAll(String s)
582    {
583  152 return _getReplacer().replaceAllRegion(s, this, 0, s.length())
584    .toString();
585    }
586   
 
587  0 toggle public StringLike replaceAll(StringLike s)
588    {
589  0 return _getReplacer().replaceAllRegion(s, this, 0, s.length());
590    }
591   
592    /**
593    * Replace all occurrences of this pattern in String s beginning with position
594    * pos according to the ReplaceRule.
595    *
596    * @see com.stevesoft.pat.ReplaceRule
597    * @see com.stevesoft.pat.Regex#getReplaceRule()
598    */
 
599  0 toggle public String replaceAllFrom(String s, int pos)
600    {
601  0 return _getReplacer().replaceAllRegion(s, this, pos, s.length())
602    .toString();
603    }
604   
605    /**
606    * Replace all occurrences of this pattern in String s beginning with position
607    * start and ending with end according to the ReplaceRule.
608    *
609    * @see com.stevesoft.pat.ReplaceRule
610    * @see com.stevesoft.pat.Regex#getReplaceRule()
611    */
 
612  0 toggle public String replaceAllRegion(String s, int start, int end)
613    {
614  0 return _getReplacer().replaceAllRegion(s, this, start, end).toString();
615    }
616   
617    /** Essentially clones the Regex object */
 
618  196 toggle public Regex(Regex r)
619    {
620  196 super(r);
621  196 dontMatchInQuotes = r.dontMatchInQuotes;
622  196 esc = r.esc;
623  196 ignoreCase = r.ignoreCase;
624  196 gFlag = r.gFlag;
625  196 if (r.rep == null)
626    {
627  196 rep = null;
628    }
629    else
630    {
631  0 rep = (ReplaceRule) r.rep.clone();
632    }
633    /*
634    * try { compile(r.toString()); } catch(RegSyntax r_) {}
635    */
636  196 thePattern = r.thePattern.clone(new Hashtable());
637  196 minMatch = r.minMatch;
638  196 skipper = r.skipper;
639    }
640   
641    /**
642    * By default, the escape character is the backslash, but you can make it
643    * anything you want by setting this variable.
644    */
645    public char esc = Pattern.ESC;
646   
647    /**
648    * This method compiles a regular expression, making it possible to call the
649    * search or matchAt methods.
650    *
651    * @exception com.stevesoft.pat.RegSyntax
652    * is thrown if a syntax error is encountered in the pattern. For
653    * example, "x{3,1}" or "*a" are not valid patterns.
654    * @see com.stevesoft.pat.Regex#search
655    * @see com.stevesoft.pat.Regex#matchAt
656    */
 
657  2226 toggle public void compile(String prepat) throws RegSyntax
658    {
659  2226 String postpat = parsePerl.codify(prepat, true);
660  2226 String pat = postpat == null ? prepat : postpat;
661  2226 minMatch = null;
662  2226 ignoreCase = false;
663  2226 dontMatchInQuotes = false;
664  2226 Rthings mk = new Rthings(this);
665  2226 int offset = mk.val;
666  2226 String newpat = pat;
667  2226 thePattern = none;
668  2226 p = null;
669  2226 or = null;
670  2226 minMatch = new patInt(0);
671  2226 StrPos sp = new StrPos(pat, 0);
672  2226 if (sp.incMatch("(?e="))
673    {
674  0 char newEsc = sp.c;
675  0 sp.inc();
676  0 if (sp.match(')'))
677    {
678  0 newpat = reEscape(pat.substring(6), newEsc, Pattern.ESC);
679    }
680    }
681  2226 else if (esc != Pattern.ESC)
682    {
683  0 newpat = reEscape(pat, esc, Pattern.ESC);
684    }
685  2226 thePattern = _compile(newpat, mk);
686  2225 numSubs_ = mk.val - offset;
687  2225 mk.set(this);
688    }
689   
690    /*
691    * If a Regex is compared against a Regex, a check is done to see that the
692    * patterns are equal as well as the most recent match. If a Regex is compare
693    * with a RegRes, only the result of the most recent match is compared.
694    */
 
695  0 toggle @Override
696    public boolean equals(Object o)
697    {
698  0 if (o instanceof Regex)
699    {
700  0 if (toString().equals(o.toString()))
701    {
702  0 return super.equals(o);
703    }
704    else
705    {
706  0 return false;
707    }
708    }
709    else
710    {
711  0 return super.equals(o);
712    }
713    }
714   
715    /** A clone by any other name would smell as sweet. */
 
716  196 toggle @Override
717    public Object clone()
718    {
719  196 return new Regex(this);
720    }
721   
722    /** Return a clone of the underlying RegRes object. */
 
723  0 toggle public RegRes result()
724    {
725  0 return (RegRes) super.clone();
726    }
727   
728    // prep sets global variables of class
729    // Pattern so that it can access them
730    // during an attempt at a match
731    Pthings pt = new Pthings();
732   
 
733  11616 toggle final Pthings prep(StringLike s)
734    {
735    // if(gFlag)
736  11616 pt.lastPos = matchedTo();
737  11616 if (pt.lastPos < 0)
738    {
739  6467 pt.lastPos = 0;
740    }
741  11616 if ((s == null ? null : s.unwrap()) != (src == null ? null
742    : src.unwrap())) // compare with the previously searched source, not s with itself
743    {
744  1883 pt.lastPos = 0; // the text being searched differs from last time: start over
745    }
746  11616 src = s;
747  11616 pt.dotDoesntMatchCR = dotDoesntMatchCR && (!sFlag);
748  11616 pt.mFlag = (mFlag | defaultMFlag);
749  11616 pt.ignoreCase = ignoreCase;
750  11616 pt.no_check = false;
751  11616 if (pt.marks != null)
752    {
753  36923 for (int i = 0; i < pt.marks.length; i++)
754    {
755  32274 pt.marks[i] = -1;
756    }
757    }
758  11616 pt.marks = null;
759  11616 pt.nMarks = numSubs_;
760  11616 pt.src = s;
761  11616 if (dontMatchInQuotes)
762    {
763  0 setCbits(s, pt);
764    }
765    else
766    {
767  11616 pt.cbits = null;
768    }
769  11616 return pt;
770    }
771   
772    /**
773    * Attempt to match a Pattern beginning at a specified location within the
774    * string.
775    *
776    * @see com.stevesoft.pat.Regex#search
777    */
 
778  0 toggle public boolean matchAt(String s, int start_pos)
779    {
780  0 return _search(s, start_pos, start_pos);
781    }
782   
783    /**
784    * Attempt to match a Pattern beginning at a specified location within the
785    * StringLike.
786    *
787    * @see com.stevesoft.pat.Regex#search
788    */
 
789  0 toggle public boolean matchAt(StringLike s, int start_pos)
790    {
791  0 return _search(s, start_pos, start_pos);
792    }
793   
794    /**
795    * Search through a String for the first occurrence of a match.
796    *
797    * @see com.stevesoft.pat.Regex#searchFrom
798    * @see com.stevesoft.pat.Regex#matchAt
799    */
 
800  10388 toggle public boolean search(String s)
801    {
802  10388 if (s == null)
803    {
804  0 throw new NullPointerException(MessageManager
805    .getString("exception.null_string_given_to_regex_search"));
806    }
807  10388 return _search(s, 0, s.length());
808    }
809   
 
810  0 toggle public boolean search(StringLike sl)
811    {
812  0 if (sl == null)
813    {
814  0 throw new NullPointerException(MessageManager.getString(
815    "exception.null_string_like_given_to_regex_search"));
816    }
817  0 return _search(sl, 0, sl.length());
818    }
819   
 
820  0 toggle public boolean reverseSearch(String s)
821    {
822  0 if (s == null)
823    {
824  0 throw new NullPointerException(MessageManager.getString(
825    "exception.null_string_given_to_regex_reverse_search"));
826    }
827  0 return _reverseSearch(s, 0, s.length());
828    }
829   
 
830  0 toggle public boolean reverseSearch(StringLike sl)
831    {
832  0 if (sl == null)
833    {
834  0 throw new NullPointerException(MessageManager.getString(
835    "exception.null_string_like_given_to_regex_reverse_search"));
836    }
837  0 return _reverseSearch(sl, 0, sl.length());
838    }
839   
840    /**
841    * Search through a String for the first occurrence of a match, but start at
842    * position
843    *
844    * <pre>
845    * start
846    * </pre>
847    */
 
848  1073 toggle public boolean searchFrom(String s, int start)
849    {
850  1073 if (s == null)
851    {
852  0 throw new NullPointerException(MessageManager.getString(
853    "exception.null_string_like_given_to_regex_search_from"));
854    }
855  1073 return _search(s, start, s.length());
856    }
857   
 
858  0 toggle public boolean searchFrom(StringLike s, int start)
859    {
860  0 if (s == null)
861    {
862  0 throw new NullPointerException(MessageManager.getString(
863    "exception.null_string_like_given_to_regex_search_from"));
864    }
865  0 return _search(s, start, s.length());
866    }
867   
868    /**
869    * Search through a region of a String for the first occurrence of a match.
870    */
 
871  0 toggle public boolean searchRegion(String s, int start, int end)
872    {
873  0 if (s == null)
874    {
875  0 throw new NullPointerException(MessageManager.getString(
876    "exception.null_string_like_given_to_regex_search_region"));
877    }
878  0 return _search(s, start, end);
879    }
880   
881    /**
882    * Set this to change the default behavior of the "." pattern. By default it
883    * now matches perl's behavior and fails to match the '\n' character.
884    */
885    public static boolean dotDoesntMatchCR = true;
886   
887    StringLike gFlags;
888   
889    int gFlagto = 0;
890   
891    boolean gFlag = false;
892   
893    /** Set the 'g' flag */
 
894  0 toggle public void setGFlag(boolean b)
895    {
896  0 gFlag = b;
897    }
898   
899    /** Get the state of the 'g' flag. */
 
900  0 toggle public boolean getGFlag()
901    {
902  0 return gFlag;
903    }
904   
905    boolean sFlag = false;
906   
907    /** Get the state of the sFlag */
 
908  0 toggle public boolean getSFlag()
909    {
910  0 return sFlag;
911    }
912   
913    boolean mFlag = false;
914   
915    /** Get the state of the mFlag */
 
916  0 toggle public boolean getMFlag()
917    {
918  0 return mFlag;
919    }
920   
 
921  11461 toggle final boolean _search(String s, int start, int end)
922    {
923  11461 return _search(new StringWrap(s), start, end);
924    }
925   
 
926  11616 toggle final boolean _search(StringLike s, int start, int end)
927    {
928  11616 if (gFlag && gFlagto > 0 && gFlags != null
929    && s.unwrap() == gFlags.unwrap())
930    {
931  0 start = gFlagto;
932    }
933  11616 gFlags = null;
934   
935  11616 Pthings pt = prep(s);
936   
937  11616 int up = (minMatch == null ? end : end - minMatch.i);
938   
939  11616 if (up < start && end >= start)
940    {
941  0 up = start;
942    }
943   
944  11616 if (skipper == null)
945    {
946  263340 for (int i = start; i <= up; i++)
947    {
948  259002 charsMatched_ = thePattern.matchAt(s, i, pt);
949  259002 if (charsMatched_ >= 0)
950    {
951  4817 matchFrom_ = thePattern.mfrom;
952  4817 marks = pt.marks;
953  4817 gFlagto = matchFrom_ + charsMatched_;
954  4817 gFlags = s;
955  4817 return didMatch_ = true;
956    }
957    }
958    }
959    else
960    {
961  2461 pt.no_check = true;
962  127535 for (int i = start; i <= up; i++)
963    {
964  127535 i = skipper.find(src, i, up);
965  127535 if (i < 0)
966    {
967  917 charsMatched_ = matchFrom_ = -1;
968  917 return didMatch_ = false;
969    }
970  126618 charsMatched_ = thePattern.matchAt(s, i, pt);
971  126618 if (charsMatched_ >= 0)
972    {
973  1544 matchFrom_ = thePattern.mfrom;
974  1544 marks = pt.marks;
975  1544 gFlagto = matchFrom_ + charsMatched_;
976  1544 gFlags = s;
977  1544 return didMatch_ = true;
978    }
979    }
980    }
981  4338 return didMatch_ = false;
982    }
983   
984    /*
985    * final boolean _search(LongStringLike s,long start,long end) { if(gFlag &&
986    * gFlagto > 0 && s==gFlags) start = gFlagto; gFlags = null;
987    *
988    * Pthings pt=prep(s);
989    *
990    * int up = end;//(minMatch == null ? end : end-minMatch.i);
991    *
992    * if(up < start && end >= start) up = start;
993    *
994    * if(skipper == null) { for(long i=start;i<=up;i++) { charsMatched_ =
995    * thePattern.matchAt(s,i,pt); if(charsMatched_ >= 0) { matchFrom_ =
996    * thePattern.mfrom; marks = pt.marks; gFlagto = matchFrom_+charsMatched_;
997    * return didMatch_=true; } } } else { pt.no_check = true; for(long
998    * i=start;i<=up;i++) { i = skipper.find(src,i,up); if(i<0) { charsMatched_ =
999    * matchFrom_ = -1; return didMatch_ = false; } charsMatched_ =
1000    * thePattern.matchAt(s,i,pt); if(charsMatched_ >= 0) { matchFrom_ =
1001    * thePattern.mfrom; marks = pt.marks; gFlagto = matchFrom_+charsMatched_;
1002    * gFlags = s; return didMatch_=true; } else { i = s.adjustIndex(i); up =
1003    * s.adjustEnd(i); } } } return didMatch_=false; }
1004    */
1005   
 
1006  0 toggle boolean _reverseSearch(String s, int start, int end)
1007    {
1008  0 return _reverseSearch(new StringWrap(s), start, end);
1009    }
1010   
 
1011  0 toggle boolean _reverseSearch(StringLike s, int start, int end)
1012    {
1013  0 if (gFlag && gFlagto > 0 && gFlags != null && s.unwrap() == gFlags.unwrap())
1014    { // gFlags stays null until a search succeeds, so it must be checked before unwrap()
1015  0 end = gFlagto;
1016    }
1017  0 gFlags = null;
1018  0 Pthings pt = prep(s);
1019  0 for (int i = end; i >= start; i--)
1020    {
1021  0 charsMatched_ = thePattern.matchAt(s, i, pt);
1022  0 if (charsMatched_ >= 0)
1023    {
1024  0 matchFrom_ = thePattern.mfrom;
1025  0 marks = pt.marks;
1026  0 gFlagto = matchFrom_ - 1;
1027  0 gFlags = s;
1028  0 return didMatch_ = true;
1029    }
1030    }
1031  0 return didMatch_ = false;
1032    }
1033   
1034    // This routine sets the cbits variable
1035    // of class Pattern. Cbits is true for
1036    // the bit corresponding to a character inside
1037    // a set of quotes.
1038    static StringLike lasts = null;
1039   
1040    static BitSet lastbs = null;
1041   
 
1042  0 toggle static void setCbits(StringLike s, Pthings pt)
1043    {
1044  0 if (s == lasts)
1045    {
1046  0 pt.cbits = lastbs;
1047  0 return;
1048    }
1049  0 BitSet bs = new BitSet(s.length());
1050  0 char qc = ' ';
1051  0 boolean setBit = false;
1052  0 for (int i = 0; i < s.length(); i++)
1053    {
1054  0 if (setBit)
1055    {
1056  0 bs.set(i);
1057    }
1058  0 char c = s.charAt(i);
1059  0 if (!setBit && c == '"')
1060    {
1061  0 qc = c;
1062  0 setBit = true;
1063  0 bs.set(i);
1064    }
1065  0 else if (!setBit && c == '\'')
1066    {
1067  0 qc = c;
1068  0 setBit = true;
1069  0 bs.set(i);
1070    }
1071  0 else if (setBit && c == qc)
1072    {
1073  0 setBit = false;
1074    }
1075  0 else if (setBit && c == '\\' && i + 1 < s.length())
1076    {
1077  0 i++;
1078  0 if (setBit)
1079    {
1080  0 bs.set(i);
1081    }
1082    }
1083    }
1084  0 pt.cbits = lastbs = bs;
1085  0 lasts = s;
1086    }
1087   
1088    // Wanted user to over-ride this in alpha version,
1089    // but it wasn't really necessary because of this trick:
 
1090  2605 toggle Regex newRegex()
1091    {
1092  2605 try
1093    {
1094  2605 return getClass().getDeclaredConstructor().newInstance();
1095    } catch (InstantiationException ie)
1096    {
1097  0 return null;
1098    } catch (IllegalAccessException iae)
1099    {
1100  0 return null;
1101    } catch (ReflectiveOperationException roe)
1102    {
1103  0 return null;
1104    }
1105    }
1106   
1107    /**
1108    * Only needed for creating your own extensions of Regex. This method adds the
1109    * next Pattern in the chain of patterns or sets the Pattern if it is the
1110    * first call.
1111    */
 
1112  11015 toggle protected void add(Pattern p2)
1113    {
1114  11015 if (p == null)
1115    {
1116  4989 p = p2;
1117    }
1118    else
1119    {
1120  6026 p.add(p2);
1121  6026 p2 = p;
1122    }
1123    }
1124   
1125    /**
1126    * You only need to use this method if you are creating your own extentions to
1127    * Regex. compile1 compiles one Pattern element, it can be over-ridden to
1128    * allow the Regex compiler to understand new syntax. See deriv.java for an
1129    * example. This routine is the heart of class Regex. Rthings has one integer
1130    * member called intValue, it is used to keep track of the number of ()'s in
1131    * the Pattern.
1132    *
1133    * @exception com.stevesoft.pat.RegSyntax
1134    * is thrown when a nonsensensical pattern is supplied. For
1135    * example, a pattern beginning with *.
1136    */
 
1137  15396 toggle protected void compile1(StrPos sp, Rthings mk) throws RegSyntax
1138    {
1139  15396 if (sp.match('['))
1140    {
1141  2173 sp.inc();
1142  2173 add(matchBracket(sp));
1143    }
1144  13223 else if (sp.match('|'))
1145    {
1146  173 if (or == null)
1147    {
1148  15 or = new Or();
1149    }
1150  173 if (p == null)
1151    {
1152  0 p = new NullPattern();
1153    }
1154  173 or.addOr(p);
1155  173 p = null;
1156    }
1157  13050 else if (sp.incMatch("(?<"))
1158    {
1159  0 patInt i = sp.getPatInt();
1160  0 if (i == null)
1161    {
1162  0 RegSyntaxError.endItAll("No int after (?<");
1163    }
1164  0 add(new Backup(i.intValue()));
1165  0 if (!sp.match(')'))
1166    {
1167  0 RegSyntaxError.endItAll("No ) after (?<");
1168    }
1169    }
1170  13050 else if (sp.incMatch("(?>"))
1171    {
1172  0 patInt i = sp.getPatInt();
1173  0 if (i == null)
1174    {
1175  0 RegSyntaxError.endItAll("No int after (?>");
1176    }
1177  0 add(new Backup(-i.intValue()));
1178  0 if (!sp.match(')'))
1179    {
1180  0 RegSyntaxError.endItAll("No ) after (?<");
1181    }
1182    }
1183  13050 else if (sp.incMatch("(?@"))
1184    {
1185  0 char op = sp.c;
1186  0 sp.inc();
1187  0 char cl = sp.c;
1188  0 sp.inc();
1189  0 if (!sp.match(')'))
1190    {
1191  0 RegSyntaxError.endItAll("(?@ does not have closing paren");
1192    }
1193  0 add(new Group(op, cl));
1194    }
1195  13050 else if (sp.incMatch("(?#"))
1196    {
1197  0 while (!sp.match(')'))
1198    {
1199  0 sp.inc();
1200    }
1201    }
1202  13050 else if (sp.dontMatch && sp.c == 'w')
1203    {
1204    // Regex r = new Regex();
1205    // r._compile("[a-zA-Z0-9_]",mk);
1206    // add(new Goop("\\w",r.thePattern));
1207  155 Bracket b = new Bracket(false);
1208  155 b.addOr(new Range('a', 'z'));
1209  155 b.addOr(new Range('A', 'Z'));
1210  155 b.addOr(new Range('0', '9'));
1211  155 b.addOr(new oneChar('_'));
1212  155 add(b);
1213    }
1214  12895 else if (sp.dontMatch && sp.c == 'G')
1215    {
1216  0 add(new BackG());
1217    }
1218  12895 else if (sp.dontMatch && sp.c == 's')
1219    {
1220    // Regex r = new Regex();
1221    // r._compile("[ \t\n\r\b]",mk);
1222    // add(new Goop("\\s",r.thePattern));
1223  1250 Bracket b = new Bracket(false);
1224  1250 b.addOr(new oneChar((char) 32));
1225  1250 b.addOr(new Range((char) 8, (char) 10));
1226  1250 b.addOr(new oneChar((char) 13));
1227  1250 add(b);
1228    }
1229  11645 else if (sp.dontMatch && sp.c == 'd')
1230    {
1231    // Regex r = new Regex();
1232    // r._compile("[0-9]",mk);
1233    // add(new Goop("\\d",r.thePattern));
1234  110 Range digit = new Range('0', '9');
1235  110 digit.printBrackets = true;
1236  110 add(digit);
1237    }
1238  11535 else if (sp.dontMatch && sp.c == 'W')
1239    {
1240    // Regex r = new Regex();
1241    // r._compile("[^a-zA-Z0-9_]",mk);
1242    // add(new Goop("\\W",r.thePattern));
1243  1 Bracket b = new Bracket(true);
1244  1 b.addOr(new Range('a', 'z'));
1245  1 b.addOr(new Range('A', 'Z'));
1246  1 b.addOr(new Range('0', '9'));
1247  1 b.addOr(new oneChar('_'));
1248  1 add(b);
1249    }
1250  11534 else if (sp.dontMatch && sp.c == 'S')
1251    {
1252    // Regex r = new Regex();
1253    // r._compile("[^ \t\n\r\b]",mk);
1254    // add(new Goop("\\S",r.thePattern));
1255  225 Bracket b = new Bracket(true);
1256  225 b.addOr(new oneChar((char) 32));
1257  225 b.addOr(new Range((char) 8, (char) 10));
1258  225 b.addOr(new oneChar((char) 13));
1259  225 add(b);
1260    }
1261  11309 else if (sp.dontMatch && sp.c == 'D')
1262    {
1263    // Regex r = new Regex();
1264    // r._compile("[^0-9]",mk);
1265    // add(new Goop("\\D",r.thePattern));
1266  0 Bracket b = new Bracket(true);
1267  0 b.addOr(new Range('0', '9'));
1268  0 add(b);
1269    }
1270  11309 else if (sp.dontMatch && sp.c == 'B')
1271    {
1272  0 Regex r = new Regex();
1273  0 r._compile("(?!" + back_slash + "b)", mk);
1274  0 add(r.thePattern);
1275    }
1276  11309 else if (isOctalString(sp))
1277    {
1278  0 int d = sp.c - '0';
1279  0 sp.inc();
1280  0 d = 8 * d + sp.c - '0';
1281  0 StrPos sp2 = new StrPos(sp);
1282  0 sp2.inc();
1283  0 if (isOctalDigit(sp2, false))
1284    {
1285  0 sp.inc();
1286  0 d = 8 * d + sp.c - '0';
1287    }
1288  0 add(new oneChar((char) d));
1289    }
1290  11309 else if (sp.dontMatch && sp.c >= '1' && sp.c <= '9')
1291    {
1292  0 int iv = sp.c - '0';
1293  0 StrPos s2 = new StrPos(sp);
1294  0 s2.inc();
1295  0 if (!s2.dontMatch && s2.c >= '0' && s2.c <= '9')
1296    {
1297  0 iv = 10 * iv + (s2.c - '0');
1298  0 sp.inc();
1299    }
1300  0 add(new BackMatch(iv));
1301    }
1302  11309 else if (sp.dontMatch && sp.c == 'b')
1303    {
1304  396 add(new Boundary());
1305    }
1306  10913 else if (sp.match('\b'))
1307    {
1308  0 add(new Boundary());
1309    }
1310  10913 else if (sp.match('$'))
1311    {
1312  31 add(new End(true));
1313    }
1314  10882 else if (sp.dontMatch && sp.c == 'Z')
1315    {
1316  0 add(new End(false));
1317    }
1318  10882 else if (sp.match('.'))
1319    {
1320  375 add(new Any());
1321    }
1322  10507 else if (sp.incMatch("(??"))
1323    {
1324  0 StringBuffer sb = new StringBuffer();
1325  0 StringBuffer sb2 = new StringBuffer();
1326  0 while (!sp.match(')') && !sp.match(':'))
1327    {
1328  0 sb.append(sp.c);
1329  0 sp.inc();
1330    }
1331  0 if (sp.incMatch(":"))
1332    {
1333  0 while (!sp.match(')'))
1334    {
1335  0 sb2.append(sp.c);
1336  0 sp.inc();
1337    }
1338    }
1339  0 String sbs = sb.toString();
1340  0 if (validators.get(sbs) instanceof String)
1341    {
1342  0 String pat = (String) validators.get(sbs);
1343  0 Regex r = newRegex();
1344  0 Rthings rth = new Rthings(this);
1345  0 rth.noBackRefs = true;
1346  0 r._compile(pat, rth);
1347  0 add(r.thePattern);
1348    }
1349    else
1350    {
1351  0 Custom cm = new Custom(sb.toString());
1352  0 if (cm.v != null)
1353    {
1354  0 Validator v2 = cm.v.arg(sb2.toString());
1355  0 if (v2 != null)
1356    {
1357  0 v2.argsave = sb2.toString();
1358  0 String p = cm.v.pattern;
1359  0 cm.v = v2;
1360  0 v2.pattern = p;
1361    }
1362  0 Regex r = newRegex();
1363  0 Rthings rth = new Rthings(this);
1364  0 rth.noBackRefs = true;
1365  0 r._compile(cm.v.pattern, rth);
1366  0 cm.sub = r.thePattern;
1367  0 cm.sub.add(new CustomEndpoint(cm));
1368  0 cm.sub.setParent(cm);
1369  0 add(cm);
1370    }
1371    }
1372    }
1373  10507 else if (sp.match('('))
1374    {
1375  2605 mk.parenLevel++;
1376  2605 Regex r = newRegex();
1377    // r.or = new Or();
1378  2605 sp.inc();
1379  2605 if (sp.incMatch("?:"))
1380    {
1381  42 r.or = new Or();
1382    }
1383  2563 else if (sp.incMatch("?="))
1384    {
1385  0 r.or = new lookAhead(false);
1386    }
1387  2563 else if (sp.incMatch("?!"))
1388    {
1389  0 r.or = new lookAhead(true);
1390    }
1391  2563 else if (sp.match('?'))
1392    {
1393  0 sp.inc();
1394  0 do
1395    {
1396  0 if (sp.c == 'i')
1397    {
1398  0 mk.ignoreCase = true;
1399    }
1400  0 if (sp.c == 'Q')
1401    {
1402  0 mk.dontMatchInQuotes = true;
1403    }
1404  0 if (sp.c == 'o')
1405    {
1406  0 mk.optimizeMe = true;
1407    }
1408  0 if (sp.c == 'g')
1409    {
1410  0 mk.gFlag = true;
1411    }
1412  0 if (sp.c == 's')
1413    {
1414  0 mk.sFlag = true;
1415    }
1416  0 if (sp.c == 'm')
1417    {
1418  0 mk.mFlag = true;
1419    }
1420  0 sp.inc();
1421  0 } while (!sp.match(')') && !sp.eos);
1422  0 r = null;
1423  0 mk.parenLevel--;
1424  0 if (sp.eos) // throw new RegSyntax
1425    {
1426  0 RegSyntaxError.endItAll("Unclosed ()");
1427    }
1428    }
1429    else
1430    { // just ordinary parenthesis
1431  2563 r.or = mk.noBackRefs ? new Or() : new OrMark(mk.val++);
1432    }
1433  2605 if (r != null)
1434    {
1435  2605 add(r._compile(sp, mk));
1436    }
1437    }
1438  7902 else if (sp.match('^'))
1439    {
1440  65 add(new Start(true));
1441    }
1442  7837 else if (sp.dontMatch && sp.c == 'A')
1443    {
1444  0 add(new Start(false));
1445    }
1446  7837 else if (sp.match('*'))
1447    {
1448  1460 addMulti(new patInt(0), new patInf());
1449    }
1450  6377 else if (sp.match('+'))
1451    {
1452  2487 addMulti(new patInt(1), new patInf());
1453    }
1454  3890 else if (sp.match('?'))
1455    {
1456  89 addMulti(new patInt(0), new patInt(1));
1457    }
1458  3801 else if (sp.match('{'))
1459    {
1460  172 boolean bad = false;
1461  172 StrPos sp2 = new StrPos(sp);
1462    // StringBuffer sb = new StringBuffer();
1463  172 sp.inc();
1464  172 patInt i1 = sp.getPatInt();
1465  172 patInt i2 = null;
1466  172 if (sp.match('}'))
1467    {
1468  157 i2 = i1;
1469    }
1470    else
1471    {
1472  15 if (!sp.match(','))
1473    {
1474    /*
1475    * RegSyntaxError.endItAll( "String \"{"+i2+ "\" should be followed
1476    * with , or }");
1477    */
1478  0 bad = true;
1479    }
1480  15 sp.inc();
1481  15 if (sp.match('}'))
1482    {
1483  15 i2 = new patInf();
1484    }
1485    else
1486    {
1487  0 i2 = sp.getPatInt();
1488    }
1489    }
1490  172 if (i1 == null || i2 == null)
1491    {
1492    /*
1493    * throw new RegSyntax("Badly formatted Multi: " +"{"+i1+","+i2+"}");
1494    */
1495  0 bad = true;
1496    }
1497  172 if (bad)
1498    {
1499  0 sp.dup(sp2);
1500  0 add(new oneChar(sp.c));
1501    }
1502    else
1503    {
1504  172 addMulti(i1, i2);
1505    }
1506    }
1507  3629 else if (sp.escMatch('x') && next2Hex(sp))
1508    {
1509  0 sp.inc();
1510  0 int d = getHexDigit(sp);
1511  0 sp.inc();
1512  0 d = 16 * d + getHexDigit(sp);
1513  0 add(new oneChar((char) d));
1514    }
1515  3629 else if (sp.escMatch('c'))
1516    {
1517  0 sp.inc();
1518  0 if (sp.c < Ctrl.cmap.length)
1519    {
1520  0 add(new oneChar(Ctrl.cmap[sp.c]));
1521    }
1522    else
1523    {
1524  0 add(new oneChar(sp.c));
1525    }
1526    }
1527  3629 else if (sp.escMatch('f'))
1528    {
1529  0 add(new oneChar((char) 12));
1530    }
1531  3629 else if (sp.escMatch('a'))
1532    {
1533  0 add(new oneChar((char) 7));
1534    }
1535  3629 else if (sp.escMatch('t'))
1536    {
1537  0 add(new oneChar('\t'));
1538    }
1539  3629 else if (sp.escMatch('n'))
1540    {
1541  0 add(new oneChar('\n'));
1542    }
1543  3629 else if (sp.escMatch('r'))
1544    {
1545  0 add(new oneChar('\r'));
1546    }
1547  3629 else if (sp.escMatch('b'))
1548    {
1549  0 add(new oneChar('\b'));
1550    }
1551  3629 else if (sp.escMatch('e'))
1552    {
1553  0 add(new oneChar((char) 27));
1554    }
1555    else
1556    {
1557  3629 add(new oneChar(sp.c));
1558  3629 if (sp.match(')'))
1559    {
1560  0 RegSyntaxError.endItAll("Unmatched right paren in pattern");
1561    }
1562    }
1563    }
1564   
1565    // compiles all Pattern elements, internal method
 
1566  2226 toggle private Pattern _compile(String pat, Rthings mk) throws RegSyntax
1567    {
1568  2226 minMatch = null;
1569  2226 sFlag = mFlag = ignoreCase = gFlag = false;
1570  2226 StrPos sp = new StrPos(pat, 0);
1571  2226 thePattern = _compile(sp, mk);
1572  2225 pt.marks = null;
1573  2225 return thePattern;
1574    }
1575   
1576    Pattern p = null;
1577   
1578    Or or = null;
1579   
 
1580  4831 toggle Pattern _compile(StrPos sp, Rthings mk) throws RegSyntax
1581    {
1582  20226 while (!(sp.eos || (or != null && sp.match(')'))))
1583    {
1584  15396 compile1(sp, mk);
1585  15395 sp.inc();
1586    }
1587  4830 if (sp.match(')'))
1588    {
1589  2605 mk.parenLevel--;
1590    }
1591  2225 else if (sp.eos && mk.parenLevel != 0)
1592    {
1593  0 RegSyntaxError.endItAll("Unclosed Parenthesis! lvl=" + mk.parenLevel);
1594    }
1595  4830 if (or != null)
1596    {
1597  2620 if (p == null)
1598    {
1599  15 p = new NullPattern();
1600    }
1601  2620 or.addOr(p);
1602  2620 return or;
1603    }
1604  2210 return p == null ? new NullPattern() : p;
1605    }
1606   
1607    // add a multi object to the end of the chain
1608    // which applies to the last object
 
1609  4208 toggle void addMulti(patInt i1, patInt i2) throws RegSyntax
1610    {
1611  4208 Pattern last, last2;
1612  8372 for (last = p; last != null && last.next != null; last = last.next)
1613    {
1614  4164 ;
1615    }
1616  4208 if (last == null || last == p)
1617    {
1618  2588 last2 = null;
1619    }
1620    else
1621    {
1622  4164 for (last2 = p; last2.next != last; last2 = last2.next)
1623    {
1624  2544 ;
1625    }
1626    }
1627  4208 if (last instanceof Multi && i1.intValue() == 0 && i2.intValue() == 1)
1628    {
1629  0 ((Multi) last).matchFewest = true;
1630    }
1631  4208 else if (last instanceof FastMulti && i1.intValue() == 0
1632    && i2.intValue() == 1)
1633    {
1634  0 ((FastMulti) last).matchFewest = true;
1635    }
1636  4208 else if (last instanceof DotMulti && i1.intValue() == 0
1637    && i2.intValue() == 1)
1638    {
1639  0 ((DotMulti) last).matchFewest = true;
1640    }
1641  4208 else if (last instanceof Multi || last instanceof DotMulti
1642    || last instanceof FastMulti)
1643    {
1644  1 throw new RegSyntax("Syntax error.");
1645    }
1646  4207 else if (last2 == null)
1647    {
1648  2587 p = mkMulti(i1, i2, p);
1649    }
1650    else
1651    {
1652  1620 last2.next = mkMulti(i1, i2, last);
1653    }
1654    }
1655   
 
1656  4207 toggle final static Pattern mkMulti(patInt lo, patInt hi, Pattern p)
1657    throws RegSyntax
1658    {
1659  4207 if (p instanceof Any && p.next == null)
1660    {
1661  371 return new DotMulti(lo, hi);
1662    }
1663  3836 return RegOpt.safe4fm(p) ? (Pattern) new FastMulti(lo, hi, p)
1664    : (Pattern) new Multi(lo, hi, p);
1665    }
1666   
1667    // process the bracket operator
 
1668  2173 toggle Pattern matchBracket(StrPos sp) throws RegSyntax
1669    {
1670  2173 Bracket ret;
1671  2173 if (sp.match('^'))
1672    {
1673  419 ret = new Bracket(true);
1674  419 sp.inc();
1675    }
1676    else
1677    {
1678  1754 ret = new Bracket(false);
1679    }
1680  2173 if (sp.match(']'))
1681    {
1682    // throw new RegSyntax
1683  0 RegSyntaxError.endItAll("Unmatched []");
1684    }
1685   
1686  10278 while (!sp.eos && !sp.match(']'))
1687    {
1688  8105 StrPos s1 = new StrPos(sp);
1689  8105 s1.inc();
1690  8105 StrPos s1_ = new StrPos(s1);
1691  8105 s1_.inc();
1692  8105 if (s1.match('-') && !s1_.match(']'))
1693    {
1694  1836 StrPos s2 = new StrPos(s1);
1695  1836 s2.inc();
1696  1836 if (!s2.eos)
1697    {
1698  1836 ret.addOr(new Range(sp.c, s2.c));
1699    }
1700  1836 sp.inc();
1701  1836 sp.inc();
1702    }
1703  6269 else if (sp.escMatch('Q'))
1704    {
1705  0 sp.inc();
1706  0 while (!sp.escMatch('E'))
1707    {
1708  0 ret.addOr(new oneChar(sp.c));
1709  0 sp.inc();
1710    }
1711    }
1712  6269 else if (sp.escMatch('d'))
1713    {
1714  45 ret.addOr(new Range('0', '9'));
1715    }
1716  6224 else if (sp.escMatch('s'))
1717    {
1718  0 ret.addOr(new oneChar((char) 32));
1719  0 ret.addOr(new Range((char) 8, (char) 10));
1720  0 ret.addOr(new oneChar((char) 13));
1721    }
1722  6224 else if (sp.escMatch('w'))
1723    {
1724  0 ret.addOr(new Range('a', 'z'));
1725  0 ret.addOr(new Range('A', 'Z'));
1726  0 ret.addOr(new Range('0', '9'));
1727  0 ret.addOr(new oneChar('_'));
1728    }
1729  6224 else if (sp.escMatch('D'))
1730    {
1731  0 ret.addOr(new Range((char) 0, (char) 47));
1732  0 ret.addOr(new Range((char) 58, (char) 65535));
1733    }
1734  6224 else if (sp.escMatch('S'))
1735    {
1736  0 ret.addOr(new Range((char) 0, (char) 7));
1737  0 ret.addOr(new Range((char) 11, (char) 12));
1738  0 ret.addOr(new Range((char) 14, (char) 31));
1739  0 ret.addOr(new Range((char) 33, (char) 65535));
1740    }
1741  6224 else if (sp.escMatch('W'))
1742    {
1743  0 ret.addOr(new Range((char) 0, (char) 64));
1744  0 ret.addOr(new Range((char) 91, (char) 94));
1745  0 ret.addOr(new oneChar((char) 96));
1746  0 ret.addOr(new Range((char) 123, (char) 65535));
1747    }
1748  6224 else if (sp.escMatch('x') && next2Hex(sp))
1749    {
1750  0 sp.inc();
1751  0 int d = getHexDigit(sp);
1752  0 sp.inc();
1753  0 d = 16 * d + getHexDigit(sp);
1754  0 ret.addOr(new oneChar((char) d));
1755    }
1756  6224 else if (sp.escMatch('a'))
1757    {
1758  0 ret.addOr(new oneChar((char) 7));
1759    }
1760  6224 else if (sp.escMatch('f'))
1761    {
1762  0 ret.addOr(new oneChar((char) 12));
1763    }
1764  6224 else if (sp.escMatch('e'))
1765    {
1766  0 ret.addOr(new oneChar((char) 27));
1767    }
1768  6224 else if (sp.escMatch('n'))
1769    {
1770  0 ret.addOr(new oneChar('\n'));
1771    }
1772  6224 else if (sp.escMatch('t'))
1773    {
1774  0 ret.addOr(new oneChar('\t'));
1775    }
1776  6224 else if (sp.escMatch('r'))
1777    {
1778  0 ret.addOr(new oneChar('\r'));
1779    }
1780  6224 else if (sp.escMatch('c'))
1781    {
1782  0 sp.inc();
1783  0 if (sp.c < Ctrl.cmap.length)
1784    {
1785  0 ret.addOr(new oneChar(Ctrl.cmap[sp.c]));
1786    }
1787    else
1788    {
1789  0 ret.addOr(new oneChar(sp.c));
1790    }
1791    }
1792  6224 else if (isOctalString(sp))
1793    {
1794  0 int d = sp.c - '0';
1795  0 sp.inc();
1796  0 d = 8 * d + sp.c - '0';
1797  0 StrPos sp2 = new StrPos(sp);
1798  0 sp2.inc();
1799  0 if (isOctalDigit(sp2, false))
1800    {
1801  0 sp.inc();
1802  0 d = 8 * d + sp.c - '0';
1803    }
1804  0 ret.addOr(new oneChar((char) d));
1805    }
1806    else
1807    {
1808  6224 ret.addOr(new oneChar(sp.c));
1809    }
1810  8105 sp.inc();
1811    }
1812  2173 return ret;
1813    }
1814   
1815    /**
1816    * Converts the stored Pattern to a String -- this is a decompile. Note that
1817    * \t and \n will really print out here, Not just the two character
1818    * representations. Also be prepared to see some strange output if your
1819    * characters are not printable.
1820    */
 
1821  0 toggle @Override
1822    public String toString()
1823    {
1824  0 if (false && thePattern == null)
1825    {
1826  0 return "";
1827    }
1828    else
1829    {
1830  0 StringBuffer sb = new StringBuffer();
1831  0 if (esc != Pattern.ESC)
1832    {
1833  0 sb.append("(?e=");
1834  0 sb.append(esc);
1835  0 sb.append(")");
1836    }
1837  0 if (gFlag || mFlag || !dotDoesntMatchCR || sFlag || ignoreCase
1838    || dontMatchInQuotes || optimized())
1839    {
1840  0 sb.append("(?");
1841  0 if (ignoreCase)
1842    {
1843  0 sb.append("i");
1844    }
1845  0 if (mFlag)
1846    {
1847  0 sb.append("m");
1848    }
1849  0 if (sFlag || !dotDoesntMatchCR)
1850    {
1851  0 sb.append("s");
1852    }
1853  0 if (dontMatchInQuotes)
1854    {
1855  0 sb.append("Q");
1856    }
1857  0 if (optimized())
1858    {
1859  0 sb.append("o");
1860    }
1861  0 if (gFlag)
1862    {
1863  0 sb.append("g");
1864    }
1865  0 sb.append(")");
1866    }
1867  0 String patstr = thePattern.toString();
1868  0 if (esc != Pattern.ESC)
1869    {
1870  0 patstr = reEscape(patstr, Pattern.ESC, esc);
1871    }
1872  0 sb.append(patstr);
1873  0 return sb.toString();
1874    }
1875    }
1876   
1877    // Re-escape Pattern, allows us to use a different escape
1878    // character.
 
1879  0 toggle static String reEscape(String s, char oldEsc, char newEsc)
1880    {
1881  0 if (oldEsc == newEsc)
1882    {
1883  0 return s;
1884    }
1885  0 int i;
1886  0 StringBuffer sb = new StringBuffer();
1887  0 for (i = 0; i < s.length(); i++)
1888    {
1889  0 if (s.charAt(i) == oldEsc && i + 1 < s.length())
1890    {
1891  0 if (s.charAt(i + 1) == oldEsc)
1892    {
1893  0 sb.append(oldEsc);
1894    }
1895    else
1896    {
1897  0 sb.append(newEsc);
1898  0 sb.append(s.charAt(i + 1));
1899    }
1900  0 i++;
1901    }
1902  0 else if (s.charAt(i) == newEsc)
1903    {
1904  0 sb.append(newEsc);
1905  0 sb.append(newEsc);
1906    }
1907    else
1908    {
1909  0 sb.append(s.charAt(i));
1910    }
1911    }
1912  0 return sb.toString();
1913    }
1914   
1915    /**
1916    * This method implements FilenameFilter, allowing one to use a Regex to
1917    * search through a directory using File.list. There is a FileRegex now that
1918    * does this better.
1919    *
1920    * @see com.stevesoft.pat.FileRegex
1921    */
 
1922  0 toggle @Override
1923    public boolean accept(File dir, String s)
1924    {
1925  0 return search(s);
1926    }
1927   
1928    /** The version of this package */
 
1929  0 toggle final static public String version()
1930    {
1931  0 return "lgpl release 1.5.3";
1932    }
1933   
1934    /**
1935    * Once this method is called, the state of variables ignoreCase and
1936    * dontMatchInQuotes should not be changed as the results will be
1937    * unpredictable. However, search and matchAt will run more quickly. Note that
1938    * you can check to see if the pattern has been optimized by calling the
1939    * optimized() method.
1940    * <p>
1941    * This method will attempt to rewrite your pattern in a way that makes it
1942    * faster (not all patterns execute at the same speed). In general, "(?: ...
1943    * )" will be faster than "( ... )" so if you don't need the backreference,
1944    * you should group using the former pattern.
1945    * <p>
1946    * It will also introduce new pattern elements that you can't get to
1947    * otherwise, for example if you have a large table of strings, i.e. the
1948    * months of the year "(January|February|...)" optimize() will make a
1949    * Hashtable that takes it to the next appropriate pattern element --
1950    * eliminating the need for a linear search.
1951    *
1952    * @see com.stevesoft.pat.Regex#optimized
1953    * @see com.stevesoft.pat.Regex#ignoreCase
1954    * @see com.stevesoft.pat.Regex#dontMatchInQuotes
1955    * @see com.stevesoft.pat.Regex#matchAt
1956    * @see com.stevesoft.pat.Regex#search
1957    */
 
1958  318 toggle public void optimize()
1959    {
1960  318 if (optimized() || thePattern == null)
1961    {
1962  0 return;
1963    }
1964  318 minMatch = new patInt(0); // thePattern.countMinChars();
1965  318 thePattern = RegOpt.opt(thePattern, ignoreCase, dontMatchInQuotes);
1966  318 skipper = Skip.findSkip(this);
1967    // RegOpt.setParents(this);
1968  318 return;
1969    }
1970   
// Assigned by optimize() from Skip.findSkip(this).
Skip skipper;
1972   
1973    /**
1974    * This function returns true if the optimize method has been called.
1975    */
 
1976  318 toggle public boolean optimized()
1977    {
1978  318 return minMatch != null;
1979    }
1980   
1981    /**
1982    * A bit of syntactic surgar for those who want to make their code look more
1983    * perl-like. To use this initialize your Regex object by saying:
1984    *
1985    * <pre>
1986    * Regex r1 = Regex.perlCode(&quot;s/hello/goodbye/&quot;);
1987    * Regex r2 = Regex.perlCode(&quot;s'fish'frog'i&quot;);
1988    * Regex r3 = Regex.perlCode(&quot;m'hello');
1989    * </pre>
1990    *
1991    * The i for ignoreCase is supported in this syntax, as well as m, s, and x.
1992    * The g flat is a bit of a special case.
1993    * <p>
1994    * If you wish to replace all occurences of a pattern, you do not put a 'g' in
1995    * the perlCode, but call Regex's replaceAll method.
1996    * <p>
1997    * If you wish to simply and only do a search for r2's pattern, you can do
1998    * this by calling the searchFrom method method repeatedly, or by calling
1999    * search repeatedly if the g flag is set.
2000    * <p>
2001    * Note: Currently perlCode does <em>not</em> support the (?e=#) syntax for
2002    * changing the escape character.
2003    */
2004   
 
2005  110 toggle public static Regex perlCode(String s)
2006    {
2007    // this file is big enough, see parsePerl.java
2008    // for this function.
2009  110 return parsePerl.parse(s);
2010    }
2011   
// The backslash character; compile1 uses it to build the pattern text
// "(?!\b)" when it compiles \B.
static final char back_slash = '\\';
2013   
2014    /**
2015    * Checks to see if there are only literal and no special pattern elements in
2016    * this Regex.
2017    */
 
2018  0 toggle public boolean isLiteral()
2019    {
2020  0 Pattern x = thePattern;
2021  0 while (x != null)
2022    {
2023  0 if (x instanceof oneChar)
2024    {
2025  0 ;
2026    }
2027  0 else if (x instanceof Skipped)
2028    {
2029  0 ;
2030    }
2031    else
2032    {
2033  0 return false;
2034    }
2035  0 x = x.next;
2036    }
2037  0 return true;
2038    }
2039   
2040    /**
2041    * You only need to know about this if you are inventing your own pattern
2042    * elements.
2043    */
 
2044  0 toggle public patInt countMinChars()
2045    {
2046  0 return thePattern.countMinChars();
2047    }
2048   
2049    /**
2050    * You only need to know about this if you are inventing your own pattern
2051    * elements.
2052    */
 
2053  0 toggle public patInt countMaxChars()
2054    {
2055  0 return thePattern.countMaxChars();
2056    }
2057   
 
2058  0 toggle boolean isHexDigit(StrPos sp)
2059    {
2060  0 boolean r = !sp.eos && !sp.dontMatch
2061    && ((sp.c >= '0' && sp.c <= '9') || (sp.c >= 'a' && sp.c <= 'f')
2062    || (sp.c >= 'A' && sp.c <= 'F'));
2063  0 return r;
2064    }
2065   
 
2066  17533 toggle boolean isOctalDigit(StrPos sp, boolean first)
2067    {
2068  17533 boolean r = !sp.eos && !(first ^ sp.dontMatch) && sp.c >= '0'
2069    && sp.c <= '7';
2070  17533 return r;
2071    }
2072   
 
2073  0 toggle int getHexDigit(StrPos sp)
2074    {
2075  0 if (sp.c >= '0' && sp.c <= '9')
2076    {
2077  0 return sp.c - '0';
2078    }
2079  0 if (sp.c >= 'a' && sp.c <= 'f')
2080    {
2081  0 return sp.c - 'a' + 10;
2082    }
2083  0 return sp.c - 'A' + 10;
2084    }
2085   
 
2086  0 toggle boolean next2Hex(StrPos sp)
2087    {
2088  0 StrPos sp2 = new StrPos(sp);
2089  0 sp2.inc();
2090  0 if (!isHexDigit(sp2))
2091    {
2092  0 return false;
2093    }
2094  0 sp2.inc();
2095  0 if (!isHexDigit(sp2))
2096    {
2097  0 return false;
2098    }
2099  0 return true;
2100    }
2101   
 
2102  17533 toggle boolean isOctalString(StrPos sp)
2103    {
2104  17533 if (!isOctalDigit(sp, true))
2105    {
2106  17533 return false;
2107    }
2108  0 StrPos sp2 = new StrPos(sp);
2109  0 sp2.inc();
2110  0 if (!isOctalDigit(sp2, false))
2111    {
2112  0 return false;
2113    }
2114  0 return true;
2115    }
2116    }