Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
FastChar | 15 | 3 | 5 | ||
Branch | 40 | 98 | 38 | ||
RegOpt | 272 | 85 | 39 |
1 | // | |
2 | // This software is now distributed according to | |
3 | // the Lesser Gnu Public License. Please see | |
4 | // http://www.gnu.org/copyleft/lesser.txt for | |
5 | // the details. | |
6 | // -- Happy Computing! | |
7 | // | |
8 | package com.stevesoft.pat; | |
9 | ||
10 | import java.util.Enumeration; | |
11 | import java.util.Hashtable; | |
12 | import java.util.Vector; | |
13 | ||
14 | /** This class is just like oneChar, but doesn't worry about case. */ | |
15 | class FastChar extends oneChar | |
16 | { | |
17 | 1111 | FastChar(char c) |
18 | { | |
19 | 1111 | super(c); |
20 | } | |
21 | ||
22 | 160616 | public int matchInternal(int p, Pthings pt) |
23 | { | |
24 | 160616 | return (p < pt.src.length() && pt.src.charAt(p) == c) |
25 | ? nextMatch(p + 1, pt) | |
26 | : -1; | |
27 | } | |
28 | ||
29 | 784 | Pattern clone1(Hashtable h) |
30 | { | |
31 | 784 | return new FastChar(c); |
32 | } | |
33 | } | |
34 | ||
35 | /** | |
36 | * This class is a hashtable keyed by Character Objects. It is used to match | |
37 | * things of the form (?:a..|b..|c..|..) match with greater efficiency -- by | |
38 | * using a Hashtable that indexes into the group of patterns. | |
39 | */ | |
40 | class Branch extends Pattern | |
41 | { | |
42 | Hashtable h = new Hashtable(); | |
43 | ||
44 | // We need to keep track of the order | |
45 | // of the keys -- if we don't then | |
46 | // recompiling the output of toString | |
47 | // may produce errors by re-ordering | |
48 | // ()'s and changing the id number of | |
49 | // the backreference associated with | |
50 | // a subpattern. | |
51 | Vector keys = new Vector(); | |
52 | ||
53 | 784 | Branch() |
54 | { | |
55 | } | |
56 | ||
57 | 196 | Pattern clone1(Hashtable x) |
58 | { | |
59 | 196 | Branch b = new Branch(); |
60 | 196 | b.keys = (Vector) keys.clone(); |
61 | 196 | x.put(this, b); |
62 | 196 | x.put(b, b); |
63 | ||
64 | 588 | for (int i = 0; i < keys.size(); i++) |
65 | { | |
66 | 392 | Pattern p = (Pattern) h.get(keys.elementAt(i)); |
67 | 392 | b.h.put(keys.elementAt(i), p.clone(x)); |
68 | } | |
69 | 196 | return b; |
70 | } | |
71 | ||
72 | // this function eliminates Branches with 0 or 1 elements. | |
73 | 93 | final Pattern reduce(boolean ignoreCase, boolean dontMinQ) |
74 | { | |
75 | 93 | if (h.size() == 1) |
76 | { | |
77 | 0 | Enumeration e = h.keys(); |
78 | 0 | Character c = (Character) e.nextElement(); |
79 | 0 | Pattern oc; |
80 | 0 | if (ignoreCase || dontMinQ) |
81 | { | |
82 | 0 | oc = new oneChar(c.charValue()); |
83 | } | |
84 | else | |
85 | { | |
86 | 0 | oc = new FastChar(c.charValue()); |
87 | } | |
88 | 0 | oc.next = (Pattern) h.get(c); |
89 | 0 | oc.add(next); |
90 | 0 | return oc; |
91 | } | |
92 | 93 | else if (h.size() == 0) |
93 | { | |
94 | 0 | return null; |
95 | } | |
96 | 93 | return this; |
97 | } | |
98 | ||
99 | 183 | public patInt maxChars() |
100 | { | |
101 | 183 | Enumeration e = h.keys(); |
102 | 183 | patInt count = new patInt(0); |
103 | 549 | while (e.hasMoreElements()) |
104 | { | |
105 | 366 | Object key = e.nextElement(); |
106 | 366 | Pattern pa = (Pattern) h.get(key); |
107 | 366 | patInt pi = pa.maxChars(); |
108 | 366 | pi.inc(); |
109 | 366 | count.maxeq(pi); |
110 | } | |
111 | 183 | return count; |
112 | } | |
113 | ||
114 | 183 | public patInt minChars() |
115 | { | |
116 | 183 | Enumeration e = h.keys(); |
117 | 183 | patInt count = new patInt(0); |
118 | 549 | while (e.hasMoreElements()) |
119 | { | |
120 | 366 | Object key = e.nextElement(); |
121 | 366 | Pattern pa = (Pattern) h.get(key); |
122 | 366 | patInt pi = pa.minChars(); |
123 | 366 | pi.inc(); |
124 | 366 | count.mineq(pi); |
125 | } | |
126 | 183 | return count; |
127 | } | |
128 | ||
129 | // adds a oneChar object to this Branch | |
130 | 198 | void addc(oneChar o, boolean ignoreCase, boolean dontMinQ) |
131 | { | |
132 | 198 | Pattern n = o.next; |
133 | 198 | if (n == null) |
134 | { | |
135 | 180 | n = new NullPattern(); |
136 | } | |
137 | else | |
138 | { | |
139 | 18 | n = RegOpt.opt(n, ignoreCase, dontMinQ); |
140 | } | |
141 | 198 | n.setParent(this); |
142 | 198 | set(Character.valueOf(o.c), n, ignoreCase, dontMinQ); |
143 | 198 | if (ignoreCase) |
144 | { | |
145 | 0 | if (o.c != o.altc) |
146 | { | |
147 | 0 | set(Character.valueOf(o.altc), n, ignoreCase, dontMinQ); |
148 | } | |
149 | 0 | if (o.c != o.altc2 && o.altc != o.altc2) |
150 | { | |
151 | 0 | set(Character.valueOf(o.altc2), n, ignoreCase, dontMinQ); |
152 | } | |
153 | } | |
154 | } | |
155 | ||
156 | 198 | void set(Character c, Pattern n, boolean igc, boolean dontMinQ) |
157 | { | |
158 | 198 | Pattern p = (Pattern) h.get(c); |
159 | 198 | next = null; |
160 | // This letter is not yet used in the Branch object. | |
161 | // We need to add it. | |
162 | 198 | if (p == null) |
163 | { | |
164 | 186 | if (n instanceof Or) |
165 | { | |
166 | // A NullPattern is prepended to an Or | |
167 | // to prevent confusing this object. | |
168 | // For example: (boo|bug) => (b(?:oo|ug)) | |
169 | // during this process. However, we | |
170 | // want (b(?:oo|ell)|bug) | |
171 | 6 | NullPattern np = new NullPattern(); |
172 | 6 | np.add(n); |
173 | 6 | h.put(c, np); |
174 | } | |
175 | else | |
176 | { | |
177 | 180 | h.put(c, n); |
178 | } | |
179 | // Make sure we remember the order things were | |
180 | // added into the Branch object so that we can | |
181 | // properly convert it to a String. | |
182 | 186 | keys.addElement(c); |
183 | } | |
184 | 12 | else if (p instanceof Or) |
185 | { | |
186 | 9 | ((Or) p).addOr(n); |
187 | } | |
188 | 3 | else if (p instanceof oneChar && n instanceof oneChar |
189 | && ((oneChar) p).c != ((oneChar) n).c) | |
190 | { | |
191 | 0 | Branch b = new Branch(); |
192 | 0 | b.addc((oneChar) p, igc, dontMinQ); |
193 | 0 | b.addc((oneChar) n, igc, dontMinQ); |
194 | 0 | h.put(c, b); |
195 | 0 | b.setParent(this); |
196 | } | |
197 | 3 | else if (p instanceof Branch && n instanceof oneChar) |
198 | { | |
199 | 0 | ((Branch) p).addc((oneChar) n, igc, dontMinQ); |
200 | 0 | n.setParent(p); |
201 | } | |
202 | else | |
203 | { | |
204 | // Create an Or object to receive the variety | |
205 | // of branches in the pattern if the current letter | |
206 | // is matched. We do not attempt to make these | |
207 | // sub-branches into a Branch object yet. | |
208 | 3 | Or o = new Or(); |
209 | 3 | o.setParent(this); |
210 | ||
211 | // Remove NullPattern from p -- it's no longer needed. | |
212 | 3 | if (p instanceof NullPattern && p.parent == null && p.next != null) |
213 | { | |
214 | 3 | o.addOr(p.next); |
215 | } | |
216 | else | |
217 | { | |
218 | 0 | o.addOr(p); |
219 | } | |
220 | 3 | o.addOr(n); |
221 | ||
222 | 3 | Pattern optpat = RegOpt.opt(o, igc, dontMinQ); |
223 | 3 | h.put(c, optpat); |
224 | 3 | optpat.setParent(this); |
225 | } | |
226 | } | |
227 | ||
228 | 0 | public String toString() |
229 | { | |
230 | 0 | StringBuffer sb = new StringBuffer(); |
231 | // should protect this... | |
232 | 0 | sb.append("(?:(?#branch)"); // Hashtable)"); |
233 | 0 | for (int i = 0; i < keys.size(); i++) |
234 | { | |
235 | 0 | Character c = (Character) keys.elementAt(i); |
236 | 0 | sb.append(c); |
237 | 0 | sb.append(h.get(c)); |
238 | 0 | if (i + 1 < keys.size()) |
239 | { | |
240 | 0 | sb.append("|"); |
241 | } | |
242 | } | |
243 | 0 | sb.append(")"); |
244 | 0 | sb.append(nextString()); |
245 | 0 | return sb.toString(); |
246 | } | |
247 | ||
248 | 387 | public int matchInternal(int pos, Pthings pt) |
249 | { | |
250 | 387 | if (pos >= pt.src.length()) |
251 | { | |
252 | 199 | return -1; |
253 | } | |
254 | 188 | Pattern n = (Pattern) h.get(Character.valueOf(pt.src.charAt(pos))); |
255 | 188 | if (n == null) |
256 | { | |
257 | 188 | return -1; |
258 | } | |
259 | 0 | if (pt.cbits != null && pt.cbits.get(pos)) |
260 | { | |
261 | 0 | return -1; |
262 | } | |
263 | 0 | return n.matchInternal(pos + 1, pt); |
264 | } | |
265 | } | |
266 | ||
267 | /** | |
268 | * This is just a place to put the optimizing function. It is never instantiated | |
269 | * as an Object. It just sorts through the RegOpt looking for things it can | |
270 | * change and make faster. | |
271 | */ | |
272 | public class RegOpt | |
273 | { | |
274 | 2280 | static Pattern opt(Pattern p, boolean ignoreCase, boolean dontMinQ) |
275 | { | |
276 | 2280 | if (p == null) |
277 | { | |
278 | 0 | return p; |
279 | } | |
280 | 2280 | if (p instanceof Bracket) |
281 | { | |
282 | 519 | Bracket b = (Bracket) p; |
283 | // FastBracket is the only special | |
284 | // optimized class to have its own | |
285 | // source file. | |
286 | 519 | p = FastBracket.process(b, ignoreCase); |
287 | // if(!(p instanceof FastBracket) | |
288 | // p = Switch.process(b,ignoreCase); | |
289 | 519 | p.next = b.next; |
290 | 519 | p.parent = b.parent; |
291 | } | |
292 | 1761 | else if (p instanceof oneChar && !ignoreCase && !dontMinQ) |
293 | { | |
294 | 327 | oneChar o = (oneChar) p; |
295 | 327 | p = new FastChar(o.c); |
296 | 327 | p.next = o.next; |
297 | 327 | p.parent = o.parent; |
298 | } | |
299 | 1434 | else if (p instanceof Or && ((Or) p).leftForm().equals("(?:") |
300 | && ((Or) p).v.size() == 1) | |
301 | { // Eliminate this Or Object. | |
302 | 0 | Or o = (Or) p; |
303 | 0 | p = (Pattern) o.v.elementAt(0); |
304 | 0 | p.setParent(null); |
305 | 0 | p = RegOpt.opt(p, ignoreCase, dontMinQ); |
306 | 0 | p.add(o.next); |
307 | } | |
308 | 1434 | else if (p instanceof Or) |
309 | { | |
310 | 588 | Or o = (Or) p; |
311 | 588 | o.pv = null; |
312 | 588 | Vector v = o.v; |
313 | 588 | o.v = new Vector(); |
314 | 588 | Branch b = new Branch(); |
315 | 588 | b.parent = o.parent; |
316 | 1296 | for (int i = 0; i < v.size(); i++) |
317 | { | |
318 | 708 | Pattern pp = (Pattern) v.elementAt(i); |
319 | // We want to have at least two oneChar's in | |
320 | // the Or Object to consider making a Branch. | |
321 | 708 | if (pp instanceof oneChar && (b.h.size() >= 1 || (i + 1 < v.size() |
322 | && v.elementAt(i + 1) instanceof oneChar))) | |
323 | { | |
324 | 198 | b.addc((oneChar) pp, ignoreCase, dontMinQ); |
325 | } | |
326 | else | |
327 | { | |
328 | 510 | if (b.keys.size() > 0) |
329 | { | |
330 | 0 | Pattern p2 = (Pattern) b.reduce(ignoreCase, dontMinQ); |
331 | 0 | if (p2 != null) |
332 | { | |
333 | 0 | o.addOr(p2); |
334 | 0 | b = new Branch(); |
335 | 0 | b.parent = o.parent; |
336 | } | |
337 | } | |
338 | 510 | o.addOr(opt(pp, ignoreCase, dontMinQ)); |
339 | } | |
340 | } | |
341 | 588 | if (b.keys.size() > 0) |
342 | { | |
343 | 93 | Pattern p2 = (Pattern) b.reduce(ignoreCase, dontMinQ); |
344 | 93 | if (p2 != null) |
345 | { | |
346 | 93 | o.addOr(p2); |
347 | } | |
348 | } | |
349 | 588 | if (o.v.size() == 1 && o.leftForm().equals("(?:")) |
350 | { // Eliminate Or Object | |
351 | 3 | p = (Pattern) o.v.elementAt(0); |
352 | 3 | p.setParent(null); |
353 | 3 | p = RegOpt.opt(p, ignoreCase, dontMinQ); |
354 | 3 | p.add(o.next); |
355 | } | |
356 | } | |
357 | 846 | else if (p instanceof FastMulti) |
358 | { | |
359 | 606 | PatternSub ps = (PatternSub) p; |
360 | 606 | ps.sub = RegOpt.opt(ps.sub, ignoreCase, dontMinQ); |
361 | } | |
362 | 240 | else if (p instanceof Multi && safe4fm(((PatternSub) p).sub)) |
363 | { | |
364 | 0 | Multi m = (Multi) p; |
365 | 0 | FastMulti fm = null; |
366 | 0 | try |
367 | { | |
368 | 0 | fm = new FastMulti(m.a, m.b, opt(m.sub, ignoreCase, dontMinQ)); |
369 | } catch (RegSyntax rs) | |
370 | { | |
371 | } | |
372 | 0 | fm.parent = m.parent; |
373 | 0 | fm.matchFewest = m.matchFewest; |
374 | 0 | fm.next = m.next; |
375 | 0 | p = fm; |
376 | } | |
377 | 2280 | if (p.next != null) |
378 | { | |
379 | 822 | p.next = opt(p.next, ignoreCase, dontMinQ); |
380 | } | |
381 | 2280 | return p; |
382 | } | |
383 | ||
384 | 3847 | final static boolean safe4fm(Pattern x) |
385 | { | |
386 | 7741 | while (x != null) |
387 | { | |
388 | 3902 | if (x instanceof Bracket) |
389 | { | |
390 | 3702 | ; |
391 | } | |
392 | 200 | else if (x instanceof Range) |
393 | { | |
394 | 110 | ; |
395 | } | |
396 | 90 | else if (x instanceof oneChar) |
397 | { | |
398 | 71 | ; |
399 | } | |
400 | 19 | else if (x instanceof Any) |
401 | { | |
402 | 0 | ; |
403 | } | |
404 | 19 | else if (x instanceof Custom |
405 | && ((Custom) x).v instanceof UniValidator) | |
406 | { | |
407 | 0 | ; |
408 | } | |
409 | 19 | else if (x instanceof Or) |
410 | { | |
411 | 19 | Or o = (Or) x; |
412 | 19 | if (!o.leftForm().equals("(?:")) |
413 | { | |
414 | 8 | return false; |
415 | } | |
416 | 11 | patInt lo = o.countMinChars(); |
417 | 11 | patInt hi = o.countMaxChars(); |
418 | 11 | if (!lo.equals(hi)) |
419 | { | |
420 | 0 | return false; |
421 | } | |
422 | 22 | for (int i = 0; i < o.v.size(); i++) |
423 | { | |
424 | 11 | if (!safe4fm((Pattern) o.v.elementAt(i))) |
425 | { | |
426 | 0 | return false; |
427 | } | |
428 | } | |
429 | } | |
430 | else | |
431 | { | |
432 | 0 | return false; |
433 | } | |
434 | 3894 | x = x.next; |
435 | } | |
436 | 3839 | return true; |
437 | } | |
438 | /* | |
439 | * public static void setParents(Regex r) { setParents(r.thePattern,null); } | |
440 | * static void setParents(Pattern p,Pattern x) { if(p instanceof PatternSub && | |
441 | * !(p instanceof FastMulti) && !(p instanceof DotMulti)) RegOpt.setParents( | |
442 | * ((PatternSub)p).sub, p); else if(p instanceof Or && !(p instanceof | |
443 | * Bracket)) { Or o = (Or)p; for(int i=0;i<o.v.size();i++) | |
444 | * RegOpt.setParents((Pattern)o.v.elementAt(i),o); } else if(p instanceof | |
445 | * Branch) { Branch b = (Branch)p; Enumeration e = b.h.keys(); | |
446 | * while(e.hasMoreElements()) { Object o = e.nextElement(); RegOpt.setParents( | |
447 | * (Pattern)b.h.get(o), b); } } if(p.next == null) p.parent = x; else { | |
448 | * p.parent = null; RegOpt.setParents(p.next,x); } } | |
449 | */ | |
450 | } |