Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
FastBracket | 18 | 101 | 43 |
1 | // | |
2 | // This software is now distributed according to | |
3 | // the Lesser Gnu Public License. Please see | |
4 | // http://www.gnu.org/copyleft/lesser.txt for | |
5 | // the details. | |
6 | // -- Happy Computing! | |
7 | // | |
8 | package com.stevesoft.pat; | |
9 | ||
10 | import java.util.BitSet; | |
11 | import java.util.Vector; | |
12 | ||
13 | /** | |
14 | * Uses table lookup to match [] type constructs, but only if it can use a | |
15 | * lookup table 256 bits in size. It is impractical to make a table if it is too | |
16 | * large. | |
17 | */ | |
18 | public class FastBracket extends Bracket | |
19 | { | |
20 | int min, max; | |
21 | ||
22 | BitSet bs; | |
23 | ||
24 | 513 | FastBracket(boolean n) |
25 | { | |
26 | 513 | super(n); |
27 | } | |
28 | ||
29 | // This routine can optimize a bracket, possibly | |
30 | // it will replace it with a FastBracket. | |
31 | 519 | static Bracket process(Bracket b, boolean ignc) |
32 | { | |
33 | 519 | Vector v = b.v; |
34 | 519 | b.pv = null; |
35 | 519 | try |
36 | { | |
37 | ||
38 | // Expand out the vector to make separate | |
39 | // entries for other cases if ignoreCase is | |
40 | // turned on. | |
41 | 519 | Vector nv = v; |
42 | 519 | if (ignc) |
43 | { | |
44 | 0 | nv = new Vector(); |
45 | 0 | for (int i = 0; i < v.size(); i++) |
46 | { | |
47 | 0 | Pattern p = (Pattern) v.elementAt(i); |
48 | 0 | nv.addElement(p); |
49 | 0 | if (p instanceof oneChar) |
50 | { | |
51 | 0 | oneChar oc = (oneChar) p; |
52 | 0 | nv.addElement(new oneChar(oc.altc)); |
53 | } | |
54 | 0 | else if (p instanceof Range) |
55 | { | |
56 | 0 | Range ra = (Range) p; |
57 | 0 | nv.addElement(new Range(ra.altlo, ra.althi)); |
58 | } | |
59 | } | |
60 | } | |
61 | 519 | v = nv; |
62 | ||
63 | // Bubble sort, make sure elements | |
64 | // are in order. This will allow us | |
65 | // to merge them. | |
66 | 1611 | for (int i = 0; i < v.size() - 1; i++) |
67 | { | |
68 | 3516 | for (int j = 0; j < v.size() - 1; j++) |
69 | { | |
70 | 2424 | char c1 = getl(v.elementAt(j)); |
71 | 2424 | char c2 = getl(v.elementAt(j + 1)); |
72 | 2424 | if (c2 < c1) |
73 | { | |
74 | 1137 | Object o = v.elementAt(j); |
75 | 1137 | v.setElementAt(v.elementAt(j + 1), j); |
76 | 1137 | v.setElementAt(o, j + 1); |
77 | } | |
78 | } | |
79 | } | |
80 | ||
81 | 519 | nv = new Vector(); |
82 | // merge -- remove overlaps | |
83 | 519 | Pattern p = (Pattern) v.elementAt(0); |
84 | 519 | nv.addElement(p); |
85 | 1611 | for (int i = 1; i < v.size(); i++) |
86 | { | |
87 | 1092 | if (geth(p) + 1 >= getl(v.elementAt(i))) |
88 | { | |
89 | 54 | Pattern p2 = (Pattern) v.elementAt(i); |
90 | 54 | char lo = min(getl(p), getl(p2)); |
91 | 54 | char hi = max(geth(p), geth(p2)); |
92 | 54 | nv.setElementAt(p = mkelem(lo, hi), nv.size() - 1); |
93 | } | |
94 | else | |
95 | { | |
96 | 1038 | p = (Pattern) v.elementAt(i); |
97 | 1038 | nv.addElement(p); |
98 | } | |
99 | } | |
100 | ||
101 | 519 | b.v = v = nv; |
102 | } catch (RegSyntax e) | |
103 | { | |
104 | 0 | e.printStackTrace(); |
105 | } | |
106 | ||
107 | // We don't want these things to be empty. | |
108 | 519 | Vector negv = neg(v); |
109 | 519 | if (v.size() == 1) |
110 | { | |
111 | 6 | return b; |
112 | } | |
113 | 513 | if (negv.size() == 1) |
114 | { | |
115 | 0 | b.v = negv; |
116 | 0 | b.neg = !b.neg; |
117 | 0 | return b; |
118 | } | |
119 | ||
120 | // Now consider if we can make a FastBracket. | |
121 | // Uses a BitSet to do a lookup. | |
122 | 513 | FastBracket fb = newbrack(v, b.neg); |
123 | 513 | if (fb == null) |
124 | { | |
125 | 0 | fb = newbrack(negv, !b.neg); |
126 | } | |
127 | 513 | if (fb != null) |
128 | { | |
129 | 513 | fb.parent = b.parent; |
130 | 513 | fb.next = b.next; |
131 | 513 | return fb; |
132 | } | |
133 | ||
134 | // return the normal Bracket. | |
135 | 0 | return b; |
136 | } | |
137 | ||
138 | // Build a FastBracket and set bits. If this can't | |
139 | // be done, return null. | |
140 | 513 | final static FastBracket newbrack(Vector v, boolean neg) |
141 | { | |
142 | 513 | FastBracket fb = new FastBracket(neg); |
143 | 513 | fb.v = v; |
144 | 513 | if (v.size() == 0) |
145 | { | |
146 | 0 | return null; |
147 | } | |
148 | 513 | fb.min = getl(v.elementAt(0)); |
149 | 513 | fb.max = geth(v.elementAt(v.size() - 1)); |
150 | 513 | if (fb.max - fb.min <= 256) |
151 | { | |
152 | 513 | fb.bs = new BitSet(fb.max - fb.min + 1); |
153 | 2064 | for (int i = 0; i < v.size(); i++) |
154 | { | |
155 | 1551 | Object o = v.elementAt(i); |
156 | 1551 | int min0 = getl(o) - fb.min; |
157 | 1551 | int max0 = geth(o) - fb.min; |
158 | 4296 | for (int j = min0; j <= max0; j++) |
159 | { | |
160 | 2745 | fb.bs.set(j); |
161 | } | |
162 | } | |
163 | 513 | return fb; |
164 | } | |
165 | 0 | return null; |
166 | } | |
167 | ||
168 | // Negate a sorted Vector. Applying this | |
169 | // operation twice should yield the same Vector | |
170 | // back. | |
171 | 519 | final static Vector neg(Vector v) |
172 | { | |
173 | 519 | try |
174 | { | |
175 | 519 | Vector nv = new Vector(); |
176 | 519 | if (v.size() == 0) |
177 | { | |
178 | 0 | nv.addElement(new Range((char) 0, (char) 65535)); |
179 | 0 | return nv; |
180 | } | |
181 | 519 | int p0 = getl(v.elementAt(0)); |
182 | 519 | if (p0 != 0) |
183 | { | |
184 | 513 | nv.addElement(mkelem((char) 0, (char) (p0 - 1))); |
185 | } | |
186 | 1557 | for (int i = 0; i < v.size() - 1; i++) |
187 | { | |
188 | 1038 | int hi = getl(v.elementAt(i + 1)) - 1; |
189 | 1038 | int lo = geth(v.elementAt(i)) + 1; |
190 | 1038 | nv.addElement(mkelem((char) lo, (char) hi)); |
191 | } | |
192 | 519 | int pN = geth(v.lastElement()); |
193 | 519 | if (pN != 65535) |
194 | { | |
195 | 513 | nv.addElement(mkelem((char) (pN + 1), (char) 65535)); |
196 | } | |
197 | 519 | return nv; |
198 | } catch (RegSyntax rs) | |
199 | { | |
200 | 0 | return null; |
201 | } | |
202 | } | |
203 | ||
204 | // Make either a Range or oneChar Object, depending on which | |
205 | // is appropriate. | |
206 | 2118 | final static Pattern mkelem(char lo, char hi) throws RegSyntax |
207 | { | |
208 | 2118 | return lo == hi ? (Pattern) (new oneChar(lo)) |
209 | : (Pattern) (new Range(lo, hi)); | |
210 | } | |
211 | ||
212 | 54 | static final char min(char a, char b) |
213 | { | |
214 | 54 | return a < b ? a : b; |
215 | } | |
216 | ||
217 | 54 | static final char max(char a, char b) |
218 | { | |
219 | 54 | return a > b ? a : b; |
220 | } | |
221 | ||
222 | // getl -- get lower value of Range object, | |
223 | // or get value of oneChar object. | |
224 | 9669 | final static char getl(Object o) |
225 | { | |
226 | 9669 | Pattern p = (Pattern) o; |
227 | 9669 | if (p instanceof Range) |
228 | { | |
229 | 2541 | return ((Range) p).lo; |
230 | } | |
231 | 7128 | return ((oneChar) p).c; |
232 | } | |
233 | ||
234 | // geth -- get higher value of Range object, | |
235 | // or get value of oneChar object. | |
236 | 4821 | final static char geth(Object o) |
237 | { | |
238 | 4821 | Pattern p = (Pattern) o; |
239 | 4821 | if (p instanceof Range) |
240 | { | |
241 | 1590 | return ((Range) p).hi; |
242 | } | |
243 | 3231 | return ((oneChar) p).c; |
244 | } | |
245 | ||
246 | // This is the easy part! | |
247 | 214481 | public int matchInternal(int pos, Pthings pt) |
248 | { | |
249 | 214481 | if (pos >= pt.src.length() || Masked(pos, pt)) |
250 | { | |
251 | 4959 | return -1; |
252 | } | |
253 | 209522 | char c = pt.src.charAt(pos); |
254 | 209522 | return (neg ^ (c >= min && c <= max && bs.get(c - min))) |
255 | ? nextMatch(pos + 1, pt) | |
256 | : -1; | |
257 | } | |
258 | } |