Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
parsePerl | 16 | 192 | 86 |
1 | // | |
2 | // This software is now distributed according to | |
3 | // the Lesser Gnu Public License. Please see | |
4 | // http://www.gnu.org/copyleft/lesser.txt for | |
5 | // the details. | |
6 | // -- Happy Computing! | |
7 | // | |
8 | package com.stevesoft.pat; | |
9 | ||
10 | /** | |
11 | * This class provides a method for parsing the "s/.../.../" constructs of | |
12 | * Regex.perlCode. | |
13 | * | |
14 | * @see Regex#perlCode | |
15 | */ | |
16 | class parsePerl | |
17 | { | |
18 | 98 | final static char close(char c) |
19 | { | |
20 | // This switch statement does not behave | |
21 | // properly when compiled with jdk1.1.5 | |
22 | // and the -O flag. | |
23 | /* | |
24 | * switch(c) { case '[': return ']'; case '(': return ')'; case '{': return | |
25 | * '}'; } return c; | |
26 | */ | |
27 | 98 | if (c == '<') |
28 | { | |
29 | 0 | return '>'; |
30 | } | |
31 | 98 | if (c == '[') |
32 | { | |
33 | 0 | return ']'; |
34 | } | |
35 | 98 | if (c == '(') |
36 | { | |
37 | 0 | return ')'; |
38 | } | |
39 | 98 | if (c == '{') |
40 | { | |
41 | 0 | return '}'; |
42 | } | |
43 | 98 | return c; |
44 | } | |
45 | ||
46 | 2226 | final public static String codify(String s, boolean keepbs) |
47 | { | |
48 | 2226 | return codify(s, 0, s.length(), keepbs); |
49 | } | |
50 | ||
51 | 2226 | final public static String codify(String s, int i0, int iN, |
52 | boolean keepbs) | |
53 | { | |
54 | 2226 | StringBuffer sb = new StringBuffer(); |
55 | 2226 | boolean ucmode = false, lcmode = false, litmode = false; |
56 | 2226 | boolean uc1 = false, lc1 = false; |
57 | 2226 | boolean modified = false; |
58 | 35054 | for (int i = i0; i < iN; i++) |
59 | { | |
60 | 32828 | char c = s.charAt(i); |
61 | 32828 | boolean mf = true, app = true; |
62 | 32828 | if (c == '\\') |
63 | { | |
64 | 3021 | app = false; |
65 | 3021 | i++; |
66 | 3021 | if (i < s.length()) |
67 | { | |
68 | 3021 | char c2 = s.charAt(i); |
69 | 3021 | switch (c2) |
70 | { | |
71 | 0 | case 'Q': |
72 | 0 | litmode = true; |
73 | 0 | break; |
74 | 0 | case 'U': |
75 | 0 | ucmode = true; |
76 | 0 | break; |
77 | 0 | case 'L': |
78 | 0 | lcmode = true; |
79 | 0 | break; |
80 | 0 | case 'u': |
81 | 0 | uc1 = true; |
82 | 0 | break; |
83 | 0 | case 'l': |
84 | 0 | lc1 = true; |
85 | 0 | break; |
86 | 0 | case 'E': |
87 | 0 | uc1 = lc1 = ucmode = lcmode = litmode = false; |
88 | 0 | break; |
89 | 3021 | default: |
90 | 3021 | if (keepbs) |
91 | { | |
92 | 3021 | sb.append('\\'); |
93 | } | |
94 | 3021 | c = c2; |
95 | 3021 | if (keepbs) |
96 | { | |
97 | 3021 | mf = false; |
98 | } | |
99 | 3021 | app = true; |
100 | 3021 | break; |
101 | } | |
102 | 3021 | modified |= mf; |
103 | } | |
104 | } | |
105 | 32828 | if (app) |
106 | { | |
107 | 32828 | if (lc1) |
108 | { | |
109 | 0 | c = lc(c); |
110 | 0 | lc1 = false; |
111 | } | |
112 | 32828 | else if (uc1) |
113 | { | |
114 | 0 | c = uc(c); |
115 | 0 | uc1 = false; |
116 | } | |
117 | 32828 | else if (ucmode) |
118 | { | |
119 | 0 | c = uc(c); |
120 | } | |
121 | 32828 | else if (lcmode) |
122 | { | |
123 | 0 | c = lc(c); |
124 | } | |
125 | 32828 | if (litmode && needbs(c)) |
126 | { | |
127 | 0 | sb.append('\\'); |
128 | } | |
129 | 32828 | sb.append(c); |
130 | } | |
131 | } | |
132 | 2226 | return modified ? sb.toString() : s; |
133 | } | |
134 | ||
135 | 0 | final static char uc(char c) |
136 | { | |
137 | 0 | return CaseMgr.toUpperCase(c); |
138 | } | |
139 | ||
140 | 0 | final static char lc(char c) |
141 | { | |
142 | 0 | return CaseMgr.toLowerCase(c); |
143 | } | |
144 | ||
145 | 0 | final static boolean needbs(char c) |
146 | { | |
147 | 0 | if (c >= 'a' && c <= 'z') |
148 | { | |
149 | 0 | return false; |
150 | } | |
151 | 0 | if (c >= 'A' && c <= 'Z') |
152 | { | |
153 | 0 | return false; |
154 | } | |
155 | 0 | if (c >= '0' && c <= '9') |
156 | { | |
157 | 0 | return false; |
158 | } | |
159 | 0 | if (c == '_') |
160 | { | |
161 | 0 | return false; |
162 | } | |
163 | 0 | return true; |
164 | } | |
165 | ||
166 | 110 | final static Regex parse(String s) |
167 | { | |
168 | 110 | boolean igncase = false, optim = false, gFlag = false; |
169 | 110 | boolean sFlag = false, mFlag = false, xFlag = false; |
170 | ||
171 | 110 | StringBuffer s1 = new StringBuffer(); |
172 | 110 | StringBuffer s2 = new StringBuffer(); |
173 | 110 | int i = 0, count = 0; |
174 | 110 | char mode, delim = '/', cdelim = '/'; |
175 | 110 | if (s.length() >= 3 && s.charAt(0) == 's') |
176 | { | |
177 | 68 | mode = 's'; |
178 | 68 | delim = s.charAt(1); |
179 | 68 | cdelim = close(delim); |
180 | 68 | i = 2; |
181 | } | |
182 | 42 | else if (s.length() >= 2 && s.charAt(0) == 'm') |
183 | { | |
184 | 30 | mode = 'm'; |
185 | 30 | delim = s.charAt(1); |
186 | 30 | cdelim = close(delim); |
187 | 30 | i = 2; |
188 | } | |
189 | 12 | else if (s.length() >= 1 && s.charAt(0) == '/') |
190 | { | |
191 | 12 | mode = 'm'; |
192 | 12 | i = 1; |
193 | } | |
194 | else | |
195 | { | |
196 | 0 | try |
197 | { | |
198 | 0 | RegSyntaxError.endItAll("Regex.perlCode should be of the " |
199 | + "form s/// or m// or //"); | |
200 | } catch (RegSyntax rs) | |
201 | { | |
202 | } | |
203 | 0 | return null; |
204 | } | |
205 | 848 | for (; i < s.length(); i++) |
206 | { | |
207 | 848 | if (s.charAt(i) == '\\') |
208 | { | |
209 | 82 | s1.append('\\'); |
210 | 82 | i++; |
211 | } | |
212 | 766 | else if (s.charAt(i) == cdelim && count == 0) |
213 | { | |
214 | 110 | i++; |
215 | 110 | break; |
216 | } | |
217 | 656 | else if (s.charAt(i) == delim && cdelim != delim) |
218 | { | |
219 | 0 | count++; |
220 | } | |
221 | 656 | else if (s.charAt(i) == cdelim && cdelim != delim) |
222 | { | |
223 | 0 | count--; |
224 | } | |
225 | 738 | s1.append(s.charAt(i)); |
226 | } | |
227 | 110 | if (mode == 's' && cdelim != delim) |
228 | { | |
229 | 0 | while (i < s.length() && Prop.isWhite(s.charAt(i))) |
230 | { | |
231 | 0 | i++; |
232 | } | |
233 | 0 | if (i >= s.length()) |
234 | { | |
235 | 0 | try |
236 | { | |
237 | 0 | RegSyntaxError.endItAll("" + mode + delim + " needs " + cdelim); |
238 | } catch (RegSyntax rs) | |
239 | { | |
240 | } | |
241 | 0 | return null; |
242 | } | |
243 | 0 | cdelim = close(delim = s.charAt(i)); |
244 | 0 | i++; |
245 | } | |
246 | 110 | count = 0; |
247 | 110 | if (mode == 's') |
248 | { | |
249 | 166 | for (; i < s.length(); i++) |
250 | { | |
251 | 166 | if (s.charAt(i) == '\\') |
252 | { | |
253 | 0 | s2.append('\\'); |
254 | 0 | i++; |
255 | } | |
256 | 166 | else if (s.charAt(i) == cdelim && count == 0) |
257 | { | |
258 | 68 | i++; |
259 | 68 | break; |
260 | } | |
261 | 98 | else if (s.charAt(i) == delim && cdelim != delim) |
262 | { | |
263 | 0 | count++; |
264 | } | |
265 | 98 | else if (s.charAt(i) == cdelim && cdelim != delim) |
266 | { | |
267 | 0 | count--; |
268 | } | |
269 | 98 | s2.append(s.charAt(i)); |
270 | } | |
271 | } | |
272 | 110 | for (; i < s.length(); i++) |
273 | { | |
274 | 0 | char c = s.charAt(i); |
275 | 0 | switch (c) |
276 | { | |
277 | 0 | case 'x': |
278 | 0 | xFlag = true; |
279 | 0 | break; |
280 | 0 | case 'i': |
281 | 0 | igncase = true; |
282 | 0 | break; |
283 | 0 | case 'o': |
284 | 0 | optim = true; |
285 | 0 | break; |
286 | 0 | case 's': |
287 | 0 | sFlag = true; |
288 | 0 | break; |
289 | 0 | case 'm': |
290 | 0 | mFlag = true; |
291 | 0 | break; |
292 | 0 | case 'g': |
293 | 0 | gFlag = true; |
294 | 0 | break; |
295 | 0 | default: |
296 | ||
297 | // syntax error! | |
298 | 0 | try |
299 | { | |
300 | 0 | RegSyntaxError.endItAll("Illegal flag to pattern: " + c); |
301 | } catch (RegSyntax rs) | |
302 | { | |
303 | } | |
304 | 0 | return null; |
305 | } | |
306 | } | |
307 | 110 | Regex r = new Regex(); |
308 | 110 | try |
309 | { | |
310 | 110 | String pat = s1.toString(), reprul = s2.toString(); |
311 | 110 | if (xFlag) |
312 | { | |
313 | 0 | pat = strip(pat); |
314 | 0 | reprul = strip(reprul); |
315 | } | |
316 | 110 | r.compile(pat); |
317 | 109 | r.ignoreCase |= igncase; |
318 | 109 | r.gFlag |= gFlag; |
319 | 109 | r.sFlag |= sFlag; |
320 | 109 | r.mFlag |= mFlag; |
321 | 109 | if (optim) |
322 | { | |
323 | 0 | r.optimize(); |
324 | } | |
325 | 109 | if (delim == '\'') |
326 | { | |
327 | 0 | r.setReplaceRule(new StringRule(reprul)); |
328 | } | |
329 | else | |
330 | { | |
331 | 109 | r.setReplaceRule(ReplaceRule.perlCode(reprul)); |
332 | } | |
333 | } catch (RegSyntax rs) | |
334 | { | |
335 | 1 | r = null; |
336 | } | |
337 | 110 | return r; |
338 | } | |
339 | ||
340 | 0 | static String strip(String s) |
341 | { | |
342 | 0 | StringBuffer sb = new StringBuffer(); |
343 | 0 | for (int i = 0; i < s.length(); i++) |
344 | { | |
345 | 0 | char c = s.charAt(i); |
346 | 0 | if (Prop.isWhite(c)) |
347 | { | |
348 | 0 | ; |
349 | } | |
350 | 0 | else if (c == '#') |
351 | { | |
352 | 0 | i++; |
353 | 0 | while (i < s.length()) |
354 | { | |
355 | 0 | if (s.charAt(i) == '\n') |
356 | { | |
357 | 0 | break; |
358 | } | |
359 | 0 | i++; |
360 | } | |
361 | } | |
362 | 0 | else if (c == '\\') |
363 | { | |
364 | 0 | sb.append(c); |
365 | 0 | sb.append(s.charAt(++i)); |
366 | } | |
367 | else | |
368 | { | |
369 | 0 | sb.append(c); |
370 | } | |
371 | } | |
372 | 0 | return sb.toString(); |
373 | } | |
374 | } |