1. Project Clover database Fri Dec 6 2024 13:47:14 GMT
  2. Package com.stevesoft.pat

File SkipBMH.java

 

Coverage histogram

../../../img/srcFileCovDistChart0.png
60% of files have more coverage

Code metrics

72
91
10
1
279
212
46
0.51
9.1
10
4.6

Classes

Class
Line #
Actions
SkipBMH 26 91 46
0.00%
 

Contributing tests

No tests hitting this source file were found.

Source view

1    //
2    // This software is now distributed according to
3    // the Lesser Gnu Public License. Please see
4    // http://www.gnu.org/copyleft/lesser.txt for
5    // the details.
6    // -- Happy Computing!
7    //
8    package com.stevesoft.pat;
9   
10    import com.stevesoft.pat.wrap.StringWrap;
11   
12    /**
13    * Like Skip, but implements a
14    * <a href="http://www.dcc.uchile.cl/~rbaeza/handbook/algs/7/713b.srch.p.html">
15    * Boyer-Moore-Horspool</a> type search method that has been modified to be more
16    * like a "T-search" (see the Michael Tamm''s article in <i>C'T, magazin fuer
17    * computer und technic</i>, August 97 p 292). Yet another important source of
18    * information for me was the
19    * <a href="http://www.go2net.com/people/paulp/deep/1997/05/14/"> Deep Magic</a>
20    * article on string searching. As of this writing, I can beat String's indexOf
21    * method in many cases.
22    *
23    * @see com.stevesoft.pat.Skip
24    * @see com.stevesoft.pat.Skip2
25    */
 
26    public class SkipBMH extends Skip
27    {
28    // This number could be 256, but I think it's
29    // big enough. Note, it must be a power of 2.
30    final int MAX_CHAR = 64;
31   
32    final char[] skip = new char[MAX_CHAR];
33   
34    int sm1;
35   
36    int jump_ahead = 0;
37   
38    char uc, lc, tc, x;
39   
 
40  0 toggle final boolean exact(char c)
41    {
42  0 return (ign && anyc(c)) || c == x;
43    }
44   
 
45  0 toggle final boolean anyc(char c)
46    {
47  0 return c == uc || c == lc || c == tc;
48    }
49   
 
50  0 toggle public SkipBMH(String pt, boolean ign)
51    {
52  0 this(pt, ign, 0);
53    }
54   
 
55  0 toggle public SkipBMH(String pt)
56    {
57  0 this(pt, false, 0);
58    }
59   
 
60  0 toggle public SkipBMH(String pt, boolean ign, int offset)
61    {
62  0 super(pt, ign, offset);
63  0 for (int k = 0; k < MAX_CHAR; k++)
64    {
65  0 skip[k] = (char) src.length();
66    }
67   
68  0 sm1 = src.length() - 1;
69  0 x = src.charAt(sm1);
70  0 uc = CaseMgr.toUpperCase(x);
71  0 lc = CaseMgr.toLowerCase(x);
72  0 tc = CaseMgr.toTitleCase(x);
73   
74    // We don't really want 65536 long arrays in skip[],
75    // so we mask of the higher bits. This can be combined
76    // with ignore case, so accounting for upper
77    // case costs us nothing extra.
78  0 for (int k = 0; k < src.length() - 1; k++)
79    {
80  0 char x_ = src.charAt(k);
81  0 if (ign)
82    {
83  0 char uc_ = CaseMgr.toUpperCase(x_);
84  0 char lc_ = CaseMgr.toLowerCase(x_);
85  0 char tc_ = CaseMgr.toTitleCase(x_);
86  0 skip[uc_ & (MAX_CHAR - 1)] = (char) (src.length() - k - 1);
87  0 skip[lc_ & (MAX_CHAR - 1)] = (char) (src.length() - k - 1);
88  0 skip[tc_ & (MAX_CHAR - 1)] = (char) (src.length() - k - 1);
89    }
90    else
91    {
92  0 skip[x_ & (MAX_CHAR - 1)] = (char) (src.length() - k - 1);
93    }
94    }
95   
96    // This trick can be found in the July issue of
97    // C-T magazine. This makes the method a type of
98    // "T-search."
99  0 jump_ahead = src.length() - 1;
100  0 for (int k = 0; k < src.length() - 1; k++)
101    {
102  0 char y = src.charAt(sm1 - k - 1);
103  0 if (exact(y))
104    {
105  0 jump_ahead = k;
106  0 break;
107    }
108    }
109    }
110   
111    /**
112    * Set to true if you only want to compare two of the characters in the
113    * String.
114    */
 
115  0 toggle final public int searchRegion(String s, int start, int end)
116    {
117  0 return find(s, start, end);
118    }
119   
 
120  0 toggle final public int searchFrom(String s, int start)
121    {
122  0 return find(s, start, s.length());
123    }
124   
 
125  0 toggle final public int search(String s)
126    {
127  0 return find(s, 0, s.length());
128    }
129   
 
130  0 toggle public int find(String s, int start, int end)
131    {
132  0 start += offset + sm1;
133  0 int vend = min(s.length() - 1, end + sm1 + offset), k;
134  0 int vend1 = vend - jump_ahead;
135  0 if (ign)
136    {
137  0 for (k = start; k <= vend1; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
138    {
139    // table look-up is expensive, avoid it if possible
140  0 if (anyc(s.charAt(k)))
141    {
142  0 if (CaseMgr.regionMatches(src, ign, 0, s, k - sm1, sm1))
143    {
144  0 return k - sm1 - offset;
145    }
146  0 k += jump_ahead;
147    }
148    }
149  0 for (; k <= vend; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
150    {
151    // table look-up is expensive, avoid it if possible
152  0 if (anyc(s.charAt(k)))
153    {
154  0 if (CaseMgr.regionMatches(src, ign, 0, s, k - sm1, sm1))
155    {
156  0 return k - sm1 - offset;
157    }
158  0 k += jump_ahead;
159  0 if (k > vend)
160    {
161  0 return -1;
162    }
163    }
164    }
165    }
166    else
167    {
168  0 for (k = start; k <= vend1; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
169    {
170    // table look-up is expensive, avoid it if possible
171  0 if (x == s.charAt(k))
172    {
173    // if(src.regionMatches(0,s,k-sm1,sm1))
174  0 if (CaseMgr.regionMatches(src, false, 0, s, k - sm1, sm1))
175    {
176  0 return k - sm1 - offset;
177    }
178  0 k += jump_ahead;
179    }
180    }
181  0 for (; k <= vend; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
182    {
183    // table look-up is expensive, avoid it if possible
184  0 if (x == s.charAt(k))
185    {
186    // if(src.regionMatches(0,s,k-sm1,sm1))
187  0 if (CaseMgr.regionMatches(src, false, 0, s, k - sm1, sm1))
188    {
189  0 return k - sm1 - offset;
190    }
191  0 k += jump_ahead;
192  0 if (k > vend)
193    {
194  0 return -1;
195    }
196    }
197    }
198    }
199   
200  0 return -1;
201    }
202   
 
203  0 toggle public int find(StringLike s, int start, int end)
204    {
205  0 if (s instanceof StringWrap)
206    {
207  0 return find(s.toString(), start, end);
208    }
209  0 start += offset + sm1;
210  0 int vend = min(s.length() - 1, end + sm1 + offset), k;
211  0 int vend1 = vend - jump_ahead;
212  0 if (ign)
213    {
214  0 for (k = start; k <= vend1; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
215    {
216    // table look-up is expensive, avoid it if possible
217  0 if (anyc(s.charAt(k)))
218    {
219  0 if (CaseMgr.regionMatches(src, ign, 0, s, k - sm1, sm1))
220    {
221  0 return k - sm1 - offset;
222    }
223  0 k += jump_ahead;
224    }
225    }
226  0 for (; k <= vend; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
227    {
228    // table look-up is expensive, avoid it if possible
229  0 if (anyc(s.charAt(k)))
230    {
231  0 if (CaseMgr.regionMatches(src, ign, 0, s, k - sm1, sm1))
232    {
233  0 return k - sm1 - offset;
234    }
235  0 k += jump_ahead;
236  0 if (k > vend)
237    {
238  0 return -1;
239    }
240    }
241    }
242    }
243    else
244    {
245  0 for (k = start; k <= vend1; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
246    {
247    // table look-up is expensive, avoid it if possible
248  0 if (x == s.charAt(k))
249    {
250    // if(src.regionMatches(0,s,k-sm1,sm1))
251  0 if (CaseMgr.regionMatches(src, false, 0, s, k - sm1, sm1))
252    {
253  0 return k - sm1 - offset;
254    }
255  0 k += jump_ahead;
256    }
257    }
258  0 for (; k <= vend; k += skip[s.charAt(k) & (MAX_CHAR - 1)])
259    {
260    // table look-up is expensive, avoid it if possible
261  0 if (x == s.charAt(k))
262    {
263    // if(src.regionMatches(0,s,k-sm1,sm1))
264  0 if (CaseMgr.regionMatches(src, false, 0, s, k - sm1, sm1))
265    {
266  0 return k - sm1 - offset;
267    }
268  0 k += jump_ahead;
269  0 if (k > vend)
270    {
271  0 return -1;
272    }
273    }
274    }
275    }
276   
277  0 return -1;
278    }
279    }