Class |
Line # |
Actions |
|||
---|---|---|---|---|---|
XMLTokener | 36 | 192 | 84 |
1 | package org.json; | |
2 | ||
3 | /* | |
4 | Copyright (c) 2002 JSON.org | |
5 | ||
6 | Permission is hereby granted, free of charge, to any person obtaining a copy | |
7 | of this software and associated documentation files (the "Software"), to deal | |
8 | in the Software without restriction, including without limitation the rights | |
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
10 | copies of the Software, and to permit persons to whom the Software is | |
11 | furnished to do so, subject to the following conditions: | |
12 | ||
13 | The above copyright notice and this permission notice shall be included in all | |
14 | copies or substantial portions of the Software. | |
15 | ||
16 | The Software shall be used for Good, not Evil. | |
17 | ||
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
24 | SOFTWARE. | |
25 | */ | |
26 | ||
27 | import java.io.Reader; | |
28 | ||
29 | /** | |
30 | * The XMLTokener extends the JSONTokener to provide additional methods for the | |
31 | * parsing of XML texts. | |
32 | * | |
33 | * @author JSON.org | |
34 | * @version 2015-12-09 | |
35 | */ | |
36 | public class XMLTokener extends JSONTokener | |
37 | { | |
38 | ||
39 | /** | |
40 | * The table of entity values. It initially contains Character values for amp, | |
41 | * apos, gt, lt, quot. | |
42 | */ | |
43 | public static final java.util.HashMap<String, Character> entity; | |
44 | ||
45 | 0 | static |
46 | { | |
47 | 0 | entity = new java.util.HashMap<String, Character>(8); |
48 | 0 | entity.put("amp", XML.AMP); |
49 | 0 | entity.put("apos", XML.APOS); |
50 | 0 | entity.put("gt", XML.GT); |
51 | 0 | entity.put("lt", XML.LT); |
52 | 0 | entity.put("quot", XML.QUOT); |
53 | } | |
54 | ||
55 | /** | |
56 | * Construct an XMLTokener from a Reader. | |
57 | * | |
58 | * @param r | |
59 | * A source reader. | |
60 | */ | |
61 | 0 | public XMLTokener(Reader r) |
62 | { | |
63 | 0 | super(r); |
64 | } | |
65 | ||
66 | /** | |
67 | * Construct an XMLTokener from a string. | |
68 | * | |
69 | * @param s | |
70 | * A source string. | |
71 | */ | |
72 | 0 | public XMLTokener(String s) |
73 | { | |
74 | 0 | super(s); |
75 | } | |
76 | ||
77 | /** | |
78 | * Get the text in the CDATA block. | |
79 | * | |
80 | * @return The string up to the <code>]]></code>. | |
81 | * @throws JSONException | |
82 | * If the <code>]]></code> is not found. | |
83 | */ | |
84 | 0 | public String nextCDATA() throws JSONException |
85 | { | |
86 | 0 | char c; |
87 | 0 | int i; |
88 | 0 | StringBuilder sb = new StringBuilder(); |
89 | 0 | while (more()) |
90 | { | |
91 | 0 | c = next(); |
92 | 0 | sb.append(c); |
93 | 0 | i = sb.length() - 3; |
94 | 0 | if (i >= 0 && sb.charAt(i) == ']' && sb.charAt(i + 1) == ']' |
95 | && sb.charAt(i + 2) == '>') | |
96 | { | |
97 | 0 | sb.setLength(i); |
98 | 0 | return sb.toString(); |
99 | } | |
100 | } | |
101 | 0 | throw syntaxError("Unclosed CDATA"); |
102 | } | |
103 | ||
104 | /** | |
105 | * Get the next XML outer token, trimming whitespace. There are two kinds of | |
106 | * tokens: the '<' character which begins a markup tag, and the content text | |
107 | * between markup tags. | |
108 | * | |
109 | * @return A string, or a '<' Character, or null if there is no more source | |
110 | * text. | |
111 | * @throws JSONException | |
112 | */ | |
113 | 0 | public Object nextContent() throws JSONException |
114 | { | |
115 | 0 | char c; |
116 | 0 | StringBuilder sb; |
117 | 0 | do |
118 | { | |
119 | 0 | c = next(); |
120 | 0 | } while (Character.isWhitespace(c)); |
121 | 0 | if (c == 0) |
122 | { | |
123 | 0 | return null; |
124 | } | |
125 | 0 | if (c == '<') |
126 | { | |
127 | 0 | return XML.LT; |
128 | } | |
129 | 0 | sb = new StringBuilder(); |
130 | 0 | for (;;) |
131 | { | |
132 | 0 | if (c == 0) |
133 | { | |
134 | 0 | return sb.toString().trim(); |
135 | } | |
136 | 0 | if (c == '<') |
137 | { | |
138 | 0 | back(); |
139 | 0 | return sb.toString().trim(); |
140 | } | |
141 | 0 | if (c == '&') |
142 | { | |
143 | 0 | sb.append(nextEntity(c)); |
144 | } | |
145 | else | |
146 | { | |
147 | 0 | sb.append(c); |
148 | } | |
149 | 0 | c = next(); |
150 | } | |
151 | } | |
152 | ||
153 | /** | |
154 | * Return the next entity. These entities are translated to Characters: | |
155 | * <code>& ' > < "</code>. | |
156 | * | |
157 | * @param ampersand | |
158 | * An ampersand character. | |
159 | * @return A Character or an entity String if the entity is not recognized. | |
160 | * @throws JSONException | |
161 | * If missing ';' in XML entity. | |
162 | */ | |
163 | 0 | public Object nextEntity(char ampersand) throws JSONException |
164 | { | |
165 | 0 | StringBuilder sb = new StringBuilder(); |
166 | 0 | for (;;) |
167 | { | |
168 | 0 | char c = next(); |
169 | 0 | if (Character.isLetterOrDigit(c) || c == '#') |
170 | { | |
171 | 0 | sb.append(Character.toLowerCase(c)); |
172 | } | |
173 | 0 | else if (c == ';') |
174 | { | |
175 | 0 | break; |
176 | } | |
177 | else | |
178 | { | |
179 | 0 | throw syntaxError("Missing ';' in XML entity: &" + sb); |
180 | } | |
181 | } | |
182 | 0 | String string = sb.toString(); |
183 | 0 | return unescapeEntity(string); |
184 | } | |
185 | ||
186 | /** | |
187 | * Unescapes an XML entity encoding; | |
188 | * | |
189 | * @param e | |
190 | * entity (only the actual entity value, not the preceding & or | |
191 | * ending ; | |
192 | * @return | |
193 | */ | |
194 | 0 | static String unescapeEntity(String e) |
195 | { | |
196 | // validate | |
197 | 0 | if (e == null || e.isEmpty()) |
198 | { | |
199 | 0 | return ""; |
200 | } | |
201 | // if our entity is an encoded unicode point, parse it. | |
202 | 0 | if (e.charAt(0) == '#') |
203 | { | |
204 | 0 | int cp; |
205 | 0 | if (e.charAt(1) == 'x') |
206 | { | |
207 | // hex encoded unicode | |
208 | 0 | cp = Integer.parseInt(e.substring(2), 16); |
209 | } | |
210 | else | |
211 | { | |
212 | // decimal encoded unicode | |
213 | 0 | cp = Integer.parseInt(e.substring(1)); |
214 | } | |
215 | 0 | return new String(new int[] { cp }, 0, 1); |
216 | } | |
217 | 0 | Character knownEntity = entity.get(e); |
218 | 0 | if (knownEntity == null) |
219 | { | |
220 | // we don't know the entity so keep it encoded | |
221 | 0 | return '&' + e + ';'; |
222 | } | |
223 | 0 | return knownEntity.toString(); |
224 | } | |
225 | ||
226 | /** | |
227 | * Returns the next XML meta token. This is used for skipping over <!...> and | |
228 | * <?...?> structures. | |
229 | * | |
230 | * @return Syntax characters (<code>< > / = ! ?</code>) are returned as | |
231 | * Character, and strings and names are returned as Boolean. We don't | |
232 | * care what the values actually are. | |
233 | * @throws JSONException | |
234 | * If a string is not properly closed or if the XML is badly | |
235 | * structured. | |
236 | */ | |
237 | 0 | public Object nextMeta() throws JSONException |
238 | { | |
239 | 0 | char c; |
240 | 0 | char q; |
241 | 0 | do |
242 | { | |
243 | 0 | c = next(); |
244 | 0 | } while (Character.isWhitespace(c)); |
245 | 0 | switch (c) |
246 | { | |
247 | 0 | case 0: |
248 | 0 | throw syntaxError("Misshaped meta tag"); |
249 | 0 | case '<': |
250 | 0 | return XML.LT; |
251 | 0 | case '>': |
252 | 0 | return XML.GT; |
253 | 0 | case '/': |
254 | 0 | return XML.SLASH; |
255 | 0 | case '=': |
256 | 0 | return XML.EQ; |
257 | 0 | case '!': |
258 | 0 | return XML.BANG; |
259 | 0 | case '?': |
260 | 0 | return XML.QUEST; |
261 | 0 | case '"': |
262 | 0 | case '\'': |
263 | 0 | q = c; |
264 | 0 | for (;;) |
265 | { | |
266 | 0 | c = next(); |
267 | 0 | if (c == 0) |
268 | { | |
269 | 0 | throw syntaxError("Unterminated string"); |
270 | } | |
271 | 0 | if (c == q) |
272 | { | |
273 | 0 | return Boolean.TRUE; |
274 | } | |
275 | } | |
276 | 0 | default: |
277 | 0 | for (;;) |
278 | { | |
279 | 0 | c = next(); |
280 | 0 | if (Character.isWhitespace(c)) |
281 | { | |
282 | 0 | return Boolean.TRUE; |
283 | } | |
284 | 0 | switch (c) |
285 | { | |
286 | 0 | case 0: |
287 | 0 | case '<': |
288 | 0 | case '>': |
289 | 0 | case '/': |
290 | 0 | case '=': |
291 | 0 | case '!': |
292 | 0 | case '?': |
293 | 0 | case '"': |
294 | 0 | case '\'': |
295 | 0 | back(); |
296 | 0 | return Boolean.TRUE; |
297 | } | |
298 | } | |
299 | } | |
300 | } | |
301 | ||
302 | /** | |
303 | * Get the next XML Token. These tokens are found inside of angle brackets. It | |
304 | * may be one of these characters: <code>/ > = ! ?</code> or it may be a | |
305 | * string wrapped in single quotes or double quotes, or it may be a name. | |
306 | * | |
307 | * @return a String or a Character. | |
308 | * @throws JSONException | |
309 | * If the XML is not well formed. | |
310 | */ | |
311 | 0 | public Object nextToken() throws JSONException |
312 | { | |
313 | 0 | char c; |
314 | 0 | char q; |
315 | 0 | StringBuilder sb; |
316 | 0 | do |
317 | { | |
318 | 0 | c = next(); |
319 | 0 | } while (Character.isWhitespace(c)); |
320 | 0 | switch (c) |
321 | { | |
322 | 0 | case 0: |
323 | 0 | throw syntaxError("Misshaped element"); |
324 | 0 | case '<': |
325 | 0 | throw syntaxError("Misplaced '<'"); |
326 | 0 | case '>': |
327 | 0 | return XML.GT; |
328 | 0 | case '/': |
329 | 0 | return XML.SLASH; |
330 | 0 | case '=': |
331 | 0 | return XML.EQ; |
332 | 0 | case '!': |
333 | 0 | return XML.BANG; |
334 | 0 | case '?': |
335 | 0 | return XML.QUEST; |
336 | ||
337 | // Quoted string | |
338 | ||
339 | 0 | case '"': |
340 | 0 | case '\'': |
341 | 0 | q = c; |
342 | 0 | sb = new StringBuilder(); |
343 | 0 | for (;;) |
344 | { | |
345 | 0 | c = next(); |
346 | 0 | if (c == 0) |
347 | { | |
348 | 0 | throw syntaxError("Unterminated string"); |
349 | } | |
350 | 0 | if (c == q) |
351 | { | |
352 | 0 | return sb.toString(); |
353 | } | |
354 | 0 | if (c == '&') |
355 | { | |
356 | 0 | sb.append(nextEntity(c)); |
357 | } | |
358 | else | |
359 | { | |
360 | 0 | sb.append(c); |
361 | } | |
362 | } | |
363 | 0 | default: |
364 | ||
365 | // Name | |
366 | ||
367 | 0 | sb = new StringBuilder(); |
368 | 0 | for (;;) |
369 | { | |
370 | 0 | sb.append(c); |
371 | 0 | c = next(); |
372 | 0 | if (Character.isWhitespace(c)) |
373 | { | |
374 | 0 | return sb.toString(); |
375 | } | |
376 | 0 | switch (c) |
377 | { | |
378 | 0 | case 0: |
379 | 0 | return sb.toString(); |
380 | 0 | case '>': |
381 | 0 | case '/': |
382 | 0 | case '=': |
383 | 0 | case '!': |
384 | 0 | case '?': |
385 | 0 | case '[': |
386 | 0 | case ']': |
387 | 0 | back(); |
388 | 0 | return sb.toString(); |
389 | 0 | case '<': |
390 | 0 | case '"': |
391 | 0 | case '\'': |
392 | 0 | throw syntaxError("Bad character in a name"); |
393 | } | |
394 | } | |
395 | } | |
396 | } | |
397 | ||
398 | /** | |
399 | * Skip characters until past the requested string. If it is not found, we are | |
400 | * left at the end of the source with a result of false. | |
401 | * | |
402 | * @param to | |
403 | * A string to skip past. | |
404 | */ | |
405 | // The Android implementation of JSONTokener has a public method of public | |
406 | // void skipPast(String to) | |
407 | // even though ours does not have that method, to have API compatibility, our | |
408 | // method in the subclass | |
409 | // should match. | |
410 | 0 | public void skipPast(String to) |
411 | { | |
412 | 0 | boolean b; |
413 | 0 | char c; |
414 | 0 | int i; |
415 | 0 | int j; |
416 | 0 | int offset = 0; |
417 | 0 | int length = to.length(); |
418 | 0 | char[] circle = new char[length]; |
419 | ||
420 | /* | |
421 | * First fill the circle buffer with as many characters as are in the | |
422 | * to string. If we reach an early end, bail. | |
423 | */ | |
424 | ||
425 | 0 | for (i = 0; i < length; i += 1) |
426 | { | |
427 | 0 | c = next(); |
428 | 0 | if (c == 0) |
429 | { | |
430 | 0 | return; |
431 | } | |
432 | 0 | circle[i] = c; |
433 | } | |
434 | ||
435 | /* We will loop, possibly for all of the remaining characters. */ | |
436 | ||
437 | 0 | for (;;) |
438 | { | |
439 | 0 | j = offset; |
440 | 0 | b = true; |
441 | ||
442 | /* Compare the circle buffer with the to string. */ | |
443 | ||
444 | 0 | for (i = 0; i < length; i += 1) |
445 | { | |
446 | 0 | if (circle[j] != to.charAt(i)) |
447 | { | |
448 | 0 | b = false; |
449 | 0 | break; |
450 | } | |
451 | 0 | j += 1; |
452 | 0 | if (j >= length) |
453 | { | |
454 | 0 | j -= length; |
455 | } | |
456 | } | |
457 | ||
458 | /* If we exit the loop with b intact, then victory is ours. */ | |
459 | ||
460 | 0 | if (b) |
461 | { | |
462 | 0 | return; |
463 | } | |
464 | ||
465 | /* Get the next character. If there isn't one, then defeat is ours. */ | |
466 | ||
467 | 0 | c = next(); |
468 | 0 | if (c == 0) |
469 | { | |
470 | 0 | return; |
471 | } | |
472 | /* | |
473 | * Shove the character in the circle buffer and advance the | |
474 | * circle offset. The offset is mod n. | |
475 | */ | |
476 | 0 | circle[offset] = c; |
477 | 0 | offset += 1; |
478 | 0 | if (offset >= length) |
479 | { | |
480 | 0 | offset -= length; |
481 | } | |
482 | } | |
483 | } | |
484 | } |