Clover icon

Coverage Report

  1. Project Clover database Fri Dec 6 2024 13:47:14 GMT
  2. Package jalview.util

File ParseHtmlBodyAndLinks.java

 

Coverage histogram

../../img/srcFileCovDistChart9.png
12% of files have more coverage

Code metrics

20
42
7
1
183
101
22
0.52
6
7
3.14

Classes

Class Line # Actions
36 42 22
0.869565287%
 

Contributing tests

This file is covered by 90 tests. .

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.util;
22   
23    import java.util.Locale;
24   
25    import java.util.ArrayList;
26    import java.util.List;
27    import java.util.StringTokenizer;
28    import java.util.regex.Pattern;
29   
30    /**
31    * utility class for dealing with HTML link extraction
32    *
33    * @author jprocter
34    *
35    */
 
36    public class ParseHtmlBodyAndLinks
37    {
38    private static final Pattern LEFT_ANGLE_BRACKET_PATTERN = Pattern
39    .compile("<");
40   
41    String orig = null;
42   
 
43  0 toggle public String getOrig()
44    {
45  0 return orig;
46    }
47   
48    boolean htmlContent = true;
49   
50    /**
51    * @return true if the content looked like HTML
52    */
 
53  30581 toggle public boolean isHtmlContent()
54    {
55  30581 return htmlContent;
56    }
57   
58    List<String> links = new ArrayList<String>();
59   
60    String content;
61   
62    /**
63    * result of parsing description - with or without HTML tags
64    *
65    * @return
66    */
 
67  5 toggle public String getContent()
68    {
69   
70  5 return content;
71    }
72   
73    /**
74    * list of Label|Link encoded URL links extracted from HTML
75    *
76    * @return
77    */
 
78  547 toggle public List<String> getLinks()
79    {
80  547 return links;
81    }
82   
83    /**
84    * Parses the given html and
85    * <ul>
86    * <li>extracts any 'href' links to a list of "displayName|url" strings,
87    * retrievable by #getLinks</li>
88    * <li>extracts the remaining text (with %LINK% placeholders replacing hrefs),
89    * retrievable by #getContent</li>
90    * </ul>
91    *
92    * @param description
93    * - html or text content to be parsed
94    * @param removeHTML
95    * flag to indicate if HTML tags should be removed if they are
96    * present.
97    * @param newline
98    */
 
99  30795 toggle public ParseHtmlBodyAndLinks(String description, boolean removeHTML,
100    String newline)
101    {
102  30795 if (description == null || description.length() == 0)
103    {
104  0 htmlContent = false;
105  0 return;
106    }
107  30795 StringBuilder sb = new StringBuilder(description.length());
108  30795 if (description.toUpperCase(Locale.ROOT).indexOf("<HTML>") == -1)
109    {
110  30654 htmlContent = false;
111    }
112  30795 orig = description;
113  30795 StringTokenizer st = new StringTokenizer(description, "<");
114  30795 String token, link;
115  30795 int startTag;
116  30795 String tag = null;
117  62031 while (st.hasMoreElements())
118    {
119  31236 token = st.nextToken(">");
120  31236 if (token.equalsIgnoreCase("html") || token.startsWith("/"))
121    {
122  0 continue;
123    }
124   
125  31236 tag = null;
126  31236 startTag = token.indexOf("<");
127   
128  31236 if (startTag > -1)
129    {
130  568 tag = token.substring(startTag + 1);
131  568 token = token.substring(0, startTag);
132    }
133   
134  31236 if (tag != null && tag.toUpperCase(Locale.ROOT).startsWith("A HREF="))
135    {
136  140 if (token.length() > 0)
137    {
138  139 sb.append(token);
139    }
140  140 link = tag.substring(tag.indexOf("\"") + 1, tag.length() - 1);
141  140 String label = st.nextToken("<>");
142  140 links.add(label + "|" + link);
143  140 sb.append(label + "%LINK%");
144    }
145  31096 else if (tag != null && tag.equalsIgnoreCase("br"))
146    {
147  0 sb.append(newline);
148    }
149    else
150    {
151  31096 sb.append(token);
152    }
153    }
154  30795 if (removeHTML && !htmlContent)
155    {
156    // instead of parsing the html into plaintext
157    // clean the description ready for embedding in html
158  30471 sb = new StringBuilder(LEFT_ANGLE_BRACKET_PATTERN.matcher(description)
159    .replaceAll("&lt;"));
160    }
161  30795 content = translateEntities(sb.toString());
162    }
163   
 
164  30795 toggle private String translateEntities(String s)
165    {
166  30795 s = s.replaceAll("&amp;", "&");
167  30795 s = s.replaceAll("&lt;", "<");
168  30795 s = s.replaceAll("&gt;", ">");
169  30795 return s;
170    }
171   
172    /**
173    * get either the parsed content or the original, depending on whether the
174    * original looked like html content or not.
175    *
176    * @return
177    */
 
178  30581 toggle public String getNonHtmlContent()
179    {
180  30581 return isHtmlContent() ? content : orig;
181    }
182   
183    }