Clover icon

Coverage Report

  1. Project Clover database Wed Nov 6 2024 00:56:24 GMT
  2. Package jalview.util

File ParseHtmlBodyAndLinks.java

 

Coverage histogram

../../img/srcFileCovDistChart9.png
12% of files have more coverage

Code metrics

20
42
7
1
183
101
22
0.52
6
7
3.14

Classes

Class Line # Actions
36 42 22
87% (0.8695652)
 

Contributing tests

This file is covered by 93 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.util;
22   
23    import java.util.Locale;
24   
25    import java.util.ArrayList;
26    import java.util.List;
27    import java.util.StringTokenizer;
28    import java.util.regex.Pattern;
29   
/**
 * Utility class for extracting hyperlinks and plain text from a description
 * string that may contain HTML markup.
 * <p>
 * All parsing happens in the constructor; the results are then available via
 * {@link #getContent()}, {@link #getLinks()}, {@link #isHtmlContent()} and
 * {@link #getNonHtmlContent()}.
 *
 * @author jprocter
 *
 */
public class ParseHtmlBodyAndLinks
{
  private static final Pattern LEFT_ANGLE_BRACKET_PATTERN = Pattern
          .compile("<");

  /** upper-cased start of an anchor tag that carries a link target */
  private static final String HREF_PREFIX = "A HREF=";

  /** the description as passed to the constructor (null if it was empty) */
  String orig = null;

  /** true if the description contained an {@code <html>} tag (any case) */
  boolean htmlContent = true;

  /** "label|url" entries, in order of appearance */
  List<String> links = new ArrayList<String>();

  /** the parsed text, with entities translated (null if input was empty) */
  String content;

  /**
   * @return the original description, or null if it was null or empty
   */
  public String getOrig()
  {
    return orig;
  }

  /**
   * @return true if the content looked like HTML
   */
  public boolean isHtmlContent()
  {
    return htmlContent;
  }

  /**
   * result of parsing description - with or without HTML tags
   *
   * @return the parsed content, or null if the description was null or empty
   */
  public String getContent()
  {
    return content;
  }

  /**
   * list of Label|Link encoded URL links extracted from HTML
   *
   * @return the (possibly empty) list of links, never null
   */
  public List<String> getLinks()
  {
    return links;
  }

  /**
   * Parses the given html and
   * <ul>
   * <li>extracts any 'href' links to a list of "displayName|url" strings,
   * retrievable by #getLinks</li>
   * <li>extracts the remaining text (with %LINK% placeholders replacing hrefs),
   * retrievable by #getContent</li>
   * </ul>
   * Text in front of a {@code <br>} tag is kept and followed by
   * {@code newline}; all other tags are dropped and only their surrounding
   * text is kept.
   *
   * @param description
   *          - html or text content to be parsed
   * @param removeHTML
   *          flag to indicate if HTML tags should be removed if they are
   *          present. When set and the description has no {@code <html>}
   *          tag, the content is the whole description with only the
   *          entities translated.
   * @param newline
   *          text appended in place of each {@code <br>} tag
   */
  public ParseHtmlBodyAndLinks(String description, boolean removeHTML,
          String newline)
  {
    if (description == null || description.length() == 0)
    {
      htmlContent = false;
      return;
    }
    orig = description;
    if (description.toUpperCase(Locale.ROOT).indexOf("<HTML>") == -1)
    {
      htmlContent = false;
    }

    StringBuilder sb = new StringBuilder(description.length());
    StringTokenizer st = new StringTokenizer(description, "<");
    while (st.hasMoreTokens())
    {
      /*
       * split on '>' so each chunk is "text<tag" : some text optionally
       * followed by the inside of one tag
       */
      String token = st.nextToken(">");
      String tag = null;
      int startTag = token.indexOf('<');
      if (startTag > -1)
      {
        tag = token.substring(startTag + 1);
        token = token.substring(0, startTag);
      }

      /*
       * the text in front of a tag is always kept, whatever the tag is
       * (including text that happens to start with '/' or to equal "html")
       */
      sb.append(token);
      if (tag == null)
      {
        continue;
      }

      if (tag.toUpperCase(Locale.ROOT).startsWith(HREF_PREFIX))
      {
        String link = extractHref(tag);
        String label = nextLabel(st);
        links.add(label + "|" + link);
        sb.append(label).append("%LINK%");
      }
      else if (tag.equalsIgnoreCase("br"))
      {
        sb.append(newline);
      }
      // any other tag (html, closing tags, formatting...) is discarded
    }

    if (removeHTML && !htmlContent)
    {
      // instead of parsing the html into plaintext
      // clean the description ready for embedding in html
      sb = new StringBuilder(LEFT_ANGLE_BRACKET_PATTERN.matcher(description)
              .replaceAll("&lt;"));
    }
    content = translateEntities(sb.toString());
  }

  /**
   * Returns the value of the href attribute of an anchor tag. The value may be
   * enclosed in double or single quotes, or unquoted (in which case it ends at
   * the first whitespace). Any attributes following the href are ignored.
   *
   * @param tag
   *          tag text (without angle brackets) starting with "a href=" in any
   *          case
   * @return the link target, or an empty string if there is none
   */
  private static String extractHref(String tag)
  {
    String value = tag.substring(HREF_PREFIX.length()).trim();
    if (value.length() == 0)
    {
      return "";
    }
    char first = value.charAt(0);
    if (first == '"' || first == '\'')
    {
      int close = value.indexOf(first, 1);
      return close == -1 ? value.substring(1) : value.substring(1, close);
    }
    int end = 0;
    while (end < value.length()
            && !Character.isWhitespace(value.charAt(end)))
    {
      end++;
    }
    return value.substring(0, end);
  }

  /**
   * Reads the text following an anchor tag, up to the next angle bracket.
   *
   * @param st
   *          tokenizer positioned just after the anchor tag
   * @return the link label, or an empty string if the input ends with the
   *         anchor tag
   */
  private static String nextLabel(StringTokenizer st)
  {
    try
    {
      return st.nextToken("<>");
    } catch (java.util.NoSuchElementException ignored)
    {
      /*
       * StringTokenizer cannot be asked "is there a token for these new
       * delimiters" without consuming it, so a truncated anchor is detected
       * here rather than failing the whole parse
       */
      return "";
    }
  }

  /**
   * Replaces the entities &amp;lt; &amp;gt; and &amp;amp; with the characters
   * they stand for.
   *
   * @param s
   *          text to decode
   * @return the decoded text
   */
  private String translateEntities(String s)
  {
    s = s.replace("&lt;", "<");
    s = s.replace("&gt;", ">");
    // ampersand is decoded last so that an escaped entity such as
    // "&amp;lt;" is decoded exactly once
    s = s.replace("&amp;", "&");
    return s;
  }

  /**
   * get either the parsed content or the original, depending on whether the
   * original looked like html content or not.
   *
   * @return the parsed content if the original contained an html tag, else
   *         the original description
   */
  public String getNonHtmlContent()
  {
    return isHtmlContent() ? content : orig;
  }

}
183    }