Clover icon

jalviewX

  1. Project Clover database Wed Oct 31 2018 15:13:58 GMT
  2. Package jalview.util

File ParseHtmlBodyAndLinks.java

 

Coverage histogram

../../img/srcFileCovDistChart9.png
12% of files have more coverage

Code metrics

20
42
7
1
181
100
22
0.52
6
7
3.14

Classes

Class Line # Actions
34 42 22 9
0.8695652 87%
 

Contributing tests

No tests hitting this source file were found.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.util;
22   
23    import java.util.ArrayList;
24    import java.util.List;
25    import java.util.StringTokenizer;
26    import java.util.regex.Pattern;
27   
/**
 * Utility class for extracting hyperlinks and text from a (possibly HTML)
 * description string.
 * <p>
 * All parsing happens in the constructor; the results are then available from
 * {@link #getContent()}, {@link #getLinks()}, {@link #getNonHtmlContent()} and
 * {@link #isHtmlContent()}.
 *
 * @author jprocter
 *
 */
public class ParseHtmlBodyAndLinks
{
  private static final Pattern LEFT_ANGLE_BRACKET_PATTERN = Pattern
          .compile("<");

  /** Matches the {@code <html>} marker in any letter case. */
  private static final Pattern HTML_TAG_PATTERN = Pattern.compile("<html>",
          Pattern.CASE_INSENSITIVE);

  /** Start of an anchor tag (compared case-insensitively). */
  private static final String HREF_PREFIX = "A HREF=";

  /** The description as passed to the constructor (null if it was empty). */
  String orig = null;

  /**
   * @return the original, unparsed description, or null if the description
   *         given to the constructor was null or empty
   */
  public String getOrig()
  {
    return orig;
  }

  /** False if the description was empty or contained no {@code <html>} tag. */
  boolean htmlContent = true;

  /**
   * @return true if the content looked like HTML
   */
  public boolean isHtmlContent()
  {
    return htmlContent;
  }

  /** "label|url" entries, in the order the anchors were encountered. */
  List<String> links = new ArrayList<String>();

  /** Parsed text; stays null when the description was null or empty. */
  String content;

  /**
   * result of parsing description - with or without HTML tags
   *
   * @return the parsed text, or null if the description was null or empty
   */
  public String getContent()
  {
    return content;
  }

  /**
   * list of Label|Link encoded URL links extracted from HTML
   *
   * @return the internal (live) list of links; empty if none were found
   */
  public List<String> getLinks()
  {
    return links;
  }

  /**
   * Parses the given html and
   * <ul>
   * <li>extracts any 'href' links to a list of "displayName|url" strings,
   * retrievable by #getLinks</li>
   * <li>extracts the remaining text (with %LINK% placeholders replacing hrefs),
   * retrievable by #getContent</li>
   * </ul>
   * A null or empty description leaves the content and original text null and
   * the link list empty.
   *
   * @param description
   *          - html or text content to be parsed
   * @param removeHTML
   *          flag to indicate if HTML tags should be removed if they are
   *          present.
   * @param newline
   *          text appended in place of each {@code <br>} tag
   */
  public ParseHtmlBodyAndLinks(String description, boolean removeHTML,
          String newline)
  {
    if (description == null || description.length() == 0)
    {
      htmlContent = false;
      return;
    }
    StringBuilder sb = new StringBuilder(description.length());
    if (!HTML_TAG_PATTERN.matcher(description).find())
    {
      htmlContent = false;
    }
    orig = description;

    /*
     * Splitting on '>' yields tokens of the form "text<tag": the text that
     * precedes a tag followed by the tag's contents (without its closing '>').
     */
    StringTokenizer st = new StringTokenizer(description, "<");
    while (st.hasMoreElements())
    {
      String token = st.nextToken(">");
      if (token.equalsIgnoreCase("html") || token.startsWith("/"))
      {
        continue;
      }

      String tag = null;
      int startTag = token.indexOf('<');
      if (startTag > -1)
      {
        tag = token.substring(startTag + 1);
        token = token.substring(0, startTag);
      }

      if (tag != null && tag.regionMatches(true, 0, HREF_PREFIX, 0,
              HREF_PREFIX.length()))
      {
        sb.append(token);
        String link = extractHref(tag);
        String label = nextLabel(st);
        links.add(label + "|" + link);
        sb.append(label).append("%LINK%");
      }
      else if (tag != null && isLineBreak(tag))
      {
        // keep the text that preceded the break as well as the break itself
        sb.append(token).append(newline);
      }
      else
      {
        sb.append(token);
      }
    }
    if (removeHTML && !htmlContent)
    {
      // instead of parsing the html into plaintext
      // clean the description ready for embedding in html
      // NOTE(review): translateEntities() below turns "&lt;" back into "<",
      // so this escaping does not survive into 'content'; left unchanged
      // because callers may rely on the current result - confirm intent.
      sb = new StringBuilder(LEFT_ANGLE_BRACKET_PATTERN.matcher(description)
              .replaceAll("&lt;"));
    }
    content = translateEntities(sb.toString());
  }

  /**
   * Returns the value of the href attribute of an anchor tag whose text starts
   * with "a href=". Handles double-quoted, single-quoted and unquoted values,
   * and ignores any attributes that follow the href.
   *
   * @param tag
   *          tag contents without the enclosing angle brackets
   * @return the href value, or an empty string if there is none
   */
  private static String extractHref(String tag)
  {
    int end = tag.length();
    int pos = HREF_PREFIX.length();
    while (pos < end && Character.isWhitespace(tag.charAt(pos)))
    {
      pos++;
    }
    if (pos == end)
    {
      return "";
    }
    char first = tag.charAt(pos);
    if (first == '"' || first == '\'')
    {
      // quoted value: runs to the matching quote, or to the end if unclosed
      int close = tag.indexOf(first, pos + 1);
      return tag.substring(pos + 1, close < 0 ? end : close);
    }
    // unquoted value: runs to the next whitespace
    int stop = pos;
    while (stop < end && !Character.isWhitespace(tag.charAt(stop)))
    {
      stop++;
    }
    return tag.substring(pos, stop);
  }

  /**
   * Reads the text following an anchor tag, up to the next angle bracket.
   *
   * @param st
   *          tokenizer positioned just after the anchor tag's contents
   * @return the link label, or an empty string if no text follows the tag
   */
  private static String nextLabel(StringTokenizer st)
  {
    try
    {
      return st.nextToken("<>");
    } catch (java.util.NoSuchElementException noLabel)
    {
      // StringTokenizer cannot test for a token under new delimiters without
      // consuming it, so an anchor at the very end of the input lands here
      return "";
    }
  }

  /**
   * Answers true if the tag is a line break: "br", optionally self-closed
   * ("br/" or "br /"), in any letter case.
   */
  private static boolean isLineBreak(String tag)
  {
    String name = tag.trim();
    if (name.endsWith("/"))
    {
      name = name.substring(0, name.length() - 1).trim();
    }
    return name.equalsIgnoreCase("br");
  }

  /**
   * Replaces the entities &amp;lt; &amp;gt; and &amp;amp; with the characters
   * they stand for.
   *
   * @param s
   *          text to decode
   * @return the decoded text
   */
  private String translateEntities(String s)
  {
    // "&amp;" is decoded last so that an escaped entity such as "&amp;lt;"
    // becomes "&lt;" rather than being decoded twice into "<"
    return s.replace("&lt;", "<").replace("&gt;", ">").replace("&amp;",
            "&");
  }

  /**
   * get either the parsed content or the original, depending on whether the
   * original looked like html content or not.
   *
   * @return the parsed content if the original contained an html tag,
   *         otherwise the original text
   */
  public String getNonHtmlContent()
  {
    return isHtmlContent() ? content : orig;
  }

}