1 |
|
|
2 |
|
|
3 |
|
|
4 |
|
|
5 |
|
|
6 |
|
|
7 |
|
|
8 |
|
|
9 |
|
|
10 |
|
|
11 |
|
|
12 |
|
|
13 |
|
|
14 |
|
|
15 |
|
|
16 |
|
|
17 |
|
|
18 |
|
|
19 |
|
|
20 |
|
|
21 |
|
package jalview.analysis; |
22 |
|
|
23 |
|
import java.util.Locale; |
24 |
|
|
25 |
|
import jalview.datamodel.DBRefEntry; |
26 |
|
import jalview.datamodel.SequenceI; |
27 |
|
|
28 |
|
import java.util.ArrayList; |
29 |
|
import java.util.Arrays; |
30 |
|
import java.util.HashMap; |
31 |
|
import java.util.List; |
32 |
|
import java.util.Vector; |
33 |
|
|
34 |
|
|
35 |
|
|
36 |
|
|
37 |
|
|
38 |
|
|
39 |
|
|
|
|
| 68.4% |
Uncovered Elements: 36 (114) |
Complexity: 33 |
Complexity Density: 0.47 |
|
40 |
|
public class SequenceIdMatcher |
41 |
|
{ |
42 |
|
// Lookup table from an id key (lower-cased by SeqIdName) to the sequence
// registered under that key.
private HashMap<SeqIdName, SequenceI> names;
43 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (2) |
Complexity: 1 |
Complexity Density: 0.5 |
|
44 |
46 |
/**
 * Creates a matcher with an empty lookup table and then registers every
 * sequence in the given list via {@link #addAll(List)}.
 * 
 * @param seqs
 *          the sequences to register
 */
public SequenceIdMatcher(List<SequenceI> seqs)
{
  this.names = new HashMap<SeqIdName, SequenceI>();
  this.addAll(seqs);
}
49 |
|
|
50 |
|
|
51 |
|
|
52 |
|
|
53 |
|
@param |
54 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (2) |
Complexity: 1 |
Complexity Density: 0.5 |
|
55 |
58 |
public void addAll(List<SequenceI> seqs)... |
56 |
|
{ |
57 |
58 |
for (SequenceI seq : seqs) |
58 |
|
{ |
59 |
326 |
add(seq); |
60 |
|
} |
61 |
|
} |
62 |
|
|
63 |
|
|
64 |
|
|
65 |
|
|
66 |
|
@param |
67 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (19) |
Complexity: 5 |
Complexity Density: 0.45 |
|
68 |
332 |
public void add(SequenceI seq)... |
69 |
|
{ |
70 |
|
|
71 |
|
|
72 |
332 |
names.put(new SeqIdName(seq.getDisplayId(true)), seq); |
73 |
332 |
SequenceI dbseq = seq; |
74 |
518 |
while (dbseq.getDatasetSequence() != null) |
75 |
|
{ |
76 |
186 |
dbseq = dbseq.getDatasetSequence(); |
77 |
|
} |
78 |
|
|
79 |
332 |
List<DBRefEntry> dbr = dbseq.getDBRefs(); |
80 |
332 |
if (dbr != null) |
81 |
|
{ |
82 |
143 |
SeqIdName sid = null; |
83 |
1936 |
for (int r = 0, nr = dbr.size(); r < nr; r++) |
84 |
|
{ |
85 |
1793 |
sid = new SeqIdName(dbr.get(r).getAccessionId()); |
86 |
1793 |
if (!names.containsKey(sid)) |
87 |
|
{ |
88 |
571 |
names.put(sid, seq); |
89 |
|
} |
90 |
|
} |
91 |
|
} |
92 |
|
} |
93 |
|
|
94 |
|
|
95 |
|
|
96 |
|
|
97 |
|
@param |
98 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
99 |
35 |
/**
 * Convenience constructor: wraps the array as a list and delegates to
 * {@link #SequenceIdMatcher(List)}.
 * 
 * @param sequences
 *          the sequences to register
 */
public SequenceIdMatcher(SequenceI[] sequences)
{
  this(Arrays.<SequenceI> asList(sequences));
}
103 |
|
|
104 |
|
|
105 |
|
|
106 |
|
|
107 |
|
|
108 |
|
@param |
109 |
|
|
110 |
|
@param |
111 |
|
|
112 |
|
@return |
113 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (4) |
Complexity: 3 |
Complexity Density: 1.5 |
|
114 |
315 |
private SequenceI pickbestMatch(SeqIdName candName,... |
115 |
|
List<SequenceI> matches) |
116 |
|
{ |
117 |
315 |
List<SequenceI> st = pickbestMatches(candName, matches); |
118 |
315 |
return st == null || st.size() == 0 ? null : st.get(0); |
119 |
|
} |
120 |
|
|
121 |
|
|
122 |
|
|
123 |
|
|
124 |
|
|
125 |
|
@param |
126 |
|
|
127 |
|
@param |
128 |
|
|
129 |
|
@return |
130 |
|
|
131 |
|
|
|
|
| 48.5% |
Uncovered Elements: 17 (33) |
Complexity: 10 |
Complexity Density: 0.43 |
|
132 |
315 |
private List<SequenceI> pickbestMatches(SeqIdName candName,... |
133 |
|
List<SequenceI> matches) |
134 |
|
{ |
135 |
315 |
ArrayList<SequenceI> best = new ArrayList<SequenceI>(); |
136 |
315 |
if (candName == null || matches == null || matches.size() == 0) |
137 |
|
{ |
138 |
73 |
return null; |
139 |
|
} |
140 |
242 |
SequenceI match = matches.remove(0); |
141 |
242 |
best.add(match); |
142 |
242 |
names.put(new SeqIdName(match.getName()), match); |
143 |
242 |
int matchlen = match.getName().length(); |
144 |
242 |
int namlen = candName.id.length(); |
145 |
242 |
while (matches.size() > 0) |
146 |
|
{ |
147 |
|
|
148 |
0 |
SequenceI cand = matches.remove(0); |
149 |
0 |
names.put(new SeqIdName(cand.getName()), cand); |
150 |
0 |
int q, w, candlen = cand.getName().length(); |
151 |
|
|
152 |
0 |
if ((q = Math.abs(matchlen - namlen)) > (w = Math |
153 |
|
.abs(candlen - namlen)) && candlen > matchlen) |
154 |
|
{ |
155 |
0 |
best.clear(); |
156 |
0 |
match = cand; |
157 |
0 |
matchlen = candlen; |
158 |
0 |
best.add(match); |
159 |
|
} |
160 |
0 |
if (q == w && candlen == matchlen) |
161 |
|
{ |
162 |
|
|
163 |
0 |
best.add(cand); |
164 |
|
} |
165 |
|
} |
166 |
242 |
if (best.size() == 0) |
167 |
|
{ |
168 |
0 |
return null; |
169 |
|
} |
170 |
242 |
; |
171 |
242 |
return best; |
172 |
|
} |
173 |
|
|
174 |
|
|
175 |
|
|
176 |
|
|
177 |
|
@param |
178 |
|
|
179 |
|
@return |
180 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (2) |
Complexity: 1 |
Complexity Density: 0.5 |
|
181 |
8 |
public SequenceI findIdMatch(SequenceI seq)... |
182 |
|
{ |
183 |
8 |
SeqIdName nam = new SeqIdName(seq.getName()); |
184 |
8 |
return findIdMatch(nam); |
185 |
|
} |
186 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (2) |
Complexity: 1 |
Complexity Density: 0.5 |
|
187 |
269 |
public SequenceI findIdMatch(String seqnam)... |
188 |
|
{ |
189 |
269 |
SeqIdName nam = new SeqIdName(seqnam); |
190 |
269 |
return findIdMatch(nam); |
191 |
|
} |
192 |
|
|
193 |
|
|
194 |
|
|
195 |
|
|
196 |
|
@param |
197 |
|
|
198 |
|
@return |
199 |
|
|
|
|
| 0% |
Uncovered Elements: 7 (7) |
Complexity: 2 |
Complexity Density: 0.4 |
|
200 |
0 |
public SequenceI[] findAllIdMatches(String seqnam)... |
201 |
|
{ |
202 |
|
|
203 |
0 |
SeqIdName nam = new SeqIdName(seqnam); |
204 |
0 |
List<SequenceI> m = findAllIdMatches(nam); |
205 |
0 |
if (m != null) |
206 |
|
{ |
207 |
0 |
return m.toArray(new SequenceI[m.size()]); |
208 |
|
} |
209 |
0 |
return null; |
210 |
|
} |
211 |
|
|
212 |
|
|
213 |
|
|
214 |
|
|
215 |
|
|
216 |
|
|
217 |
|
|
218 |
|
@param |
219 |
|
|
220 |
|
@return |
221 |
|
|
|
|
| 82.4% |
Uncovered Elements: 3 (17) |
Complexity: 4 |
Complexity Density: 0.36 |
|
222 |
6 |
public SequenceI[] findIdMatch(SequenceI[] seqs)... |
223 |
|
{ |
224 |
6 |
SequenceI[] namedseqs = null; |
225 |
6 |
int i = 0; |
226 |
6 |
SeqIdName nam; |
227 |
|
|
228 |
6 |
if (seqs.length > 0) |
229 |
|
{ |
230 |
6 |
namedseqs = new SequenceI[seqs.length]; |
231 |
6 |
do |
232 |
|
{ |
233 |
38 |
nam = new SeqIdName(seqs[i].getName()); |
234 |
|
|
235 |
38 |
if (names.containsKey(nam)) |
236 |
|
{ |
237 |
38 |
namedseqs[i] = findIdMatch(nam); |
238 |
|
} |
239 |
|
else |
240 |
|
{ |
241 |
0 |
namedseqs[i] = null; |
242 |
|
} |
243 |
38 |
} while (++i < seqs.length); |
244 |
|
} |
245 |
|
|
246 |
6 |
return namedseqs; |
247 |
|
} |
248 |
|
|
249 |
|
|
250 |
|
|
251 |
|
|
252 |
|
@param |
253 |
|
|
254 |
|
@return |
255 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (6) |
Complexity: 2 |
Complexity Density: 0.5 |
|
256 |
315 |
private SequenceI findIdMatch(... |
257 |
|
jalview.analysis.SequenceIdMatcher.SeqIdName nam) |
258 |
|
{ |
259 |
315 |
Vector matches = new Vector(); |
260 |
557 |
while (names.containsKey(nam)) |
261 |
|
{ |
262 |
242 |
matches.addElement(names.remove(nam)); |
263 |
|
} |
264 |
315 |
return pickbestMatch(nam, matches); |
265 |
|
} |
266 |
|
|
267 |
|
|
268 |
|
|
269 |
|
|
270 |
|
@param |
271 |
|
|
272 |
|
@return |
273 |
|
|
|
|
| 0% |
Uncovered Elements: 7 (7) |
Complexity: 2 |
Complexity Density: 0.4 |
|
274 |
0 |
private List<SequenceI> findAllIdMatches(... |
275 |
|
jalview.analysis.SequenceIdMatcher.SeqIdName nam) |
276 |
|
{ |
277 |
0 |
ArrayList<SequenceI> matches = new ArrayList<SequenceI>(); |
278 |
0 |
while (names.containsKey(nam)) |
279 |
|
{ |
280 |
0 |
matches.add(names.remove(nam)); |
281 |
|
} |
282 |
0 |
List<SequenceI> r = pickbestMatches(nam, matches); |
283 |
0 |
return r; |
284 |
|
} |
285 |
|
|
|
|
| 84.2% |
Uncovered Elements: 6 (38) |
Complexity: 14 |
Complexity Density: 0.93 |
|
286 |
|
class SeqIdName |
287 |
|
{ |
288 |
|
String id; |
289 |
|
|
|
|
| 60% |
Uncovered Elements: 2 (5) |
Complexity: 2 |
Complexity Density: 0.67 |
|
290 |
2688 |
SeqIdName(String s)... |
291 |
|
{ |
292 |
2688 |
if (s != null) |
293 |
|
{ |
294 |
2688 |
id = s.toLowerCase(Locale.ROOT); |
295 |
|
} |
296 |
|
else |
297 |
|
{ |
298 |
0 |
id = ""; |
299 |
|
} |
300 |
|
} |
301 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (3) |
Complexity: 2 |
Complexity Density: 2 |
|
302 |
3775 |
@Override... |
303 |
|
public int hashCode() |
304 |
|
{ |
305 |
3775 |
return ((id.length() >= 4) ? id.substring(0, 4).hashCode() |
306 |
|
: id.hashCode()); |
307 |
|
} |
308 |
|
|
|
|
| 84.6% |
Uncovered Elements: 2 (13) |
Complexity: 4 |
Complexity Density: 0.57 |
|
309 |
17957 |
@Override... |
310 |
|
public boolean equals(Object s) |
311 |
|
{ |
312 |
17957 |
if (s == null) |
313 |
|
{ |
314 |
1 |
return false; |
315 |
|
} |
316 |
17956 |
if (s instanceof SeqIdName) |
317 |
|
{ |
318 |
17915 |
return this.stringequals(((SeqIdName) s).id); |
319 |
|
} |
320 |
|
else |
321 |
|
{ |
322 |
41 |
if (s instanceof String) |
323 |
|
{ |
324 |
41 |
return this.stringequals(((String) s).toLowerCase(Locale.ROOT)); |
325 |
|
} |
326 |
|
} |
327 |
|
|
328 |
0 |
return false; |
329 |
|
} |
330 |
|
|
331 |
|
|
332 |
|
|
333 |
|
|
334 |
|
|
335 |
|
|
336 |
|
|
337 |
|
private String WORD_SEP = "~. |#\\/<>!\"" + ((char) 0x00A4) |
338 |
|
+ "$%^*)}[@',?_"; |
339 |
|
|
340 |
|
|
341 |
|
|
342 |
|
|
343 |
|
|
344 |
|
|
345 |
|
@param |
346 |
|
@return |
347 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (11) |
Complexity: 5 |
Complexity Density: 1.67 |
|
348 |
17956 |
private boolean stringequals(String s)... |
349 |
|
{ |
350 |
17956 |
if (id.length() > s.length()) |
351 |
|
{ |
352 |
3741 |
return id.startsWith(s) |
353 |
|
? (WORD_SEP.indexOf(id.charAt(s.length())) > -1) |
354 |
|
: false; |
355 |
|
} |
356 |
|
else |
357 |
|
{ |
358 |
14215 |
return s.startsWith(id) |
359 |
1772 |
? (s.equals(id) ? true |
360 |
|
: (WORD_SEP.indexOf(s.charAt(id.length())) > -1)) |
361 |
|
: false; |
362 |
|
} |
363 |
|
} |
364 |
|
|
365 |
|
|
366 |
|
|
367 |
|
|
368 |
|
|
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
369 |
0 |
@Override... |
370 |
|
public String toString() |
371 |
|
{ |
372 |
0 |
return id; |
373 |
|
} |
374 |
|
} |
375 |
|
} |