1 |
|
|
2 |
|
|
3 |
|
|
4 |
|
|
5 |
|
|
6 |
|
|
7 |
|
|
8 |
|
|
9 |
|
|
10 |
|
|
11 |
|
|
12 |
|
|
13 |
|
|
14 |
|
|
15 |
|
|
16 |
|
|
17 |
|
|
18 |
|
|
19 |
|
|
20 |
|
|
21 |
|
package jalview.analysis; |
22 |
|
|
23 |
|
import jalview.datamodel.DBRefEntry; |
24 |
|
import jalview.datamodel.SequenceI; |
25 |
|
|
26 |
|
import java.util.ArrayList; |
27 |
|
import java.util.Arrays; |
28 |
|
import java.util.HashMap; |
29 |
|
import java.util.List; |
30 |
|
import java.util.Vector; |
31 |
|
|
32 |
|
|
33 |
|
|
34 |
|
|
35 |
|
|
36 |
|
|
37 |
|
|
|
|
| 68.4% |
Uncovered Elements: 36 (114) |
Complexity: 33 |
Complexity Density: 0.47 |
|
38 |
|
public class SequenceIdMatcher |
39 |
|
{ |
40 |
|
private HashMap<SeqIdName, SequenceI> names; |
41 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (2) |
Complexity: 1 |
Complexity Density: 0.5 |
|
42 |
49 |
public SequenceIdMatcher(List<SequenceI> seqs)... |
43 |
|
{ |
44 |
49 |
names = new HashMap<SeqIdName, SequenceI>(); |
45 |
49 |
addAll(seqs); |
46 |
|
} |
47 |
|
|
48 |
|
|
49 |
|
|
50 |
|
|
51 |
|
@param |
52 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (2) |
Complexity: 1 |
Complexity Density: 0.5 |
|
53 |
61 |
public void addAll(List<SequenceI> seqs)... |
54 |
|
{ |
55 |
61 |
for (SequenceI seq : seqs) |
56 |
|
{ |
57 |
213 |
add(seq); |
58 |
|
} |
59 |
|
} |
60 |
|
|
61 |
|
|
62 |
|
|
63 |
|
|
64 |
|
@param |
65 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (19) |
Complexity: 5 |
Complexity Density: 0.45 |
|
66 |
225 |
public void add(SequenceI seq)... |
67 |
|
{ |
68 |
|
|
69 |
|
|
70 |
225 |
names.put(new SeqIdName(seq.getDisplayId(true)), seq); |
71 |
225 |
SequenceI dbseq = seq; |
72 |
361 |
while (dbseq.getDatasetSequence() != null) |
73 |
|
{ |
74 |
136 |
dbseq = dbseq.getDatasetSequence(); |
75 |
|
} |
76 |
|
|
77 |
225 |
if (dbseq.getDBRefs() != null) |
78 |
|
{ |
79 |
124 |
DBRefEntry dbr[] = dbseq.getDBRefs(); |
80 |
124 |
SeqIdName sid = null; |
81 |
282 |
for (int r = 0; r < dbr.length; r++) |
82 |
|
{ |
83 |
158 |
sid = new SeqIdName(dbr[r].getAccessionId()); |
84 |
158 |
if (!names.containsKey(sid)) |
85 |
|
{ |
86 |
133 |
names.put(sid, seq); |
87 |
|
} |
88 |
|
} |
89 |
|
} |
90 |
|
} |
91 |
|
|
92 |
|
|
93 |
|
|
94 |
|
|
95 |
|
@param |
96 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
97 |
37 |
public SequenceIdMatcher(SequenceI[] sequences)... |
98 |
|
{ |
99 |
37 |
this(Arrays.asList(sequences)); |
100 |
|
} |
101 |
|
|
102 |
|
|
103 |
|
|
104 |
|
|
105 |
|
|
106 |
|
@param |
107 |
|
|
108 |
|
@param |
109 |
|
|
110 |
|
@return |
111 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (4) |
Complexity: 3 |
Complexity Density: 1.5 |
|
112 |
256 |
private SequenceI pickbestMatch(SeqIdName candName,... |
113 |
|
List<SequenceI> matches) |
114 |
|
{ |
115 |
256 |
List<SequenceI> st = pickbestMatches(candName, matches); |
116 |
256 |
return st == null || st.size() == 0 ? null : st.get(0); |
117 |
|
} |
118 |
|
|
119 |
|
|
120 |
|
|
121 |
|
|
122 |
|
|
123 |
|
@param |
124 |
|
|
125 |
|
@param |
126 |
|
|
127 |
|
@return |
128 |
|
|
129 |
|
|
|
|
| 48.5% |
Uncovered Elements: 17 (33) |
Complexity: 10 |
Complexity Density: 0.43 |
|
130 |
261 |
private List<SequenceI> pickbestMatches(SeqIdName candName,... |
131 |
|
List<SequenceI> matches) |
132 |
|
{ |
133 |
261 |
ArrayList<SequenceI> best = new ArrayList<SequenceI>(); |
134 |
261 |
if (candName == null || matches == null || matches.size() == 0) |
135 |
|
{ |
136 |
63 |
return null; |
137 |
|
} |
138 |
198 |
SequenceI match = matches.remove(0); |
139 |
198 |
best.add(match); |
140 |
198 |
names.put(new SeqIdName(match.getName()), match); |
141 |
198 |
int matchlen = match.getName().length(); |
142 |
198 |
int namlen = candName.id.length(); |
143 |
198 |
while (matches.size() > 0) |
144 |
|
{ |
145 |
|
|
146 |
0 |
SequenceI cand = matches.remove(0); |
147 |
0 |
names.put(new SeqIdName(cand.getName()), cand); |
148 |
0 |
int q, w, candlen = cand.getName().length(); |
149 |
|
|
150 |
0 |
if ((q = Math.abs(matchlen - namlen)) > (w = Math |
151 |
|
.abs(candlen - namlen)) && candlen > matchlen) |
152 |
|
{ |
153 |
0 |
best.clear(); |
154 |
0 |
match = cand; |
155 |
0 |
matchlen = candlen; |
156 |
0 |
best.add(match); |
157 |
|
} |
158 |
0 |
if (q == w && candlen == matchlen) |
159 |
|
{ |
160 |
|
|
161 |
0 |
best.add(cand); |
162 |
|
} |
163 |
|
} |
164 |
198 |
if (best.size() == 0) |
165 |
|
{ |
166 |
0 |
return null; |
167 |
|
} |
168 |
198 |
; |
169 |
198 |
return best; |
170 |
|
} |
171 |
|
|
172 |
|
|
173 |
|
|
174 |
|
|
175 |
|
@param |
176 |
|
|
177 |
|
@return |
178 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (2) |
Complexity: 1 |
Complexity Density: 0.5 |
|
179 |
8 |
public SequenceI findIdMatch(SequenceI seq)... |
180 |
|
{ |
181 |
8 |
SeqIdName nam = new SeqIdName(seq.getName()); |
182 |
8 |
return findIdMatch(nam); |
183 |
|
} |
184 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (2) |
Complexity: 1 |
Complexity Density: 0.5 |
|
185 |
210 |
public SequenceI findIdMatch(String seqnam)... |
186 |
|
{ |
187 |
210 |
SeqIdName nam = new SeqIdName(seqnam); |
188 |
210 |
return findIdMatch(nam); |
189 |
|
} |
190 |
|
|
191 |
|
|
192 |
|
|
193 |
|
|
194 |
|
@param |
195 |
|
|
196 |
|
@return |
197 |
|
|
|
|
| 0% |
Uncovered Elements: 7 (7) |
Complexity: 2 |
Complexity Density: 0.4 |
|
198 |
0 |
public SequenceI[] findAllIdMatches(String seqnam)... |
199 |
|
{ |
200 |
|
|
201 |
0 |
SeqIdName nam = new SeqIdName(seqnam); |
202 |
0 |
List<SequenceI> m = findAllIdMatches(nam); |
203 |
0 |
if (m != null) |
204 |
|
{ |
205 |
0 |
return m.toArray(new SequenceI[m.size()]); |
206 |
|
} |
207 |
0 |
return null; |
208 |
|
} |
209 |
|
|
210 |
|
|
211 |
|
|
212 |
|
|
213 |
|
|
214 |
|
|
215 |
|
|
216 |
|
@param |
217 |
|
|
218 |
|
@return |
219 |
|
|
|
|
| 82.4% |
Uncovered Elements: 3 (17) |
Complexity: 4 |
Complexity Density: 0.36 |
|
220 |
6 |
public SequenceI[] findIdMatch(SequenceI[] seqs)... |
221 |
|
{ |
222 |
6 |
SequenceI[] namedseqs = null; |
223 |
6 |
int i = 0; |
224 |
6 |
SeqIdName nam; |
225 |
|
|
226 |
6 |
if (seqs.length > 0) |
227 |
|
{ |
228 |
6 |
namedseqs = new SequenceI[seqs.length]; |
229 |
6 |
do |
230 |
|
{ |
231 |
38 |
nam = new SeqIdName(seqs[i].getName()); |
232 |
|
|
233 |
38 |
if (names.containsKey(nam)) |
234 |
|
{ |
235 |
38 |
namedseqs[i] = findIdMatch(nam); |
236 |
|
} |
237 |
|
else |
238 |
|
{ |
239 |
0 |
namedseqs[i] = null; |
240 |
|
} |
241 |
38 |
} while (++i < seqs.length); |
242 |
|
} |
243 |
|
|
244 |
6 |
return namedseqs; |
245 |
|
} |
246 |
|
|
247 |
|
|
248 |
|
|
249 |
|
|
250 |
|
@param |
251 |
|
|
252 |
|
@return |
253 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (6) |
Complexity: 2 |
Complexity Density: 0.5 |
|
254 |
256 |
private SequenceI findIdMatch(... |
255 |
|
jalview.analysis.SequenceIdMatcher.SeqIdName nam) |
256 |
|
{ |
257 |
256 |
Vector matches = new Vector(); |
258 |
450 |
while (names.containsKey(nam)) |
259 |
|
{ |
260 |
194 |
matches.addElement(names.remove(nam)); |
261 |
|
} |
262 |
256 |
return pickbestMatch(nam, matches); |
263 |
|
} |
264 |
|
|
265 |
|
|
266 |
|
|
267 |
|
|
268 |
|
@param |
269 |
|
|
270 |
|
@return |
271 |
|
|
|
|
| 0% |
Uncovered Elements: 7 (7) |
Complexity: 2 |
Complexity Density: 0.4 |
|
272 |
0 |
private List<SequenceI> findAllIdMatches(... |
273 |
|
jalview.analysis.SequenceIdMatcher.SeqIdName nam) |
274 |
|
{ |
275 |
0 |
ArrayList<SequenceI> matches = new ArrayList<SequenceI>(); |
276 |
0 |
while (names.containsKey(nam)) |
277 |
|
{ |
278 |
0 |
matches.add(names.remove(nam)); |
279 |
|
} |
280 |
0 |
List<SequenceI> r = pickbestMatches(nam, matches); |
281 |
0 |
return r; |
282 |
|
} |
283 |
|
|
|
|
| 81.6% |
Uncovered Elements: 7 (38) |
Complexity: 14 |
Complexity Density: 0.93 |
|
284 |
|
class SeqIdName |
285 |
|
{ |
286 |
|
String id; |
287 |
|
|
|
|
| 60% |
Uncovered Elements: 2 (5) |
Complexity: 2 |
Complexity Density: 0.67 |
|
288 |
848 |
SeqIdName(String s)... |
289 |
|
{ |
290 |
848 |
if (s != null) |
291 |
|
{ |
292 |
848 |
id = s.toLowerCase(); |
293 |
|
} |
294 |
|
else |
295 |
|
{ |
296 |
0 |
id = ""; |
297 |
|
} |
298 |
|
} |
299 |
|
|
|
|
| 66.7% |
Uncovered Elements: 1 (3) |
Complexity: 2 |
Complexity Density: 2 |
|
300 |
1409 |
@Override... |
301 |
|
public int hashCode() |
302 |
|
{ |
303 |
1409 |
return ((id.length() >= 4) ? id.substring(0, 4).hashCode() |
304 |
|
: id.hashCode()); |
305 |
|
} |
306 |
|
|
|
|
| 84.6% |
Uncovered Elements: 2 (13) |
Complexity: 4 |
Complexity Density: 0.57 |
|
307 |
1746 |
@Override... |
308 |
|
public boolean equals(Object s) |
309 |
|
{ |
310 |
1746 |
if (s == null) |
311 |
|
{ |
312 |
1 |
return false; |
313 |
|
} |
314 |
1745 |
if (s instanceof SeqIdName) |
315 |
|
{ |
316 |
1704 |
return this.stringequals(((SeqIdName) s).id); |
317 |
|
} |
318 |
|
else |
319 |
|
{ |
320 |
41 |
if (s instanceof String) |
321 |
|
{ |
322 |
41 |
return this.stringequals(((String) s).toLowerCase()); |
323 |
|
} |
324 |
|
} |
325 |
|
|
326 |
0 |
return false; |
327 |
|
} |
328 |
|
|
329 |
|
|
330 |
|
|
331 |
|
|
332 |
|
|
333 |
|
|
334 |
|
|
335 |
|
private String WORD_SEP = "~. |#\\/<>!\"" + ((char) 0x00A4) |
336 |
|
+ "$%^*)}[@',?_"; |
337 |
|
|
338 |
|
|
339 |
|
|
340 |
|
|
341 |
|
|
342 |
|
|
343 |
|
@param |
344 |
|
@return |
345 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (11) |
Complexity: 5 |
Complexity Density: 1.67 |
|
346 |
1745 |
private boolean stringequals(String s)... |
347 |
|
{ |
348 |
1745 |
if (id.length() > s.length()) |
349 |
|
{ |
350 |
218 |
return id.startsWith(s) |
351 |
|
? (WORD_SEP.indexOf(id.charAt(s.length())) > -1) |
352 |
|
: false; |
353 |
|
} |
354 |
|
else |
355 |
|
{ |
356 |
1527 |
return s.startsWith(id) |
357 |
487 |
? (s.equals(id) ? true |
358 |
|
: (WORD_SEP.indexOf(s.charAt(id.length())) > -1)) |
359 |
|
: false; |
360 |
|
} |
361 |
|
} |
362 |
|
|
363 |
|
|
364 |
|
|
365 |
|
|
366 |
|
|
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
367 |
0 |
@Override... |
368 |
|
public String toString() |
369 |
|
{ |
370 |
0 |
return id; |
371 |
|
} |
372 |
|
} |
373 |
|
} |