1 |
|
|
2 |
|
|
3 |
|
|
4 |
|
|
5 |
|
|
6 |
|
|
7 |
|
|
8 |
|
|
9 |
|
|
10 |
|
|
11 |
|
|
12 |
|
|
13 |
|
|
14 |
|
|
15 |
|
|
16 |
|
|
17 |
|
|
18 |
|
|
19 |
|
|
20 |
|
|
21 |
|
package jalview.util; |
22 |
|
|
23 |
|
import jalview.datamodel.SequenceI; |
24 |
|
|
25 |
|
import java.util.ArrayList; |
26 |
|
import java.util.List; |
27 |
|
|
28 |
|
|
29 |
|
|
30 |
|
|
|
|
| 74% |
Uncovered Elements: 46 (177) |
Complexity: 54 |
Complexity Density: 0.52 |
|
31 |
|
public class Comparison |
32 |
|
{ |
33 |
|
private static final int EIGHTY_FIVE = 85; |
34 |
|
|
35 |
|
private static final int TO_UPPER_CASE = 'a' - 'A'; |
36 |
|
|
37 |
|
public static final char GAP_SPACE = ' '; |
38 |
|
|
39 |
|
public static final char GAP_DOT = '.'; |
40 |
|
|
41 |
|
public static final char GAP_DASH = '-'; |
42 |
|
|
43 |
|
public static final String GapChars = new String( |
44 |
|
new char[] |
45 |
|
{ GAP_SPACE, GAP_DOT, GAP_DASH }); |
46 |
|
|
47 |
|
|
48 |
|
|
49 |
|
|
50 |
|
@param |
51 |
|
|
52 |
|
@param |
53 |
|
|
54 |
|
|
55 |
|
@return |
56 |
|
|
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
57 |
0 |
public static final float compare(SequenceI ii, SequenceI jj)... |
58 |
|
{ |
59 |
0 |
return Comparison.compare(ii, jj, 0, ii.getLength() - 1); |
60 |
|
} |
61 |
|
|
62 |
|
|
63 |
|
|
64 |
|
|
65 |
|
@param |
66 |
|
|
67 |
|
@param |
68 |
|
|
69 |
|
@param |
70 |
|
|
71 |
|
@param |
72 |
|
|
73 |
|
@return |
74 |
|
|
|
|
| 0% |
Uncovered Elements: 37 (37) |
Complexity: 8 |
Complexity Density: 0.35 |
|
75 |
0 |
public static float compare(SequenceI ii, SequenceI jj, int start,... |
76 |
|
int end) |
77 |
|
{ |
78 |
0 |
String si = ii.getSequenceAsString(); |
79 |
0 |
String sj = jj.getSequenceAsString(); |
80 |
|
|
81 |
0 |
int ilen = si.length() - 1; |
82 |
0 |
int jlen = sj.length() - 1; |
83 |
|
|
84 |
0 |
while (Comparison.isGap(si.charAt(start + ilen))) |
85 |
|
{ |
86 |
0 |
ilen--; |
87 |
|
} |
88 |
|
|
89 |
0 |
while (Comparison.isGap(sj.charAt(start + jlen))) |
90 |
|
{ |
91 |
0 |
jlen--; |
92 |
|
} |
93 |
|
|
94 |
0 |
int count = 0; |
95 |
0 |
int match = 0; |
96 |
0 |
float pid = -1; |
97 |
|
|
98 |
0 |
if (ilen > jlen) |
99 |
|
{ |
100 |
0 |
for (int j = 0; j < jlen; j++) |
101 |
|
{ |
102 |
0 |
if (si.substring(start + j, start + j + 1) |
103 |
|
.equals(sj.substring(start + j, start + j + 1))) |
104 |
|
{ |
105 |
0 |
match++; |
106 |
|
} |
107 |
|
|
108 |
0 |
count++; |
109 |
|
} |
110 |
|
|
111 |
0 |
pid = (float) match / (float) ilen * 100; |
112 |
|
} |
113 |
|
else |
114 |
|
{ |
115 |
0 |
for (int j = 0; j < jlen; j++) |
116 |
|
{ |
117 |
0 |
if (si.substring(start + j, start + j + 1) |
118 |
|
.equals(sj.substring(start + j, start + j + 1))) |
119 |
|
{ |
120 |
0 |
match++; |
121 |
|
} |
122 |
|
|
123 |
0 |
count++; |
124 |
|
} |
125 |
|
|
126 |
0 |
pid = (float) match / (float) jlen * 100; |
127 |
|
} |
128 |
|
|
129 |
0 |
return pid; |
130 |
|
} |
131 |
|
|
132 |
|
|
133 |
|
|
134 |
|
|
135 |
|
@param |
136 |
|
|
137 |
|
@param |
138 |
|
|
139 |
|
@return |
140 |
|
@deprecated |
141 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
142 |
8 |
@Deprecated... |
143 |
|
public final static float PID(String seq1, String seq2) |
144 |
|
{ |
145 |
8 |
return PID(seq1, seq2, 0, seq1.length()); |
146 |
|
} |
147 |
|
|
148 |
|
static final int caseShift = 'a' - 'A'; |
149 |
|
|
150 |
|
|
151 |
|
|
152 |
|
@deprecated |
153 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
154 |
8 |
@Deprecated... |
155 |
|
public final static float PID(String seq1, String seq2, int start, |
156 |
|
int end) |
157 |
|
{ |
158 |
8 |
return PID(seq1, seq2, start, end, true, false); |
159 |
|
} |
160 |
|
|
161 |
|
|
162 |
|
|
163 |
|
|
164 |
|
|
165 |
|
@param |
166 |
|
@param |
167 |
|
@param |
168 |
|
|
169 |
|
@param |
170 |
|
|
171 |
|
@param |
172 |
|
|
173 |
|
|
174 |
|
@param |
175 |
|
|
176 |
|
@return |
177 |
|
@deprecated |
178 |
|
|
|
|
| 87.8% |
Uncovered Elements: 6 (49) |
Complexity: 13 |
Complexity Density: 0.45 |
|
179 |
12 |
@Deprecated... |
180 |
|
public final static float PID(String seq1, String seq2, int start, |
181 |
|
int end, boolean wcGaps, boolean ungappedOnly) |
182 |
|
{ |
183 |
12 |
int s1len = seq1.length(); |
184 |
12 |
int s2len = seq2.length(); |
185 |
|
|
186 |
12 |
int len = Math.min(s1len, s2len); |
187 |
|
|
188 |
12 |
if (end < len) |
189 |
|
{ |
190 |
0 |
len = end; |
191 |
|
} |
192 |
|
|
193 |
12 |
if (len < start) |
194 |
|
{ |
195 |
0 |
start = len - 1; |
196 |
|
} |
197 |
|
|
198 |
12 |
int elen = len - start, bad = 0; |
199 |
12 |
char chr1; |
200 |
12 |
char chr2; |
201 |
12 |
boolean agap; |
202 |
109 |
for (int i = start; i < len; i++) |
203 |
|
{ |
204 |
97 |
chr1 = seq1.charAt(i); |
205 |
|
|
206 |
97 |
chr2 = seq2.charAt(i); |
207 |
97 |
agap = isGap(chr1) || isGap(chr2); |
208 |
97 |
if ('a' <= chr1 && chr1 <= 'z') |
209 |
|
{ |
210 |
|
|
211 |
|
|
212 |
35 |
chr1 -= caseShift; |
213 |
|
} |
214 |
97 |
if ('a' <= chr2 && chr2 <= 'z') |
215 |
|
{ |
216 |
|
|
217 |
|
|
218 |
48 |
chr2 -= caseShift; |
219 |
|
} |
220 |
|
|
221 |
97 |
if (chr1 != chr2) |
222 |
|
{ |
223 |
30 |
if (agap) |
224 |
|
{ |
225 |
18 |
if (ungappedOnly) |
226 |
|
{ |
227 |
4 |
elen--; |
228 |
|
} |
229 |
14 |
else if (!wcGaps) |
230 |
|
{ |
231 |
2 |
bad++; |
232 |
|
} |
233 |
|
} |
234 |
|
else |
235 |
|
{ |
236 |
12 |
bad++; |
237 |
|
} |
238 |
|
} |
239 |
|
|
240 |
|
} |
241 |
12 |
if (elen < 1) |
242 |
|
{ |
243 |
0 |
return 0f; |
244 |
|
} |
245 |
12 |
return ((float) 100 * (elen - bad)) / elen; |
246 |
|
} |
247 |
|
|
248 |
|
|
249 |
|
|
250 |
|
|
251 |
|
|
252 |
|
|
253 |
|
@param |
254 |
|
|
255 |
|
@return |
256 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (3) |
Complexity: 4 |
Complexity Density: 4 |
|
257 |
66874510 |
public static final boolean isGap(char c)... |
258 |
|
{ |
259 |
67206718 |
return (c == GAP_DASH || c == GAP_DOT || c == GAP_SPACE) ? true : false; |
260 |
|
} |
261 |
|
|
262 |
|
|
263 |
|
|
264 |
|
|
265 |
|
|
266 |
|
@param |
267 |
|
@return |
268 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
269 |
395 |
public static final boolean isNucleotide(SequenceI seq)... |
270 |
|
{ |
271 |
395 |
return isNucleotide(new SequenceI[] { seq }); |
272 |
|
} |
273 |
|
|
274 |
|
|
275 |
|
|
276 |
|
|
277 |
|
|
278 |
|
|
279 |
|
@param |
280 |
|
@return |
281 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (29) |
Complexity: 7 |
Complexity Density: 0.41 |
|
282 |
1655 |
public static final boolean isNucleotide(SequenceI[] seqs)... |
283 |
|
{ |
284 |
1655 |
if (seqs == null) |
285 |
|
{ |
286 |
1 |
return false; |
287 |
|
} |
288 |
|
|
289 |
1654 |
int ntCount = 0; |
290 |
1654 |
int aaCount = 0; |
291 |
1654 |
for (SequenceI seq : seqs) |
292 |
|
{ |
293 |
29498 |
if (seq == null) |
294 |
|
{ |
295 |
1 |
continue; |
296 |
|
} |
297 |
|
|
298 |
|
|
299 |
29497 |
int len = seq.getLength(); |
300 |
6786791 |
for (int i = 0; i < len; i++) |
301 |
|
{ |
302 |
6757294 |
char c = seq.getCharAt(i); |
303 |
6757294 |
if (isNucleotide(c)) |
304 |
|
{ |
305 |
1271911 |
ntCount++; |
306 |
|
} |
307 |
5485383 |
else if (!isGap(c)) |
308 |
|
{ |
309 |
1046759 |
aaCount++; |
310 |
|
} |
311 |
|
} |
312 |
|
} |
313 |
|
|
314 |
|
|
315 |
|
|
316 |
|
|
317 |
|
|
318 |
1654 |
if (ntCount * 100 > EIGHTY_FIVE * (ntCount + aaCount)) |
319 |
|
{ |
320 |
446 |
return true; |
321 |
|
} |
322 |
|
else |
323 |
|
{ |
324 |
1208 |
return false; |
325 |
|
} |
326 |
|
|
327 |
|
} |
328 |
|
|
329 |
|
|
330 |
|
|
331 |
|
|
332 |
|
@param |
333 |
|
@return |
334 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (12) |
Complexity: 8 |
Complexity Density: 0.8 |
|
335 |
16057096 |
public static boolean isNucleotide(char c)... |
336 |
|
{ |
337 |
16058492 |
if ('a' <= c && c <= 'z') |
338 |
|
{ |
339 |
126764 |
c -= TO_UPPER_CASE; |
340 |
|
} |
341 |
|
|
342 |
16059206 |
switch (c) |
343 |
|
{ |
344 |
638152 |
case 'A': |
345 |
374146 |
case 'C': |
346 |
447890 |
case 'G': |
347 |
600320 |
case 'T': |
348 |
12947 |
case 'U': |
349 |
2070388 |
return true; |
350 |
|
} |
351 |
14185862 |
return false; |
352 |
|
} |
353 |
|
|
354 |
|
|
355 |
|
|
356 |
|
|
357 |
|
|
358 |
|
@param |
359 |
|
@param |
360 |
|
@return |
361 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (16) |
Complexity: 6 |
Complexity Density: 0.75 |
|
362 |
21 |
public static boolean isNucleotideSequence(String s, boolean allowGaps)... |
363 |
|
{ |
364 |
21 |
if (s == null) |
365 |
|
{ |
366 |
1 |
return false; |
367 |
|
} |
368 |
83 |
for (int i = 0; i < s.length(); i++) |
369 |
|
{ |
370 |
69 |
char c = s.charAt(i); |
371 |
69 |
if (!isNucleotide(c)) |
372 |
|
{ |
373 |
11 |
if (!allowGaps || !isGap(c)) |
374 |
|
{ |
375 |
6 |
return false; |
376 |
|
} |
377 |
|
} |
378 |
|
} |
379 |
14 |
return true; |
380 |
|
} |
381 |
|
|
382 |
|
|
383 |
|
|
384 |
|
|
385 |
|
@param |
386 |
|
@return |
387 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (10) |
Complexity: 2 |
Complexity Density: 0.25 |
|
388 |
17 |
public static boolean isNucleotide(SequenceI[][] seqs)... |
389 |
|
{ |
390 |
17 |
if (seqs == null) |
391 |
|
{ |
392 |
1 |
return false; |
393 |
|
} |
394 |
16 |
List<SequenceI> flattened = new ArrayList<SequenceI>(); |
395 |
16 |
for (SequenceI[] ss : seqs) |
396 |
|
{ |
397 |
29 |
for (SequenceI s : ss) |
398 |
|
{ |
399 |
54 |
flattened.add(s); |
400 |
|
} |
401 |
|
} |
402 |
16 |
final SequenceI[] oneDArray = flattened |
403 |
|
.toArray(new SequenceI[flattened.size()]); |
404 |
16 |
return isNucleotide(oneDArray); |
405 |
|
} |
406 |
|
|
407 |
|
|
408 |
|
|
409 |
|
|
410 |
|
|
411 |
|
@param |
412 |
|
|
413 |
|
@param |
414 |
|
|
415 |
|
@param |
416 |
|
|
417 |
|
@return |
418 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (5) |
Complexity: 2 |
Complexity Density: 0.67 |
|
419 |
18589 |
public static boolean isSameResidue(char c1, char c2,... |
420 |
|
boolean caseSensitive) |
421 |
|
{ |
422 |
18589 |
if (caseSensitive) |
423 |
|
{ |
424 |
3 |
return (c1 == c2); |
425 |
|
} |
426 |
|
else |
427 |
|
{ |
428 |
18586 |
return Character.toUpperCase(c1) == Character.toUpperCase(c2); |
429 |
|
} |
430 |
|
} |
431 |
|
} |