1 |
|
|
2 |
|
|
3 |
|
|
4 |
|
|
5 |
|
|
6 |
|
|
7 |
|
|
8 |
|
|
9 |
|
|
10 |
|
|
11 |
|
|
12 |
|
|
13 |
|
|
14 |
|
|
15 |
|
|
16 |
|
|
17 |
|
|
18 |
|
|
19 |
|
|
20 |
|
|
21 |
|
package jalview.io; |
22 |
|
|
23 |
|
import java.io.IOException; |
24 |
|
import java.util.StringTokenizer; |
25 |
|
|
26 |
|
import jalview.analysis.scoremodels.ScoreMatrix; |
27 |
|
import jalview.analysis.scoremodels.ScoreModels; |
28 |
|
import jalview.datamodel.SequenceI; |
29 |
|
|
30 |
|
|
31 |
|
|
32 |
|
|
33 |
|
|
34 |
|
|
35 |
|
|
36 |
|
|
37 |
|
|
38 |
|
|
39 |
|
|
40 |
|
|
41 |
|
|
42 |
|
|
43 |
|
|
44 |
|
|
45 |
|
|
46 |
|
|
47 |
|
|
48 |
|
|
49 |
|
|
50 |
|
|
51 |
|
|
52 |
|
|
|
|
| 96% |
Uncovered Elements: 8 (198) |
Complexity: 52 |
Complexity Density: 0.4 |
|
53 |
|
public class ScoreMatrixFile extends AlignFile |
54 |
|
implements AlignmentFileReaderI |
55 |
|
{ |
56 |
|
|
57 |
|
public static final String SCOREMATRIX = "SCOREMATRIX"; |
58 |
|
|
59 |
|
private static final String DELIMITERS = " ,\t"; |
60 |
|
|
61 |
|
private static final String COMMENT_CHAR = "#"; |
62 |
|
|
63 |
|
private String matrixName; |
64 |
|
|
65 |
|
|
66 |
|
|
67 |
|
|
68 |
|
boolean isLowerDiagonalOnly; |
69 |
|
|
70 |
|
|
71 |
|
|
72 |
|
|
73 |
|
boolean hasGuideColumn; |
74 |
|
|
75 |
|
|
76 |
|
|
77 |
|
|
78 |
|
@param |
79 |
|
@throws |
80 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
81 |
376 |
/**
 * Creates a score matrix file reader over the given data source. The work is
 * delegated to the superclass constructor, which is passed {@code false} as
 * its first argument (presumably "do not parse immediately" - TODO confirm
 * against AlignFile).
 *
 * @param source
 *          the data source to read from
 * @throws IOException
 *           if thrown by the superclass constructor
 */
public ScoreMatrixFile(FileParse source) throws IOException
{
  super(false, source);
}
85 |
|
|
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
86 |
0 |
@Override... |
87 |
|
public String print(SequenceI[] sqs, boolean jvsuffix) |
88 |
|
{ |
89 |
0 |
return null; |
90 |
|
} |
91 |
|
|
92 |
|
|
93 |
|
|
94 |
|
|
95 |
|
|
96 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (2) |
Complexity: 1 |
Complexity Density: 0.5 |
|
97 |
1 |
@Override... |
98 |
|
public void parse() throws IOException |
99 |
|
{ |
100 |
1 |
ScoreMatrix sm = parseMatrix(); |
101 |
|
|
102 |
1 |
ScoreModels.getInstance().registerScoreModel(sm); |
103 |
|
} |
104 |
|
|
105 |
|
|
106 |
|
|
107 |
|
|
108 |
|
|
109 |
|
|
110 |
|
@return |
111 |
|
@throws |
112 |
|
|
|
|
| 97.3% |
Uncovered Elements: 2 (73) |
Complexity: 14 |
Complexity Density: 0.27 |
|
113 |
376 |
public ScoreMatrix parseMatrix() throws IOException... |
114 |
|
{ |
115 |
376 |
ScoreMatrix sm = null; |
116 |
376 |
int lineNo = 0; |
117 |
376 |
String name = null; |
118 |
376 |
char[] alphabet = null; |
119 |
376 |
float[][] scores = null; |
120 |
376 |
int size = 0; |
121 |
376 |
int row = 0; |
122 |
376 |
String err = null; |
123 |
376 |
String data; |
124 |
376 |
isLowerDiagonalOnly = false; |
125 |
|
|
126 |
? |
while ((data = nextLine()) != null) |
127 |
|
{ |
128 |
9516 |
lineNo++; |
129 |
9516 |
data = data.trim(); |
130 |
9516 |
if (data.startsWith(COMMENT_CHAR) || data.length() == 0) |
131 |
|
{ |
132 |
3173 |
continue; |
133 |
|
} |
134 |
|
|
135 |
6342 |
if (data.regionMatches(true, 0, SCOREMATRIX, 0, SCOREMATRIX.length())) |
136 |
|
{ |
137 |
|
|
138 |
|
|
139 |
|
|
140 |
|
|
141 |
369 |
if (name != null) |
142 |
|
{ |
143 |
1 |
throw new FileFormatException( |
144 |
|
"Error: 'ScoreMatrix' repeated in file at line " |
145 |
|
+ lineNo); |
146 |
|
} |
147 |
368 |
StringTokenizer nameLine = new StringTokenizer(data, DELIMITERS); |
148 |
368 |
if (nameLine.countTokens() < 2) |
149 |
|
{ |
150 |
1 |
err = "Format error: expected 'ScoreMatrix <name>', found '" |
151 |
|
+ data + "' at line " + lineNo; |
152 |
1 |
throw new FileFormatException(err); |
153 |
|
} |
154 |
367 |
nameLine.nextToken(); |
155 |
367 |
name = nameLine.nextToken(); |
156 |
367 |
name = data.substring(1).substring(data.substring(1).indexOf(name)); |
157 |
367 |
continue; |
158 |
|
} |
159 |
5974 |
else if (data.startsWith("H ") && name == null) |
160 |
|
{ |
161 |
|
|
162 |
|
|
163 |
|
|
164 |
7 |
return parseAAIndexFormat(lineNo, data); |
165 |
|
} |
166 |
5967 |
else if (name == null) |
167 |
|
{ |
168 |
1 |
err = "Format error: 'ScoreMatrix <name>' should be the first non-comment line"; |
169 |
1 |
throw new FileFormatException(err); |
170 |
|
} |
171 |
|
|
172 |
|
|
173 |
|
|
174 |
|
|
175 |
|
|
176 |
5966 |
if (alphabet == null) |
177 |
|
{ |
178 |
366 |
StringTokenizer columnHeadings = new StringTokenizer(data, |
179 |
|
DELIMITERS); |
180 |
366 |
size = columnHeadings.countTokens(); |
181 |
366 |
alphabet = new char[size]; |
182 |
366 |
int col = 0; |
183 |
5965 |
while (columnHeadings.hasMoreTokens()) |
184 |
|
{ |
185 |
5601 |
alphabet[col++] = columnHeadings.nextToken().charAt(0); |
186 |
|
} |
187 |
366 |
scores = new float[size][]; |
188 |
366 |
continue; |
189 |
|
} |
190 |
|
|
191 |
|
|
192 |
|
|
193 |
|
|
194 |
5600 |
if (row >= size) |
195 |
|
{ |
196 |
1 |
err = "Unexpected extra input line in score model file: '" + data |
197 |
|
+ "'"; |
198 |
1 |
throw new FileFormatException(err); |
199 |
|
} |
200 |
|
|
201 |
5599 |
parseValues(data, lineNo, scores, row, alphabet); |
202 |
5590 |
row++; |
203 |
|
} |
204 |
|
|
205 |
|
|
206 |
|
|
207 |
|
|
208 |
357 |
if (row < size) |
209 |
|
{ |
210 |
1 |
err = String.format( |
211 |
|
"Expected %d rows of score data in score matrix but only found %d", |
212 |
|
size, row); |
213 |
1 |
throw new FileFormatException(err); |
214 |
|
} |
215 |
|
|
216 |
|
|
217 |
|
|
218 |
|
|
219 |
356 |
sm = new ScoreMatrix(name, alphabet, scores); |
220 |
356 |
matrixName = name; |
221 |
|
|
222 |
356 |
return sm; |
223 |
|
} |
224 |
|
|
225 |
|
|
226 |
|
|
227 |
|
|
228 |
|
|
229 |
|
@param |
230 |
|
@param |
231 |
|
@return |
232 |
|
@throws |
233 |
|
|
|
|
| 94.7% |
Uncovered Elements: 2 (38) |
Complexity: 7 |
Complexity Density: 0.27 |
|
234 |
7 |
protected ScoreMatrix parseAAIndexFormat(int lineNo, String data)... |
235 |
|
throws IOException |
236 |
|
{ |
237 |
7 |
String name = data.substring(2).trim(); |
238 |
7 |
String description = null; |
239 |
|
|
240 |
7 |
float[][] scores = null; |
241 |
7 |
char[] alphabet = null; |
242 |
7 |
int row = 0; |
243 |
7 |
int size = 0; |
244 |
|
|
245 |
? |
while ((data = nextLine()) != null) |
246 |
|
{ |
247 |
54 |
lineNo++; |
248 |
54 |
data = data.trim(); |
249 |
54 |
if (skipAAindexLine(data)) |
250 |
|
{ |
251 |
11 |
continue; |
252 |
|
} |
253 |
43 |
if (data.startsWith("D ")) |
254 |
|
{ |
255 |
2 |
description = data.substring(2).trim(); |
256 |
|
} |
257 |
41 |
else if (data.startsWith("M ")) |
258 |
|
{ |
259 |
6 |
alphabet = parseAAindexRowsColumns(lineNo, data); |
260 |
5 |
size = alphabet.length; |
261 |
5 |
scores = new float[size][size]; |
262 |
|
} |
263 |
35 |
else if (scores == null) |
264 |
|
{ |
265 |
1 |
throw new FileFormatException( |
266 |
|
"No alphabet specified in matrix file"); |
267 |
|
} |
268 |
34 |
else if (row >= size) |
269 |
|
{ |
270 |
2 |
throw new FileFormatException("Too many data rows in matrix file"); |
271 |
|
} |
272 |
|
else |
273 |
|
{ |
274 |
32 |
parseValues(data, lineNo, scores, row, alphabet); |
275 |
31 |
row++; |
276 |
|
} |
277 |
|
} |
278 |
|
|
279 |
2 |
ScoreMatrix sm = new ScoreMatrix(name, description, alphabet, scores); |
280 |
2 |
matrixName = name; |
281 |
|
|
282 |
2 |
return sm; |
283 |
|
} |
284 |
|
|
285 |
|
|
286 |
|
|
287 |
|
|
288 |
|
|
289 |
|
|
290 |
|
|
291 |
|
@param |
292 |
|
|
293 |
|
@param |
294 |
|
@param |
295 |
|
|
296 |
|
@param |
297 |
|
|
298 |
|
@param |
299 |
|
@return |
300 |
|
@throws |
301 |
|
|
302 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (53) |
Complexity: 15 |
Complexity Density: 0.45 |
|
303 |
5631 |
protected void parseValues(String data, int lineNo, float[][] scores,... |
304 |
|
int row, char[] alphabet) throws FileFormatException |
305 |
|
{ |
306 |
5630 |
String err; |
307 |
5631 |
int size = alphabet.length; |
308 |
5630 |
StringTokenizer scoreLine = new StringTokenizer(data, DELIMITERS); |
309 |
|
|
310 |
5629 |
int tokenCount = scoreLine.countTokens(); |
311 |
|
|
312 |
|
|
313 |
|
|
314 |
|
|
315 |
|
|
316 |
5629 |
if (row == 0) |
317 |
|
{ |
318 |
371 |
if (data.startsWith(String.valueOf(alphabet[0]))) |
319 |
|
{ |
320 |
363 |
hasGuideColumn = true; |
321 |
|
} |
322 |
371 |
if (tokenCount == (hasGuideColumn ? 2 : 1)) |
323 |
|
{ |
324 |
6 |
isLowerDiagonalOnly = true; |
325 |
|
} |
326 |
|
} |
327 |
|
|
328 |
5631 |
if (hasGuideColumn) |
329 |
|
{ |
330 |
|
|
331 |
|
|
332 |
|
|
333 |
5597 |
String symbol = scoreLine.nextToken(); |
334 |
5597 |
if (symbol.length() > 1 || symbol.charAt(0) != alphabet[row]) |
335 |
|
{ |
336 |
2 |
err = String.format( |
337 |
|
"Error parsing score matrix at line %d, expected '%s' but found '%s'", |
338 |
|
lineNo, alphabet[row], symbol); |
339 |
2 |
throw new FileFormatException(err); |
340 |
|
} |
341 |
5595 |
tokenCount = scoreLine.countTokens(); |
342 |
|
} |
343 |
|
|
344 |
|
|
345 |
|
|
346 |
|
|
347 |
5628 |
if (isLowerDiagonalOnly && tokenCount != row + 1) |
348 |
|
{ |
349 |
1 |
err = String.format( |
350 |
|
"Expected %d scores at line %d: '%s' but found %d", row + 1, |
351 |
|
lineNo, data, tokenCount); |
352 |
1 |
throw new FileFormatException(err); |
353 |
|
} |
354 |
|
|
355 |
5628 |
if (!isLowerDiagonalOnly && tokenCount != size) |
356 |
|
{ |
357 |
4 |
err = String.format( |
358 |
|
"Expected %d scores at line %d: '%s' but found %d", size, |
359 |
|
lineNo, data, scoreLine.countTokens()); |
360 |
4 |
throw new FileFormatException(err); |
361 |
|
} |
362 |
|
|
363 |
|
|
364 |
|
|
365 |
|
|
366 |
|
|
367 |
5624 |
scores[row] = new float[size]; |
368 |
5623 |
int col = 0; |
369 |
5624 |
String value = null; |
370 |
119885 |
while (scoreLine.hasMoreTokens()) |
371 |
|
{ |
372 |
114299 |
try |
373 |
|
{ |
374 |
114291 |
value = scoreLine.nextToken(); |
375 |
114273 |
scores[row][col] = Float.valueOf(value); |
376 |
114253 |
if (isLowerDiagonalOnly) |
377 |
|
{ |
378 |
231 |
scores[col][row] = scores[row][col]; |
379 |
|
} |
380 |
114314 |
col++; |
381 |
|
} catch (NumberFormatException e) |
382 |
|
{ |
383 |
2 |
err = String.format("Invalid score value '%s' at line %d column %d", |
384 |
|
value, lineNo, col); |
385 |
2 |
throw new FileFormatException(err); |
386 |
|
} |
387 |
|
} |
388 |
|
} |
389 |
|
|
390 |
|
|
391 |
|
|
392 |
|
|
393 |
|
|
394 |
|
|
395 |
|
|
396 |
|
|
397 |
|
|
398 |
|
|
399 |
|
|
400 |
|
@param |
401 |
|
@param |
402 |
|
@return |
403 |
|
@throws |
404 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (11) |
Complexity: 3 |
Complexity Density: 0.33 |
|
405 |
6 |
protected char[] parseAAindexRowsColumns(int lineNo, String data)... |
406 |
|
throws FileFormatException |
407 |
|
{ |
408 |
6 |
String err = "Unexpected aaIndex score matrix data at line " + lineNo |
409 |
|
+ ": " + data; |
410 |
|
|
411 |
6 |
try |
412 |
|
{ |
413 |
6 |
String[] toks = data.split(","); |
414 |
6 |
String rowsAlphabet = toks[0].split("=")[1].trim(); |
415 |
6 |
String colsAlphabet = toks[1].split("=")[1].trim(); |
416 |
6 |
if (!rowsAlphabet.equals(colsAlphabet)) |
417 |
|
{ |
418 |
1 |
throw new FileFormatException("rows != cols"); |
419 |
|
} |
420 |
5 |
return rowsAlphabet.toCharArray(); |
421 |
|
} catch (Throwable t) |
422 |
|
{ |
423 |
1 |
throw new FileFormatException(err + " " + t.getMessage()); |
424 |
|
} |
425 |
|
} |
426 |
|
|
427 |
|
|
428 |
|
|
429 |
|
|
430 |
|
|
431 |
|
@param |
432 |
|
@return |
433 |
|
|
|
|
| 77.8% |
Uncovered Elements: 2 (9) |
Complexity: 9 |
Complexity Density: 1.8 |
|
434 |
54 |
protected boolean skipAAindexLine(String data)... |
435 |
|
{ |
436 |
54 |
if (data.startsWith(COMMENT_CHAR) || data.length() == 0) |
437 |
|
{ |
438 |
0 |
return true; |
439 |
|
} |
440 |
54 |
if (data.startsWith("*") || data.startsWith("R ") |
441 |
|
|| data.startsWith("A ") || data.startsWith("T ") |
442 |
|
|| data.startsWith("J ") || data.startsWith("//")) |
443 |
|
{ |
444 |
11 |
return true; |
445 |
|
} |
446 |
43 |
return false; |
447 |
|
} |
448 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
449 |
2 |
public String getMatrixName()... |
450 |
|
{ |
451 |
2 |
return matrixName; |
452 |
|
} |
453 |
|
} |