1 |
|
|
2 |
|
|
3 |
|
|
4 |
|
|
5 |
|
|
6 |
|
|
7 |
|
|
8 |
|
|
9 |
|
|
10 |
|
|
11 |
|
|
12 |
|
|
13 |
|
|
14 |
|
|
15 |
|
|
16 |
|
|
17 |
|
|
18 |
|
|
19 |
|
|
20 |
|
|
21 |
|
package jalview.io; |
22 |
|
|
23 |
|
import jalview.analysis.scoremodels.ScoreMatrix; |
24 |
|
import jalview.analysis.scoremodels.ScoreModels; |
25 |
|
import jalview.datamodel.SequenceI; |
26 |
|
|
27 |
|
import java.io.IOException; |
28 |
|
import java.util.StringTokenizer; |
29 |
|
|
30 |
|
|
31 |
|
|
32 |
|
|
33 |
|
|
34 |
|
|
35 |
|
|
36 |
|
|
37 |
|
|
38 |
|
|
39 |
|
|
40 |
|
|
41 |
|
|
42 |
|
|
43 |
|
|
44 |
|
|
45 |
|
|
46 |
|
|
47 |
|
|
48 |
|
|
49 |
|
|
50 |
|
|
51 |
|
|
52 |
|
|
|
|
| 96% |
Uncovered Elements: 8 (198) |
Complexity: 52 |
Complexity Density: 0.4 |
|
53 |
|
public class ScoreMatrixFile extends AlignFile |
54 |
|
implements AlignmentFileReaderI |
55 |
|
{ |
56 |
|
|
57 |
|
public static final String SCOREMATRIX = "SCOREMATRIX"; |
58 |
|
|
59 |
|
private static final String DELIMITERS = " ,\t"; |
60 |
|
|
61 |
|
private static final String COMMENT_CHAR = "#"; |
62 |
|
|
63 |
|
private String matrixName; |
64 |
|
|
65 |
|
|
66 |
|
|
67 |
|
|
68 |
|
boolean isLowerDiagonalOnly; |
69 |
|
|
70 |
|
|
71 |
|
|
72 |
|
|
73 |
|
boolean hasGuideColumn; |
74 |
|
|
75 |
|
|
76 |
|
|
77 |
|
|
78 |
|
@param |
79 |
|
@throws |
80 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
81 |
138 |
public ScoreMatrixFile(FileParse source) throws IOException... |
82 |
|
{ |
83 |
138 |
super(false, source); |
84 |
|
} |
85 |
|
|
|
|
| 0% |
Uncovered Elements: 1 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
86 |
0 |
@Override... |
87 |
|
public String print(SequenceI[] sqs, boolean jvsuffix) |
88 |
|
{ |
89 |
0 |
return null; |
90 |
|
} |
91 |
|
|
92 |
|
|
93 |
|
|
94 |
|
|
95 |
|
|
96 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (2) |
Complexity: 1 |
Complexity Density: 0.5 |
|
97 |
1 |
@Override... |
98 |
|
public void parse() throws IOException |
99 |
|
{ |
100 |
1 |
ScoreMatrix sm = parseMatrix(); |
101 |
|
|
102 |
1 |
ScoreModels.getInstance().registerScoreModel(sm); |
103 |
|
} |
104 |
|
|
105 |
|
|
106 |
|
|
107 |
|
|
108 |
|
|
109 |
|
|
110 |
|
@return |
111 |
|
@throws |
112 |
|
|
|
|
| 97.3% |
Uncovered Elements: 2 (73) |
Complexity: 14 |
Complexity Density: 0.27 |
|
113 |
138 |
public ScoreMatrix parseMatrix() throws IOException... |
114 |
|
{ |
115 |
138 |
ScoreMatrix sm = null; |
116 |
138 |
int lineNo = 0; |
117 |
138 |
String name = null; |
118 |
138 |
char[] alphabet = null; |
119 |
138 |
float[][] scores = null; |
120 |
138 |
int size = 0; |
121 |
138 |
int row = 0; |
122 |
138 |
String err = null; |
123 |
138 |
String data; |
124 |
138 |
isLowerDiagonalOnly = false; |
125 |
|
|
126 |
? |
while ((data = nextLine()) != null) |
127 |
|
{ |
128 |
3634 |
lineNo++; |
129 |
3634 |
data = data.trim(); |
130 |
3634 |
if (data.startsWith(COMMENT_CHAR) || data.length() == 0) |
131 |
|
{ |
132 |
1069 |
continue; |
133 |
|
} |
134 |
2565 |
if (data.toUpperCase().startsWith(SCOREMATRIX)) |
135 |
|
{ |
136 |
|
|
137 |
|
|
138 |
|
|
139 |
|
|
140 |
131 |
if (name != null) |
141 |
|
{ |
142 |
1 |
throw new FileFormatException( |
143 |
|
"Error: 'ScoreMatrix' repeated in file at line " |
144 |
|
+ lineNo); |
145 |
|
} |
146 |
130 |
StringTokenizer nameLine = new StringTokenizer(data, DELIMITERS); |
147 |
130 |
if (nameLine.countTokens() < 2) |
148 |
|
{ |
149 |
1 |
err = "Format error: expected 'ScoreMatrix <name>', found '" |
150 |
|
+ data + "' at line " + lineNo; |
151 |
1 |
throw new FileFormatException(err); |
152 |
|
} |
153 |
129 |
nameLine.nextToken(); |
154 |
129 |
name = nameLine.nextToken(); |
155 |
129 |
name = data.substring(1).substring(data.substring(1).indexOf(name)); |
156 |
129 |
continue; |
157 |
|
} |
158 |
2434 |
else if (data.startsWith("H ") && name == null) |
159 |
|
{ |
160 |
|
|
161 |
|
|
162 |
|
|
163 |
7 |
return parseAAIndexFormat(lineNo, data); |
164 |
|
} |
165 |
2427 |
else if (name == null) |
166 |
|
{ |
167 |
1 |
err = "Format error: 'ScoreMatrix <name>' should be the first non-comment line"; |
168 |
1 |
throw new FileFormatException(err); |
169 |
|
} |
170 |
|
|
171 |
|
|
172 |
|
|
173 |
|
|
174 |
|
|
175 |
2426 |
if (alphabet == null) |
176 |
|
{ |
177 |
128 |
StringTokenizer columnHeadings = new StringTokenizer(data, |
178 |
|
DELIMITERS); |
179 |
128 |
size = columnHeadings.countTokens(); |
180 |
128 |
alphabet = new char[size]; |
181 |
128 |
int col = 0; |
182 |
2432 |
while (columnHeadings.hasMoreTokens()) |
183 |
|
{ |
184 |
2304 |
alphabet[col++] = columnHeadings.nextToken().charAt(0); |
185 |
|
} |
186 |
128 |
scores = new float[size][]; |
187 |
128 |
continue; |
188 |
|
} |
189 |
|
|
190 |
|
|
191 |
|
|
192 |
|
|
193 |
2298 |
if (row >= size) |
194 |
|
{ |
195 |
1 |
err = "Unexpected extra input line in score model file: '" + data |
196 |
|
+ "'"; |
197 |
1 |
throw new FileFormatException(err); |
198 |
|
} |
199 |
|
|
200 |
2297 |
parseValues(data, lineNo, scores, row, alphabet); |
201 |
2289 |
row++; |
202 |
|
} |
203 |
|
|
204 |
|
|
205 |
|
|
206 |
|
|
207 |
119 |
if (row < size) |
208 |
|
{ |
209 |
1 |
err = String.format( |
210 |
|
"Expected %d rows of score data in score matrix but only found %d", |
211 |
|
size, row); |
212 |
1 |
throw new FileFormatException(err); |
213 |
|
} |
214 |
|
|
215 |
|
|
216 |
|
|
217 |
|
|
218 |
118 |
sm = new ScoreMatrix(name, alphabet, scores); |
219 |
118 |
matrixName = name; |
220 |
|
|
221 |
118 |
return sm; |
222 |
|
} |
223 |
|
|
224 |
|
|
225 |
|
|
226 |
|
|
227 |
|
|
228 |
|
@param |
229 |
|
@param |
230 |
|
@return |
231 |
|
@throws |
232 |
|
|
|
|
| 94.7% |
Uncovered Elements: 2 (38) |
Complexity: 7 |
Complexity Density: 0.27 |
|
233 |
7 |
protected ScoreMatrix parseAAIndexFormat(int lineNo, String data)... |
234 |
|
throws IOException |
235 |
|
{ |
236 |
7 |
String name = data.substring(2).trim(); |
237 |
7 |
String description = null; |
238 |
|
|
239 |
7 |
float[][] scores = null; |
240 |
7 |
char[] alphabet = null; |
241 |
7 |
int row = 0; |
242 |
7 |
int size = 0; |
243 |
|
|
244 |
? |
while ((data = nextLine()) != null) |
245 |
|
{ |
246 |
54 |
lineNo++; |
247 |
54 |
data = data.trim(); |
248 |
54 |
if (skipAAindexLine(data)) |
249 |
|
{ |
250 |
11 |
continue; |
251 |
|
} |
252 |
43 |
if (data.startsWith("D ")) |
253 |
|
{ |
254 |
2 |
description = data.substring(2).trim(); |
255 |
|
} |
256 |
41 |
else if (data.startsWith("M ")) |
257 |
|
{ |
258 |
6 |
alphabet = parseAAindexRowsColumns(lineNo, data); |
259 |
5 |
size = alphabet.length; |
260 |
5 |
scores = new float[size][size]; |
261 |
|
} |
262 |
35 |
else if (scores == null) |
263 |
|
{ |
264 |
1 |
throw new FileFormatException( |
265 |
|
"No alphabet specified in matrix file"); |
266 |
|
} |
267 |
34 |
else if (row >= size) |
268 |
|
{ |
269 |
2 |
throw new FileFormatException("Too many data rows in matrix file"); |
270 |
|
} |
271 |
|
else |
272 |
|
{ |
273 |
32 |
parseValues(data, lineNo, scores, row, alphabet); |
274 |
31 |
row++; |
275 |
|
} |
276 |
|
} |
277 |
|
|
278 |
2 |
ScoreMatrix sm = new ScoreMatrix(name, description, alphabet, scores); |
279 |
2 |
matrixName = name; |
280 |
|
|
281 |
2 |
return sm; |
282 |
|
} |
283 |
|
|
284 |
|
|
285 |
|
|
286 |
|
|
287 |
|
|
288 |
|
|
289 |
|
|
290 |
|
@param |
291 |
|
|
292 |
|
@param |
293 |
|
@param |
294 |
|
|
295 |
|
@param |
296 |
|
|
297 |
|
@param |
298 |
|
@return |
299 |
|
@throws |
300 |
|
|
301 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (53) |
Complexity: 15 |
Complexity Density: 0.45 |
|
302 |
2329 |
protected void parseValues(String data, int lineNo, float[][] scores,... |
303 |
|
int row, char[] alphabet) throws FileFormatException |
304 |
|
{ |
305 |
2329 |
String err; |
306 |
2329 |
int size = alphabet.length; |
307 |
2329 |
StringTokenizer scoreLine = new StringTokenizer(data, DELIMITERS); |
308 |
|
|
309 |
2329 |
int tokenCount = scoreLine.countTokens(); |
310 |
|
|
311 |
|
|
312 |
|
|
313 |
|
|
314 |
|
|
315 |
2329 |
if (row == 0) |
316 |
|
{ |
317 |
133 |
if (data.startsWith(String.valueOf(alphabet[0]))) |
318 |
|
{ |
319 |
125 |
hasGuideColumn = true; |
320 |
|
} |
321 |
133 |
if (tokenCount == (hasGuideColumn ? 2 : 1)) |
322 |
|
{ |
323 |
6 |
isLowerDiagonalOnly = true; |
324 |
|
} |
325 |
|
} |
326 |
|
|
327 |
2329 |
if (hasGuideColumn) |
328 |
|
{ |
329 |
|
|
330 |
|
|
331 |
|
|
332 |
2295 |
String symbol = scoreLine.nextToken(); |
333 |
2295 |
if (symbol.length() > 1 || symbol.charAt(0) != alphabet[row]) |
334 |
|
{ |
335 |
2 |
err = String.format( |
336 |
|
"Error parsing score matrix at line %d, expected '%s' but found '%s'", |
337 |
|
lineNo, alphabet[row], symbol); |
338 |
2 |
throw new FileFormatException(err); |
339 |
|
} |
340 |
2293 |
tokenCount = scoreLine.countTokens(); |
341 |
|
} |
342 |
|
|
343 |
|
|
344 |
|
|
345 |
|
|
346 |
2327 |
if (isLowerDiagonalOnly && tokenCount != row + 1) |
347 |
|
{ |
348 |
1 |
err = String.format( |
349 |
|
"Expected %d scores at line %d: '%s' but found %d", row + 1, |
350 |
|
lineNo, data, tokenCount); |
351 |
1 |
throw new FileFormatException(err); |
352 |
|
} |
353 |
|
|
354 |
2326 |
if (!isLowerDiagonalOnly && tokenCount != size) |
355 |
|
{ |
356 |
4 |
err = String.format( |
357 |
|
"Expected %d scores at line %d: '%s' but found %d", size, |
358 |
|
lineNo, data, scoreLine.countTokens()); |
359 |
4 |
throw new FileFormatException(err); |
360 |
|
} |
361 |
|
|
362 |
|
|
363 |
|
|
364 |
|
|
365 |
|
|
366 |
2322 |
scores[row] = new float[size]; |
367 |
2322 |
int col = 0; |
368 |
2322 |
String value = null; |
369 |
51588 |
while (scoreLine.hasMoreTokens()) |
370 |
|
{ |
371 |
49268 |
try |
372 |
|
{ |
373 |
49268 |
value = scoreLine.nextToken(); |
374 |
49268 |
scores[row][col] = Float.valueOf(value); |
375 |
49266 |
if (isLowerDiagonalOnly) |
376 |
|
{ |
377 |
231 |
scores[col][row] = scores[row][col]; |
378 |
|
} |
379 |
49266 |
col++; |
380 |
|
} catch (NumberFormatException e) |
381 |
|
{ |
382 |
2 |
err = String.format("Invalid score value '%s' at line %d column %d", |
383 |
|
value, lineNo, col); |
384 |
2 |
throw new FileFormatException(err); |
385 |
|
} |
386 |
|
} |
387 |
|
} |
388 |
|
|
389 |
|
|
390 |
|
|
391 |
|
|
392 |
|
|
393 |
|
|
394 |
|
|
395 |
|
|
396 |
|
|
397 |
|
|
398 |
|
|
399 |
|
@param |
400 |
|
@param |
401 |
|
@return |
402 |
|
@throws |
403 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (11) |
Complexity: 3 |
Complexity Density: 0.33 |
|
404 |
6 |
protected char[] parseAAindexRowsColumns(int lineNo, String data)... |
405 |
|
throws FileFormatException |
406 |
|
{ |
407 |
6 |
String err = "Unexpected aaIndex score matrix data at line " + lineNo |
408 |
|
+ ": " + data; |
409 |
|
|
410 |
6 |
try |
411 |
|
{ |
412 |
6 |
String[] toks = data.split(","); |
413 |
6 |
String rowsAlphabet = toks[0].split("=")[1].trim(); |
414 |
6 |
String colsAlphabet = toks[1].split("=")[1].trim(); |
415 |
6 |
if (!rowsAlphabet.equals(colsAlphabet)) |
416 |
|
{ |
417 |
1 |
throw new FileFormatException("rows != cols"); |
418 |
|
} |
419 |
5 |
return rowsAlphabet.toCharArray(); |
420 |
|
} catch (Throwable t) |
421 |
|
{ |
422 |
1 |
throw new FileFormatException(err + " " + t.getMessage()); |
423 |
|
} |
424 |
|
} |
425 |
|
|
426 |
|
|
427 |
|
|
428 |
|
|
429 |
|
|
430 |
|
@param |
431 |
|
@return |
432 |
|
|
|
|
| 77.8% |
Uncovered Elements: 2 (9) |
Complexity: 9 |
Complexity Density: 1.8 |
|
433 |
54 |
protected boolean skipAAindexLine(String data)... |
434 |
|
{ |
435 |
54 |
if (data.startsWith(COMMENT_CHAR) || data.length() == 0) |
436 |
|
{ |
437 |
0 |
return true; |
438 |
|
} |
439 |
54 |
if (data.startsWith("*") || data.startsWith("R ") |
440 |
|
|| data.startsWith("A ") || data.startsWith("T ") |
441 |
|
|| data.startsWith("J ") || data.startsWith("//")) |
442 |
|
{ |
443 |
11 |
return true; |
444 |
|
} |
445 |
43 |
return false; |
446 |
|
} |
447 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
448 |
2 |
public String getMatrixName()... |
449 |
|
{ |
450 |
2 |
return matrixName; |
451 |
|
} |
452 |
|
} |