1 |
|
|
2 |
|
|
3 |
|
|
4 |
|
|
5 |
|
|
6 |
|
|
7 |
|
|
8 |
|
|
9 |
|
|
10 |
|
|
11 |
|
|
12 |
|
|
13 |
|
|
14 |
|
|
15 |
|
|
16 |
|
|
17 |
|
|
18 |
|
|
19 |
|
|
20 |
|
|
21 |
|
package jalview.io; |
22 |
|
|
23 |
|
import java.io.File; |
24 |
|
import java.io.IOException; |
25 |
|
|
26 |
|
|
27 |
|
|
28 |
|
|
29 |
|
@author |
30 |
|
@version |
31 |
|
|
|
|
| 75.7% |
Uncovered Elements: 69 (284) |
Complexity: 84 |
Complexity Density: 0.48 |
|
32 |
|
public class IdentifyFile |
33 |
|
{ |
34 |
|
|
|
|
| 66.7% |
Uncovered Elements: 1 (3) |
Complexity: 2 |
Complexity Density: 2 |
|
35 |
2 |
public FileFormatI identify(Object file, DataSourceType protocol) throws FileFormatException... |
36 |
|
{ |
37 |
|
|
38 |
2 |
return (file instanceof File ? identify((File) file, protocol) : identify((String) file, protocol)); |
39 |
|
|
40 |
|
} |
41 |
|
|
|
|
| 0% |
Uncovered Elements: 16 (16) |
Complexity: 4 |
Complexity Density: 0.33 |
|
42 |
0 |
public FileFormatI identify(File file, DataSourceType sourceType)... |
43 |
|
throws FileFormatException |
44 |
|
{ |
45 |
|
|
46 |
0 |
String emessage = "UNIDENTIFIED FILE PARSING ERROR"; |
47 |
0 |
FileParse parser = null; |
48 |
0 |
try |
49 |
|
{ |
50 |
0 |
parser = new FileParse(file, sourceType); |
51 |
0 |
if (parser.isValid()) |
52 |
|
{ |
53 |
0 |
return identify(parser); |
54 |
|
} |
55 |
|
} catch (Exception e) |
56 |
|
{ |
57 |
0 |
System.err.println("Error whilst identifying " + file); |
58 |
0 |
e.printStackTrace(System.err); |
59 |
0 |
emessage = e.getMessage(); |
60 |
|
} |
61 |
0 |
if (parser != null) |
62 |
|
{ |
63 |
0 |
throw new FileFormatException(parser.errormessage); |
64 |
|
} |
65 |
0 |
throw new FileFormatException(emessage); |
66 |
|
} |
67 |
|
|
68 |
|
|
69 |
|
|
70 |
|
|
71 |
|
|
72 |
|
|
73 |
|
|
74 |
|
@param |
75 |
|
@param |
76 |
|
@return |
77 |
|
@throws |
78 |
|
|
|
|
| 43.8% |
Uncovered Elements: 9 (16) |
Complexity: 4 |
Complexity Density: 0.33 |
|
79 |
132 |
public FileFormatI identify(String file, DataSourceType sourceType)... |
80 |
|
throws FileFormatException |
81 |
|
{ |
82 |
132 |
String emessage = "UNIDENTIFIED FILE PARSING ERROR"; |
83 |
132 |
FileParse parser = null; |
84 |
132 |
try |
85 |
|
{ |
86 |
132 |
parser = new FileParse(file, sourceType); |
87 |
130 |
if (parser.isValid()) |
88 |
|
{ |
89 |
130 |
return identify(parser); |
90 |
|
} |
91 |
|
} catch (Exception e) |
92 |
|
{ |
93 |
0 |
System.err.println("Error whilst identifying " + file); |
94 |
0 |
e.printStackTrace(System.err); |
95 |
0 |
emessage = e.getMessage(); |
96 |
|
} |
97 |
0 |
if (parser != null) |
98 |
|
{ |
99 |
0 |
throw new FileFormatException(parser.errormessage); |
100 |
|
} |
101 |
0 |
throw new FileFormatException(emessage); |
102 |
|
} |
103 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
104 |
136 |
public FileFormatI identify(FileParse source) throws FileFormatException... |
105 |
|
{ |
106 |
136 |
return identify(source, true); |
107 |
|
|
108 |
|
} |
109 |
|
|
|
|
| 0% |
Uncovered Elements: 2 (2) |
Complexity: 1 |
Complexity Density: 0.5 |
|
110 |
0 |
public FileFormatI identify(AlignmentFileReaderI file,... |
111 |
|
boolean closeSource) throws IOException |
112 |
|
{ |
113 |
0 |
FileParse fp = new FileParse(file.getInFile(), |
114 |
|
file.getDataSourceType()); |
115 |
0 |
return identify(fp, closeSource); |
116 |
|
} |
117 |
|
|
118 |
|
|
119 |
|
|
120 |
|
|
121 |
|
|
122 |
|
@param |
123 |
|
@param |
124 |
|
@return |
125 |
|
@throws |
126 |
|
|
|
|
| 87.7% |
Uncovered Elements: 25 (203) |
Complexity: 60 |
Complexity Density: 0.49 |
|
127 |
136 |
public FileFormatI identify(FileParse source, boolean closeSource)... |
128 |
|
throws FileFormatException |
129 |
|
{ |
130 |
136 |
FileFormatI reply = FileFormat.Pfam; |
131 |
136 |
String data; |
132 |
136 |
int bytesRead = 0; |
133 |
136 |
int trimmedLength = 0; |
134 |
136 |
boolean lineswereskipped = false; |
135 |
136 |
boolean isBinary = false; |
136 |
|
|
137 |
|
|
138 |
136 |
try |
139 |
|
{ |
140 |
136 |
if (!closeSource) |
141 |
|
{ |
142 |
0 |
source.mark(); |
143 |
|
} |
144 |
136 |
boolean aaIndexHeaderRead = false; |
145 |
|
|
146 |
? |
while ((data = source.nextLine()) != null) |
147 |
|
{ |
148 |
240 |
bytesRead += data.length(); |
149 |
240 |
trimmedLength += data.trim().length(); |
150 |
240 |
if (!lineswereskipped) |
151 |
|
{ |
152 |
7230 |
for (int i = 0; !isBinary && i < data.length(); i++) |
153 |
|
{ |
154 |
7094 |
char c = data.charAt(i); |
155 |
7094 |
isBinary = (c < 32 && c != '\t' && c != '\n' && c != '\r' |
156 |
|
&& c != 5 && c != 27); |
157 |
|
|
158 |
|
|
159 |
|
} |
160 |
|
} |
161 |
240 |
if (isBinary) |
162 |
|
{ |
163 |
|
|
164 |
|
|
165 |
17 |
if (source.inFile != null) |
166 |
|
{ |
167 |
17 |
String fileStr = source.inFile.getName(); |
168 |
|
|
169 |
17 |
if (fileStr.lastIndexOf(".jar") > -1 |
170 |
|
|| fileStr.lastIndexOf(".zip") > -1) |
171 |
|
{ |
172 |
6 |
reply = FileFormat.Jalview; |
173 |
|
} |
174 |
|
} |
175 |
17 |
if (!lineswereskipped && data.startsWith("PK")) |
176 |
|
{ |
177 |
17 |
reply = FileFormat.Jalview; |
178 |
17 |
break; |
179 |
|
} |
180 |
|
} |
181 |
223 |
data = data.toUpperCase(); |
182 |
|
|
183 |
223 |
if (data.startsWith(ScoreMatrixFile.SCOREMATRIX)) |
184 |
|
{ |
185 |
1 |
reply = FileFormat.ScoreMatrix; |
186 |
1 |
break; |
187 |
|
} |
188 |
222 |
if (data.startsWith("H ") && !aaIndexHeaderRead) |
189 |
|
{ |
190 |
1 |
aaIndexHeaderRead = true; |
191 |
|
} |
192 |
222 |
if (data.startsWith("D ") && aaIndexHeaderRead) |
193 |
|
{ |
194 |
1 |
reply = FileFormat.ScoreMatrix; |
195 |
1 |
break; |
196 |
|
} |
197 |
221 |
if (data.startsWith("##GFF-VERSION")) |
198 |
|
{ |
199 |
|
|
200 |
4 |
reply = FileFormat.Features; |
201 |
4 |
break; |
202 |
|
} |
203 |
217 |
if (looksLikeFeatureData(data)) |
204 |
|
{ |
205 |
4 |
reply = FileFormat.Features; |
206 |
4 |
break; |
207 |
|
} |
208 |
213 |
if (data.indexOf("# STOCKHOLM") > -1) |
209 |
|
{ |
210 |
7 |
reply = FileFormat.Stockholm; |
211 |
7 |
break; |
212 |
|
} |
213 |
206 |
if (data.indexOf("_ENTRY.ID") > -1 |
214 |
|
|| data.indexOf("_AUDIT_AUTHOR.NAME") > -1 |
215 |
|
|| data.indexOf("_ATOM_SITE.") > -1) |
216 |
|
{ |
217 |
1 |
reply = FileFormat.MMCif; |
218 |
1 |
break; |
219 |
|
} |
220 |
|
|
221 |
205 |
if (data.startsWith(">")) |
222 |
|
{ |
223 |
|
|
224 |
76 |
boolean checkPIR = false, starterm = false; |
225 |
76 |
if ((data.indexOf(">P1;") > -1) || (data.indexOf(">DL;") > -1)) |
226 |
|
{ |
227 |
|
|
228 |
1 |
checkPIR = true; |
229 |
1 |
reply = FileFormat.PIR; |
230 |
|
} |
231 |
|
|
232 |
76 |
data = source.nextLine(); |
233 |
|
|
234 |
76 |
if (data.indexOf(">") > -1) |
235 |
|
{ |
236 |
1 |
reply = FileFormat.BLC; |
237 |
|
} |
238 |
|
else |
239 |
|
{ |
240 |
|
|
241 |
75 |
String data1 = source.nextLine(); |
242 |
75 |
String data2 = source.nextLine(); |
243 |
75 |
int c1; |
244 |
75 |
if (checkPIR) |
245 |
|
{ |
246 |
1 |
starterm = (data1 != null && data1.indexOf("*") > -1) |
247 |
|
|| (data2 != null && data2.indexOf("*") > -1); |
248 |
|
} |
249 |
? |
if (data2 != null && (c1 = data.indexOf("*")) > -1) |
250 |
|
{ |
251 |
1 |
if (c1 == 0 && c1 == data2.indexOf("*")) |
252 |
|
{ |
253 |
0 |
reply = FileFormat.BLC; |
254 |
|
} |
255 |
|
else |
256 |
|
{ |
257 |
1 |
reply = FileFormat.Fasta; |
258 |
|
|
259 |
|
|
260 |
|
} |
261 |
|
|
262 |
|
} |
263 |
|
else |
264 |
|
{ |
265 |
74 |
reply = FileFormat.Fasta; |
266 |
|
|
267 |
|
|
268 |
|
|
269 |
74 |
if (!checkPIR) |
270 |
|
{ |
271 |
73 |
break; |
272 |
|
} |
273 |
|
} |
274 |
|
} |
275 |
|
|
276 |
|
|
277 |
|
|
278 |
|
|
279 |
|
|
280 |
|
|
281 |
3 |
if (checkPIR) |
282 |
|
{ |
283 |
1 |
String dta = null; |
284 |
1 |
if (!starterm) |
285 |
|
{ |
286 |
1 |
do |
287 |
|
{ |
288 |
1 |
try |
289 |
|
{ |
290 |
1 |
dta = source.nextLine(); |
291 |
|
} catch (IOException ex) |
292 |
|
{ |
293 |
|
} |
294 |
1 |
if (dta != null && dta.indexOf("*") > -1) |
295 |
|
{ |
296 |
1 |
starterm = true; |
297 |
|
} |
298 |
1 |
} while (dta != null && !starterm); |
299 |
|
} |
300 |
1 |
if (starterm) |
301 |
|
{ |
302 |
1 |
reply = FileFormat.PIR; |
303 |
1 |
break; |
304 |
|
} |
305 |
|
else |
306 |
|
{ |
307 |
0 |
reply = FileFormat.Fasta; |
308 |
|
} |
309 |
|
} |
310 |
|
|
311 |
2 |
break; |
312 |
|
} |
313 |
129 |
if (data.indexOf("{\"") > -1) |
314 |
|
{ |
315 |
1 |
reply = FileFormat.Json; |
316 |
1 |
break; |
317 |
|
} |
318 |
128 |
int lessThan = data.indexOf("<"); |
319 |
128 |
if ((lessThan > -1)) |
320 |
|
|
321 |
|
{ |
322 |
10 |
String upper = data.toUpperCase(); |
323 |
10 |
if (upper.substring(lessThan).startsWith("<HTML")) |
324 |
|
{ |
325 |
1 |
reply = FileFormat.Html; |
326 |
1 |
break; |
327 |
|
} |
328 |
9 |
if (upper.substring(lessThan).startsWith("<RNAML")) |
329 |
|
{ |
330 |
2 |
reply = FileFormat.Rnaml; |
331 |
2 |
break; |
332 |
|
} |
333 |
|
} |
334 |
|
|
335 |
125 |
if ((data.length() < 1) || (data.indexOf("#") == 0)) |
336 |
|
{ |
337 |
13 |
lineswereskipped = true; |
338 |
13 |
continue; |
339 |
|
} |
340 |
|
|
341 |
112 |
if (data.indexOf("PILEUP") > -1) |
342 |
|
{ |
343 |
1 |
reply = FileFormat.Pileup; |
344 |
|
|
345 |
1 |
break; |
346 |
|
} |
347 |
|
|
348 |
111 |
if ((data.indexOf("//") == 0) || ((data.indexOf("!!") > -1) && (data |
349 |
|
.indexOf("!!") < data.indexOf("_MULTIPLE_ALIGNMENT ")))) |
350 |
|
{ |
351 |
1 |
reply = FileFormat.MSF; |
352 |
|
|
353 |
1 |
break; |
354 |
|
} |
355 |
110 |
else if (data.indexOf("CLUSTAL") > -1) |
356 |
|
{ |
357 |
1 |
reply = FileFormat.Clustal; |
358 |
|
|
359 |
1 |
break; |
360 |
|
} |
361 |
|
|
362 |
109 |
else if (data.indexOf("HEADER") == 0 || data.indexOf("ATOM") == 0) |
363 |
|
{ |
364 |
10 |
reply = FileFormat.PDB; |
365 |
10 |
break; |
366 |
|
} |
367 |
99 |
else if (data.matches("\\s*\\d+\\s+\\d+\\s*")) |
368 |
|
{ |
369 |
1 |
reply = FileFormat.Phylip; |
370 |
1 |
break; |
371 |
|
} |
372 |
|
else |
373 |
|
{ |
374 |
98 |
if (!lineswereskipped && looksLikeJnetData(data)) |
375 |
|
{ |
376 |
0 |
reply = FileFormat.Jnet; |
377 |
0 |
break; |
378 |
|
} |
379 |
|
} |
380 |
|
|
381 |
98 |
lineswereskipped = true; |
382 |
|
|
383 |
|
} |
384 |
136 |
if (closeSource) |
385 |
|
{ |
386 |
136 |
source.close(); |
387 |
|
} |
388 |
|
else |
389 |
|
{ |
390 |
0 |
source.reset(bytesRead); |
391 |
|
} |
392 |
|
} catch (Exception ex) |
393 |
|
{ |
394 |
0 |
System.err.println("File Identification failed!\n" + ex); |
395 |
0 |
throw new FileFormatException(source.errormessage); |
396 |
|
} |
397 |
136 |
if (trimmedLength == 0) |
398 |
|
{ |
399 |
0 |
System.err.println( |
400 |
|
"File Identification failed! - Empty file was read."); |
401 |
0 |
throw new FileFormatException("EMPTY DATA FILE"); |
402 |
|
} |
403 |
136 |
System.out.println("File format identified as " + reply.toString()); |
404 |
136 |
return reply; |
405 |
|
} |
406 |
|
|
407 |
|
|
408 |
|
|
409 |
|
|
410 |
|
@param |
411 |
|
@return |
412 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (5) |
Complexity: 1 |
Complexity Density: 0.2 |
|
413 |
13 |
protected boolean looksLikeJnetData(String data)... |
414 |
|
{ |
415 |
13 |
char firstChar = data.charAt(0); |
416 |
13 |
int colonPos = data.indexOf(":"); |
417 |
13 |
int commaPos = data.indexOf(","); |
418 |
13 |
boolean isJnet = firstChar != '*' && firstChar != ' ' && colonPos > -1 |
419 |
|
&& commaPos > -1 && colonPos < commaPos; |
420 |
|
|
421 |
13 |
return isJnet; |
422 |
|
} |
423 |
|
|
424 |
|
|
425 |
|
|
426 |
|
|
427 |
|
|
428 |
|
@param |
429 |
|
@return |
430 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (16) |
Complexity: 5 |
Complexity Density: 0.5 |
|
431 |
224 |
protected boolean looksLikeFeatureData(String data)... |
432 |
|
{ |
433 |
224 |
if (data == null) |
434 |
|
{ |
435 |
1 |
return false; |
436 |
|
} |
437 |
223 |
String[] columns = data.split("\t"); |
438 |
223 |
if (columns.length < 6) |
439 |
|
{ |
440 |
215 |
return false; |
441 |
|
} |
442 |
21 |
for (int col = 3; col < 5; col++) |
443 |
|
{ |
444 |
15 |
try |
445 |
|
{ |
446 |
15 |
Integer.parseInt(columns[col]); |
447 |
|
} catch (NumberFormatException e) |
448 |
|
{ |
449 |
2 |
return false; |
450 |
|
} |
451 |
|
} |
452 |
6 |
return true; |
453 |
|
} |
454 |
|
|
|
|
| 0% |
Uncovered Elements: 13 (13) |
Complexity: 6 |
Complexity Density: 0.67 |
|
455 |
0 |
public static void main(String[] args)... |
456 |
|
{ |
457 |
0 |
for (int i = 0; args != null && i < args.length; i++) |
458 |
|
{ |
459 |
0 |
IdentifyFile ider = new IdentifyFile(); |
460 |
0 |
FileFormatI type = null; |
461 |
0 |
try |
462 |
|
{ |
463 |
0 |
type = ider.identify(args[i], DataSourceType.FILE); |
464 |
|
} catch (FileFormatException e) |
465 |
|
{ |
466 |
0 |
System.err.println( |
467 |
|
String.format("Error '%s' identifying file type for %s", |
468 |
|
args[i], e.getMessage())); |
469 |
|
} |
470 |
0 |
System.out.println("Type of " + args[i] + " is " + type); |
471 |
|
} |
472 |
0 |
if (args == null || args.length == 0) |
473 |
|
{ |
474 |
0 |
System.err.println("Usage: <Filename> [<Filename> ...]"); |
475 |
|
} |
476 |
|
} |
477 |
|
|
478 |
|
|
479 |
|
} |