1 |
|
|
2 |
|
|
3 |
|
|
4 |
|
|
5 |
|
|
6 |
|
|
7 |
|
|
8 |
|
|
9 |
|
|
10 |
|
|
11 |
|
|
12 |
|
|
13 |
|
|
14 |
|
|
15 |
|
|
16 |
|
|
17 |
|
|
18 |
|
|
19 |
|
|
20 |
|
|
21 |
|
package jalview.io; |
22 |
|
|
23 |
|
import java.io.File; |
24 |
|
import java.io.FileNotFoundException; |
25 |
|
import java.io.IOException; |
26 |
|
import java.util.Locale; |
27 |
|
|
28 |
|
import jalview.bin.Console; |
29 |
|
|
30 |
|
|
31 |
|
|
32 |
|
|
33 |
|
@author |
34 |
|
@version |
35 |
|
|
|
|
| 75.9% |
Uncovered Elements: 76 (315) |
Complexity: 96 |
Complexity Density: 0.49 |
|
36 |
|
public class IdentifyFile |
37 |
|
{ |
38 |
|
|
39 |
|
private static final String XMLHEADER = "<?XML VERSION=\"1.0\" ENCODING=\"UTF-8\" STANDALONE=\"YES\"?>"; |
40 |
|
|
|
|
| 0% |
Uncovered Elements: 3 (3) |
Complexity: 2 |
Complexity Density: 2 |
|
41 |
0 |
public FileFormatI identify(Object file, DataSourceType protocol)... |
42 |
|
throws FileFormatException, FileNotFoundException |
43 |
|
{ |
44 |
|
|
45 |
0 |
return (file instanceof File ? identify((File) file, protocol) |
46 |
|
: identify((String) file, protocol)); |
47 |
|
|
48 |
|
} |
49 |
|
|
|
|
| 46.7% |
Uncovered Elements: 8 (15) |
Complexity: 4 |
Complexity Density: 0.36 |
|
50 |
6 |
public FileFormatI identify(File file, DataSourceType sourceType)... |
51 |
|
throws FileFormatException |
52 |
|
{ |
53 |
|
|
54 |
6 |
String emessage = "UNIDENTIFIED FILE PARSING ERROR"; |
55 |
6 |
FileParse parser = null; |
56 |
6 |
try |
57 |
|
{ |
58 |
6 |
parser = new FileParse(file, sourceType); |
59 |
6 |
if (parser.isValid()) |
60 |
|
{ |
61 |
6 |
return identify(parser); |
62 |
|
} |
63 |
|
} catch (Exception e) |
64 |
|
{ |
65 |
0 |
Console.error("Error whilst identifying " + file, e); |
66 |
0 |
emessage = e.getMessage(); |
67 |
|
} |
68 |
0 |
if (parser != null) |
69 |
|
{ |
70 |
0 |
throw new FileFormatException(parser.errormessage); |
71 |
|
} |
72 |
0 |
throw new FileFormatException(emessage); |
73 |
|
} |
74 |
|
|
75 |
|
|
76 |
|
|
77 |
|
|
78 |
|
|
79 |
|
|
80 |
|
|
81 |
|
@param |
82 |
|
@param |
83 |
|
@return |
84 |
|
@throws |
85 |
|
|
|
|
| 33.3% |
Uncovered Elements: 16 (24) |
Complexity: 7 |
Complexity Density: 0.39 |
|
86 |
213 |
public FileFormatI identify(String file, DataSourceType sourceType)... |
87 |
|
throws FileFormatException, FileNotFoundException |
88 |
|
{ |
89 |
213 |
String emessage = "UNIDENTIFIED FILE PARSING ERROR"; |
90 |
213 |
FileParse parser = null; |
91 |
213 |
FileNotFoundException fnf = null; |
92 |
213 |
try |
93 |
|
{ |
94 |
213 |
parser = new FileParse(file, sourceType); |
95 |
213 |
if (parser.isValid()) |
96 |
|
{ |
97 |
213 |
return identify(parser); |
98 |
|
} |
99 |
|
} catch (FileNotFoundException e) |
100 |
|
{ |
101 |
0 |
fnf = e; |
102 |
0 |
emessage = "Could not find '" + file + "'"; |
103 |
0 |
Console.error("Could not find '" + file + "'", e); |
104 |
|
} catch (IOException e) |
105 |
|
{ |
106 |
0 |
Console.error("Error whilst trying to read " + file, e); |
107 |
|
} catch (Exception e) |
108 |
|
{ |
109 |
0 |
Console.error("Error whilst identifying " + file, e); |
110 |
0 |
emessage = e.getMessage(); |
111 |
|
} |
112 |
0 |
if (parser != null) |
113 |
|
{ |
114 |
0 |
throw new FileFormatException(parser.errormessage); |
115 |
|
} |
116 |
0 |
if (fnf != null) |
117 |
|
{ |
118 |
0 |
throw (fnf); |
119 |
|
} |
120 |
0 |
throw new FileFormatException(emessage); |
121 |
|
} |
122 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
123 |
219 |
public FileFormatI identify(FileParse source) throws FileFormatException... |
124 |
|
{ |
125 |
219 |
return identify(source, true); |
126 |
|
|
127 |
|
} |
128 |
|
|
|
|
| 0% |
Uncovered Elements: 2 (2) |
Complexity: 1 |
Complexity Density: 0.5 |
|
129 |
0 |
public FileFormatI identify(AlignmentFileReaderI file,... |
130 |
|
boolean closeSource) throws IOException |
131 |
|
{ |
132 |
0 |
FileParse fp = new FileParse(file.getInFile(), |
133 |
|
file.getDataSourceType()); |
134 |
0 |
return identify(fp, closeSource); |
135 |
|
} |
136 |
|
|
137 |
|
|
138 |
|
|
139 |
|
|
140 |
|
|
141 |
|
@param |
142 |
|
@param |
143 |
|
@return |
144 |
|
@throws |
145 |
|
|
|
|
| 86.7% |
Uncovered Elements: 30 (226) |
Complexity: 68 |
Complexity Density: 0.5 |
|
146 |
219 |
public FileFormatI identify(FileParse source, boolean closeSource)... |
147 |
|
throws FileFormatException |
148 |
|
{ |
149 |
219 |
FileFormatI reply = FileFormat.Pfam; |
150 |
219 |
String data; |
151 |
219 |
int bytesRead = 0; |
152 |
219 |
int trimmedLength = 0; |
153 |
219 |
boolean isXml = false; |
154 |
219 |
boolean lineswereskipped = false; |
155 |
219 |
boolean isBinary = false; |
156 |
|
|
157 |
|
|
158 |
219 |
try |
159 |
|
{ |
160 |
219 |
if (!closeSource) |
161 |
|
{ |
162 |
0 |
source.mark(); |
163 |
|
} |
164 |
219 |
boolean aaIndexHeaderRead = false; |
165 |
|
|
166 |
? |
while ((data = source.nextLine()) != null) |
167 |
|
{ |
168 |
309 |
bytesRead += data.length(); |
169 |
309 |
trimmedLength += data.trim().length(); |
170 |
309 |
if (!lineswereskipped) |
171 |
|
{ |
172 |
59409 |
for (int i = 0; !isBinary && i < data.length(); i++) |
173 |
|
{ |
174 |
59190 |
char c = data.charAt(i); |
175 |
59190 |
isBinary = (c < 32 && c != '\t' && c != '\n' && c != '\r' |
176 |
|
&& c != 5 && c != 27); |
177 |
|
|
178 |
|
|
179 |
|
} |
180 |
|
} |
181 |
309 |
if (isBinary) |
182 |
|
{ |
183 |
|
|
184 |
|
|
185 |
2 |
if (source.inFile != null) |
186 |
|
{ |
187 |
2 |
String fileStr = source.inFile.getName(); |
188 |
2 |
if (fileStr.contains(".jar") || fileStr.contains(".zip") |
189 |
|
|| fileStr.contains(".jvp")) |
190 |
|
{ |
191 |
|
|
192 |
2 |
reply = FileFormat.Jalview; |
193 |
|
} |
194 |
|
} |
195 |
2 |
if (!lineswereskipped && data.startsWith("PK")) |
196 |
|
{ |
197 |
2 |
reply = FileFormat.Jalview; |
198 |
2 |
break; |
199 |
|
} |
200 |
|
} |
201 |
307 |
data = data.toUpperCase(Locale.ROOT); |
202 |
|
|
203 |
307 |
if (data.startsWith(ScoreMatrixFile.SCOREMATRIX)) |
204 |
|
{ |
205 |
1 |
reply = FileFormat.ScoreMatrix; |
206 |
1 |
break; |
207 |
|
} |
208 |
306 |
if (data.startsWith(XMLHEADER) && !lineswereskipped) |
209 |
|
{ |
210 |
2 |
isXml = true; |
211 |
|
} |
212 |
306 |
if (data.startsWith("LOCUS")) |
213 |
|
{ |
214 |
1 |
reply = FileFormat.GenBank; |
215 |
1 |
break; |
216 |
|
} |
217 |
305 |
if (data.startsWith("ID ")) |
218 |
|
{ |
219 |
1 |
if (data.substring(2).trim().split(";").length == 7) |
220 |
|
{ |
221 |
1 |
reply = FileFormat.Embl; |
222 |
1 |
break; |
223 |
|
} |
224 |
|
} |
225 |
304 |
if (data.startsWith("H ") && !aaIndexHeaderRead) |
226 |
|
{ |
227 |
1 |
aaIndexHeaderRead = true; |
228 |
|
} |
229 |
304 |
if (data.startsWith("D ") && aaIndexHeaderRead) |
230 |
|
{ |
231 |
1 |
reply = FileFormat.ScoreMatrix; |
232 |
1 |
break; |
233 |
|
} |
234 |
303 |
if (data.startsWith("##GFF-VERSION")) |
235 |
|
{ |
236 |
|
|
237 |
1 |
reply = FileFormat.Features; |
238 |
1 |
break; |
239 |
|
} |
240 |
302 |
if (looksLikeFeatureData(data)) |
241 |
|
{ |
242 |
4 |
reply = FileFormat.Features; |
243 |
4 |
break; |
244 |
|
} |
245 |
298 |
if (data.indexOf("# STOCKHOLM") > -1) |
246 |
|
{ |
247 |
7 |
reply = FileFormat.Stockholm; |
248 |
7 |
break; |
249 |
|
} |
250 |
291 |
if (data.indexOf("_ENTRY.ID") > -1 |
251 |
|
|| data.indexOf("_AUDIT_AUTHOR.NAME") > -1 |
252 |
|
|| data.indexOf("_ATOM_SITE.") > -1) |
253 |
|
{ |
254 |
1 |
reply = FileFormat.MMCif; |
255 |
1 |
break; |
256 |
|
} |
257 |
|
|
258 |
290 |
if (data.startsWith(">")) |
259 |
|
{ |
260 |
|
|
261 |
174 |
boolean checkPIR = false, starterm = false; |
262 |
174 |
if ((data.indexOf(">P1;") > -1) || (data.indexOf(">DL;") > -1)) |
263 |
|
{ |
264 |
|
|
265 |
1 |
checkPIR = true; |
266 |
1 |
reply = FileFormat.PIR; |
267 |
|
} |
268 |
|
|
269 |
174 |
data = source.nextLine(); |
270 |
|
|
271 |
174 |
if (data.indexOf(">") > -1) |
272 |
|
{ |
273 |
1 |
reply = FileFormat.BLC; |
274 |
|
} |
275 |
|
else |
276 |
|
{ |
277 |
|
|
278 |
173 |
String data1 = source.nextLine(); |
279 |
173 |
String data2 = source.nextLine(); |
280 |
173 |
int c1; |
281 |
173 |
if (checkPIR) |
282 |
|
{ |
283 |
1 |
starterm = (data1 != null && data1.indexOf("*") > -1) |
284 |
|
|| (data2 != null && data2.indexOf("*") > -1); |
285 |
|
} |
286 |
? |
if (data2 != null && (c1 = data.indexOf("*")) > -1) |
287 |
|
{ |
288 |
0 |
if (c1 == 0 && c1 == data2.indexOf("*")) |
289 |
|
{ |
290 |
0 |
reply = FileFormat.BLC; |
291 |
|
} |
292 |
|
else |
293 |
|
{ |
294 |
0 |
reply = FileFormat.Fasta; |
295 |
|
|
296 |
|
|
297 |
|
} |
298 |
|
|
299 |
|
} |
300 |
|
else |
301 |
|
{ |
302 |
173 |
reply = FileFormat.Fasta; |
303 |
|
|
304 |
|
|
305 |
|
|
306 |
173 |
if (!checkPIR) |
307 |
|
{ |
308 |
172 |
break; |
309 |
|
} |
310 |
|
} |
311 |
|
} |
312 |
|
|
313 |
|
|
314 |
|
|
315 |
|
|
316 |
|
|
317 |
|
|
318 |
2 |
if (checkPIR) |
319 |
|
{ |
320 |
1 |
String dta = null; |
321 |
1 |
if (!starterm) |
322 |
|
{ |
323 |
1 |
do |
324 |
|
{ |
325 |
1 |
try |
326 |
|
{ |
327 |
1 |
dta = source.nextLine(); |
328 |
|
} catch (IOException ex) |
329 |
|
{ |
330 |
|
} |
331 |
1 |
if (dta != null && dta.indexOf("*") > -1) |
332 |
|
{ |
333 |
1 |
starterm = true; |
334 |
|
} |
335 |
1 |
} while (dta != null && !starterm); |
336 |
|
} |
337 |
1 |
if (starterm) |
338 |
|
{ |
339 |
1 |
reply = FileFormat.PIR; |
340 |
1 |
break; |
341 |
|
} |
342 |
|
else |
343 |
|
{ |
344 |
0 |
reply = FileFormat.Fasta; |
345 |
|
} |
346 |
|
} |
347 |
|
|
348 |
1 |
break; |
349 |
|
} |
350 |
116 |
if (data.indexOf("{\"") > -1) |
351 |
|
{ |
352 |
1 |
reply = FileFormat.Json; |
353 |
1 |
break; |
354 |
|
} |
355 |
115 |
int lessThan = data.indexOf("<"); |
356 |
115 |
if ((lessThan > -1)) |
357 |
|
|
358 |
|
{ |
359 |
13 |
String upper = data.toUpperCase(Locale.ROOT); |
360 |
13 |
if (upper.substring(lessThan).startsWith("<HTML")) |
361 |
|
{ |
362 |
1 |
reply = FileFormat.Html; |
363 |
1 |
break; |
364 |
|
} |
365 |
12 |
if (upper.substring(lessThan).startsWith("<RNAML")) |
366 |
|
{ |
367 |
2 |
reply = FileFormat.Rnaml; |
368 |
2 |
break; |
369 |
|
} |
370 |
10 |
if (isXml && data.contains( |
371 |
|
"<NS2:JALVIEWUSERCOLOURS SCHEMENAME=\"SEQUENCE FEATURES\" XMLNS:NS2=\"WWW.JALVIEW.ORG/COLOURS\">")) |
372 |
|
{ |
373 |
2 |
reply = FileFormat.FeatureSettings; |
374 |
2 |
break; |
375 |
|
} |
376 |
|
} |
377 |
|
|
378 |
110 |
if ((data.length() < 1) || (data.indexOf("#") == 0)) |
379 |
|
{ |
380 |
9 |
lineswereskipped = true; |
381 |
9 |
continue; |
382 |
|
} |
383 |
|
|
384 |
101 |
if (data.indexOf("PILEUP") > -1) |
385 |
|
{ |
386 |
1 |
reply = FileFormat.Pileup; |
387 |
|
|
388 |
1 |
break; |
389 |
|
} |
390 |
|
|
391 |
100 |
if ((data.indexOf("//") == 0) || ((data.indexOf("!!") > -1) && (data |
392 |
|
.indexOf("!!") < data.indexOf("_MULTIPLE_ALIGNMENT ")))) |
393 |
|
{ |
394 |
1 |
reply = FileFormat.MSF; |
395 |
|
|
396 |
1 |
break; |
397 |
|
} |
398 |
99 |
else if (data.indexOf("CLUSTAL") > -1) |
399 |
|
{ |
400 |
1 |
reply = FileFormat.Clustal; |
401 |
|
|
402 |
1 |
break; |
403 |
|
} |
404 |
|
|
405 |
98 |
else if (data.indexOf("HEADER") == 0 || data.indexOf("ATOM") == 0) |
406 |
|
{ |
407 |
12 |
reply = FileFormat.PDB; |
408 |
12 |
break; |
409 |
|
} |
410 |
86 |
else if (data.matches("\\s*\\d+\\s+\\d+\\s*")) |
411 |
|
{ |
412 |
1 |
reply = FileFormat.Phylip; |
413 |
1 |
break; |
414 |
|
} |
415 |
|
else |
416 |
|
{ |
417 |
85 |
if (!lineswereskipped && looksLikeJnetData(data)) |
418 |
|
{ |
419 |
0 |
reply = FileFormat.Jnet; |
420 |
0 |
break; |
421 |
|
} |
422 |
|
} |
423 |
|
|
424 |
85 |
lineswereskipped = true; |
425 |
|
|
426 |
|
} |
427 |
219 |
if (closeSource) |
428 |
|
{ |
429 |
219 |
source.close(); |
430 |
|
} |
431 |
|
else |
432 |
|
{ |
433 |
0 |
source.reset(bytesRead); |
434 |
|
} |
435 |
|
} catch (Exception ex) |
436 |
|
{ |
437 |
0 |
Console.error("File Identification failed!\n" + ex); |
438 |
0 |
throw new FileFormatException(source.errormessage); |
439 |
|
} |
440 |
219 |
if (trimmedLength == 0) |
441 |
|
{ |
442 |
0 |
Console.error("File Identification failed! - Empty file was read."); |
443 |
0 |
throw new FileFormatException("EMPTY DATA FILE"); |
444 |
|
} |
445 |
219 |
Console.debug("File format identified as " + reply.toString()); |
446 |
219 |
return reply; |
447 |
|
} |
448 |
|
|
449 |
|
|
450 |
|
|
451 |
|
|
452 |
|
@param |
453 |
|
@return |
454 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (5) |
Complexity: 1 |
Complexity Density: 0.2 |
|
455 |
11 |
protected boolean looksLikeJnetData(String data)... |
456 |
|
{ |
457 |
11 |
char firstChar = data.charAt(0); |
458 |
11 |
int colonPos = data.indexOf(":"); |
459 |
11 |
int commaPos = data.indexOf(","); |
460 |
11 |
boolean isJnet = firstChar != '*' && firstChar != ' ' && colonPos > -1 |
461 |
|
&& commaPos > -1 && colonPos < commaPos; |
462 |
|
|
463 |
11 |
return isJnet; |
464 |
|
} |
465 |
|
|
466 |
|
|
467 |
|
|
468 |
|
|
469 |
|
|
470 |
|
@param |
471 |
|
@return |
472 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (16) |
Complexity: 5 |
Complexity Density: 0.5 |
|
473 |
309 |
protected boolean looksLikeFeatureData(String data)... |
474 |
|
{ |
475 |
309 |
if (data == null) |
476 |
|
{ |
477 |
1 |
return false; |
478 |
|
} |
479 |
308 |
String[] columns = data.split("\t"); |
480 |
308 |
if (columns.length < 6) |
481 |
|
{ |
482 |
300 |
return false; |
483 |
|
} |
484 |
21 |
for (int col = 3; col < 5; col++) |
485 |
|
{ |
486 |
15 |
try |
487 |
|
{ |
488 |
15 |
Integer.parseInt(columns[col]); |
489 |
|
} catch (NumberFormatException e) |
490 |
|
{ |
491 |
2 |
return false; |
492 |
|
} |
493 |
|
} |
494 |
6 |
return true; |
495 |
|
} |
496 |
|
|
497 |
|
|
498 |
|
|
499 |
|
@param |
500 |
|
|
501 |
|
|
|
|
| 0% |
Uncovered Elements: 14 (14) |
Complexity: 7 |
Complexity Density: 0.7 |
|
502 |
0 |
public static void main(String[] args)... |
503 |
|
{ |
504 |
0 |
for (int i = 0; args != null && i < args.length; i++) |
505 |
|
{ |
506 |
0 |
IdentifyFile ider = new IdentifyFile(); |
507 |
0 |
FileFormatI type = null; |
508 |
0 |
try |
509 |
|
{ |
510 |
0 |
type = ider.identify(args[i], DataSourceType.FILE); |
511 |
|
} catch (FileNotFoundException e) |
512 |
|
{ |
513 |
0 |
Console.error(String.format("Error '%s' fetching file %s", args[i], |
514 |
|
e.getMessage())); |
515 |
|
} catch (FileFormatException e) |
516 |
|
{ |
517 |
0 |
Console.error( |
518 |
|
String.format("Error '%s' identifying file type for %s", |
519 |
|
args[i], e.getMessage())); |
520 |
|
} |
521 |
0 |
Console.debug("Type of " + args[i] + " is " + type); |
522 |
|
} |
523 |
0 |
if (args == null || args.length == 0) |
524 |
|
{ |
525 |
0 |
Console.error("Usage: <Filename> [<Filename> ...]"); |
526 |
|
} |
527 |
|
} |
528 |
|
|
529 |
|
} |