Clover icon

Coverage Report

  1. Project Clover database Tue Mar 10 2026 14:58:44 GMT
  2. Package jalview.io

File IdentifyFile.java

 

Coverage histogram

../../img/srcFileCovDistChart0.png
0% of files have more coverage

Code metrics

122
213
12
1
578
452
109
0.51
17.75
12
9.08

Classes

Class Line # Actions
IdentifyFile 36 213 109
0.00%
 

Contributing tests

No tests hitting this source file were found.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.io;
22   
23    import java.io.File;
24    import java.io.FileNotFoundException;
25    import java.io.IOException;
26    import java.util.Locale;
27   
28    import jalview.bin.Console;
29   
30    /**
31    * Identifies the format of an alignment or annotation data source by inspecting its content.
32    *
33    * @author $author$
34    * @version $Revision$
35    */
 
36    public class IdentifyFile
37    {
38   
/*
 * Upper-case form of a standalone UTF-8 XML declaration. Lines are upper-cased
 * before being compared with this value, which is why it is stored upper-cased.
 */
39    private static final String XMLHEADER = "<?XML VERSION=\"1.0\" ENCODING=\"UTF-8\" STANDALONE=\"YES\"?>";
40   
 
41  0 toggle public FileFormatI identify(Object file, DataSourceType protocol)
42    throws FileFormatException, FileNotFoundException
43    {
44  0 return identify(file, protocol, false);
45    }
46   
 
47  0 toggle public FileFormatI identify(Object file, DataSourceType protocol,
48    boolean checkForAnnotations)
49    throws FileFormatException, FileNotFoundException
50    {
51    // BH 2018
52  0 return (file instanceof File
53    ? identify((File) file, protocol, checkForAnnotations)
54    : identify((String) file, protocol, checkForAnnotations));
55   
56    }
57   
 
58  0 toggle public FileFormatI identify(File file, DataSourceType sourceType)
59    throws FileFormatException
60    {
61  0 return identify(file, sourceType, false);
62    }
63   
 
64  0 toggle public FileFormatI identify(File file, DataSourceType sourceType,
65    boolean checkForAnnotations) throws FileFormatException
66    {
67    // BH 2018
68  0 String emessage = "UNIDENTIFIED FILE PARSING ERROR";
69  0 FileParse parser = null;
70  0 try
71    {
72   
73  0 parser = new FileParse(file, sourceType);
74  0 if (parser.isValid())
75    {
76  0 FileFormatI format = identify(parser, true, checkForAnnotations);
77  0 if (file.getName()!=null && file.getName().toLowerCase(Locale.ROOT).endsWith(".a3m"))
78    {
79  0 Console.debug("Identified an A3m file! ("+file.getName()+")");
80  0 format = FileFormat.A3M;
81    }
82  0 return format;
83    }
84    } catch (Exception e)
85    {
86  0 Console.error("Error whilst identifying " + file, e);
87  0 emessage = e.getMessage();
88    }
89  0 if (parser != null)
90    {
91  0 throw new FileFormatException(parser.errormessage);
92    }
93  0 throw new FileFormatException(emessage);
94    }
95   
 
96  0 toggle public FileFormatI identify(String file, DataSourceType sourceType)
97    throws FileFormatException, FileNotFoundException
98    {
99  0 return identify(file, sourceType, false);
100    }
101   
102    /**
103    * Identify a datasource's file content.
104    *
105    * @note Do not use this method for stream sources - create a FileParse object
106    * instead.
107    *
108    * @param file
109    * @param sourceType
110    * @return
111    * @throws FileFormatException
112    */
 
113  0 toggle public FileFormatI identify(String file, DataSourceType sourceType,
114    boolean checkForAnnotations)
115    throws FileFormatException, FileNotFoundException
116    {
117  0 String emessage = "UNIDENTIFIED FILE PARSING ERROR";
118  0 FileParse parser = null;
119  0 FileNotFoundException fnf = null;
120  0 try
121    {
122  0 parser = new FileParse(file, sourceType);
123  0 if (parser.isValid())
124    {
125  0 FileFormatI format = identify(parser);
126  0 if (DataSourceType.FILE.equals(sourceType) && file!=null && file.toLowerCase(Locale.ROOT).endsWith(".a3m"))
127    {
128  0 Console.debug("Identified an A3m file! ("+file+")");
129  0 format = FileFormat.A3M;
130    }
131  0 return format;
132   
133    }
134    } catch (FileNotFoundException e)
135    {
136  0 fnf = e;
137  0 emessage = "Could not find '" + file + "'";
138  0 Console.error("Could not find '" + file + "'", e);
139    } catch (IOException e)
140    {
141  0 Console.error("Error whilst trying to read " + file, e);
142    } catch (Exception e)
143    {
144  0 Console.error("Error whilst identifying " + file, e);
145  0 emessage = e.getMessage();
146    }
147  0 if (parser != null)
148    {
149  0 throw new FileFormatException(parser.errormessage);
150    }
151  0 if (fnf != null)
152    {
153  0 throw (fnf);
154    }
155  0 throw new FileFormatException(emessage);
156    }
157   
 
158  0 toggle public FileFormatI identify(FileParse source) throws FileFormatException
159    {
160  0 return identify(source, true, false);
161    // preserves original behaviour prior to version 2.3
162    }
163   
 
164  0 toggle public FileFormatI identify(AlignmentFileReaderI file,
165    boolean closeSource) throws IOException
166    {
167  0 FileParse fp = new FileParse(file.getInFile(),
168    file.getDataSourceType());
169  0 return identify(fp, closeSource, false);
170    }
171   
172    /**
173    * Identify contents of source, closing it or resetting source to start
174    * afterwards.
175    *
176    * @param source
177    * @param closeSource
178    * @return (best guess at) file format
179    * @throws FileFormatException
180    */
 
181  0 toggle public FileFormatI identify(FileParse source, boolean closeSource,
182    boolean checkForAnnotations) throws FileFormatException
183    {
184  0 FileFormatI reply = FileFormat.Pfam;
185  0 String data;
186  0 int bytesRead = 0;
187  0 int trimmedLength = 0;
188  0 boolean isXml = false; // set true if first line is XMLHEADER
189  0 boolean lineswereskipped = false;
190  0 boolean isBinary = false; // true if length is non-zero and non-printable
191    // characters are encountered
192   
193  0 try
194    {
195  0 if (!closeSource)
196    {
197  0 source.mark();
198    }
199  0 boolean aaIndexHeaderRead = false;
200   
201  0 while ((data = source.nextLine()) != null)
202    {
203  0 bytesRead += data.length();
204  0 trimmedLength += data.trim().length();
205  0 if (!lineswereskipped)
206    {
207  0 for (int i = 0; !isBinary && i < data.length(); i++)
208    {
209  0 char c = data.charAt(i);
210  0 isBinary = (c < 32 && c != '\t' && c != '\n' && c != '\r'
211    && c != 5 && c != 27); // nominal binary character filter
212    // excluding CR, LF, tab,DEL and ^E
213    // for certain blast ids
214    }
215    }
216  0 if (isBinary)
217    {
218    // jar files are special - since they contain all sorts of random
219    // characters.
220  0 if (source.inFile != null)
221    {
222  0 String fileStr = source.inFile.getName();
223  0 if (fileStr.contains(".jar") || fileStr.contains(".zip")
224    || fileStr.contains(".jvp"))
225    {
226    // possibly a Jalview archive (but check further)
227  0 reply = FileFormat.Jalview;
228    }
229    }
230  0 if (!lineswereskipped && data.startsWith("PK"))
231    {
232  0 reply = FileFormat.Jalview; // archive
233  0 break;
234    }
235    }
236  0 data = data.toUpperCase(Locale.ROOT);
237   
238  0 if (data.startsWith(ScoreMatrixFile.SCOREMATRIX))
239    {
240  0 reply = FileFormat.ScoreMatrix;
241  0 break;
242    }
243  0 if (data.startsWith(XMLHEADER) && !lineswereskipped)
244    {
245  0 isXml = true;
246    }
247  0 if (data.startsWith("HMMER3"))
248    {
249  0 reply = FileFormat.HMMER3;
250  0 break;
251    }
252  0 if (data.startsWith("LOCUS"))
253    {
254  0 reply = FileFormat.GenBank;
255  0 break;
256    }
257  0 if (data.startsWith("ID "))
258    {
259  0 if (data.substring(2).trim().split(";").length == 7)
260    {
261  0 reply = FileFormat.Embl;
262  0 break;
263    }
264    }
265  0 if (data.startsWith("H ") && !aaIndexHeaderRead)
266    {
267  0 aaIndexHeaderRead = true;
268    }
269  0 if (data.startsWith("D ") && aaIndexHeaderRead)
270    {
271  0 reply = FileFormat.ScoreMatrix;
272  0 break;
273    }
274  0 if (data.startsWith("##GFF-VERSION"))
275    {
276    // GFF - possibly embedded in a Jalview features file!
277  0 reply = FileFormat.Features;
278  0 break;
279    }
280  0 if (looksLikeFeatureData(data))
281    {
282  0 reply = FileFormat.Features;
283  0 break;
284    }
285  0 if (data.indexOf("# STOCKHOLM") > -1)
286    {
287  0 reply = FileFormat.Stockholm;
288  0 break;
289    }
290  0 if (data.indexOf("_ENTRY.ID") > -1
291    || data.indexOf("_AUDIT_AUTHOR.NAME") > -1
292    || data.indexOf("_ATOM_SITE.") > -1)
293    {
294  0 reply = FileFormat.MMCif;
295  0 break;
296    }
297    // if (data.indexOf(">") > -1)
298  0 if (data.startsWith(">"))
299    {
300    // FASTA, PIR file or BLC file
301  0 boolean checkPIR = false, starterm = false;
302  0 if ((data.indexOf(">P1;") > -1) || (data.indexOf(">DL;") > -1))
303    {
304    // watch for PIR file attributes
305  0 checkPIR = true;
306  0 reply = FileFormat.PIR;
307    }
308    // could also be BLC file, read next line to confirm
309  0 data = source.nextLine();
310   
311  0 if (data.indexOf(">") > -1)
312    {
313  0 reply = FileFormat.BLC;
314    }
315    else
316    {
317    // Is this a single line BLC file?
318  0 String data1 = source.nextLine();
319  0 String data2 = source.nextLine();
320  0 int c1;
321  0 if (checkPIR)
322    {
323  0 starterm = (data1 != null && data1.indexOf("*") > -1)
324    || (data2 != null && data2.indexOf("*") > -1);
325    }
326  0 if (data2 != null && (c1 = data.indexOf("*")) > -1)
327    {
328  0 if (c1 == 0 && c1 == data2.indexOf("*"))
329    {
330  0 reply = FileFormat.BLC;
331    }
332    else
333    {
334  0 reply = FileFormat.Fasta; // possibly a bad choice - may be
335    // recognised as PIR
336    }
337    // otherwise can still possibly be a PIR file
338    }
339    else
340    {
341  0 reply = FileFormat.Fasta;
342    // TODO : AMSA File is indicated if there is annotation in the
343    // FASTA file - but FASTA will automatically generate this at the
344    // mo.
345  0 if (!checkPIR)
346    {
347  0 break;
348    }
349    }
350    }
351    // final check for PIR content. require
352    // >P1;title\n<blah>\nterminated sequence to occur at least once.
353   
354    // TODO the PIR/fasta ambiguity may be the use case that is needed to
355    // have
356    // a 'Parse as type XXX' parameter for the applet/application.
357  0 if (checkPIR)
358    {
359  0 String dta = null;
360  0 if (!starterm)
361    {
362  0 do
363    {
364  0 try
365    {
366  0 dta = source.nextLine();
367    } catch (IOException ex)
368    {
369    }
370  0 if (dta != null && dta.indexOf("*") > -1)
371    {
372  0 starterm = true;
373    }
374  0 } while (dta != null && !starterm);
375    }
376  0 if (starterm)
377    {
378  0 reply = FileFormat.PIR;
379  0 break;
380    }
381    else
382    {
383  0 reply = FileFormat.Fasta; // probably a bad choice!
384    }
385    }
386    // read as a FASTA (probably)
387  0 break;
388    }
389  0 if (data.indexOf("{\"") > -1)
390    {
391  0 reply = FileFormat.Json;
392  0 break;
393    }
394  0 int lessThan = data.indexOf("<");
395  0 if ((lessThan > -1)) // possible Markup Language data i.e HTML, RNAML,
396    // XML
397    {
398  0 String upper = data.toUpperCase(Locale.ROOT);
399  0 if (upper.substring(lessThan).startsWith("<HTML"))
400    {
401  0 reply = FileFormat.Html;
402  0 break;
403    }
404  0 if (upper.substring(lessThan).startsWith("<RNAML"))
405    {
406  0 reply = FileFormat.Rnaml;
407  0 break;
408    }
409  0 if (isXml && data.contains(
410    "<NS2:JALVIEWUSERCOLOURS SCHEMENAME=\"SEQUENCE FEATURES\" XMLNS:NS2=\"WWW.JALVIEW.ORG/COLOURS\">"))
411    {
412  0 reply = FileFormat.FeatureSettings;
413  0 break;
414    }
415  0 if (upper.substring(lessThan).startsWith("<BSML"))
416    {
417  0 reply = FileFormat.BSML;
418  0 break;
419    }
420    }
421   
422  0 if ((data.length() < 1) || (data.indexOf("#") == 0))
423    {
424  0 lineswereskipped = true;
425  0 continue;
426    }
427   
428  0 if (data.indexOf("PILEUP") > -1)
429    {
430  0 reply = FileFormat.Pileup;
431   
432  0 break;
433    }
434   
435  0 if ((data.indexOf("//") == 0) || ((data.indexOf("!!") > -1) && (data
436    .indexOf("!!") < data.indexOf("_MULTIPLE_ALIGNMENT "))))
437    {
438  0 reply = FileFormat.MSF;
439   
440  0 break;
441    }
442  0 else if (data.indexOf("CLUSTAL") > -1)
443    {
444  0 reply = FileFormat.Clustal;
445   
446  0 break;
447    }
448   
449  0 else if (data.indexOf("HEADER") == 0 || data.indexOf("ATOM") == 0)
450    {
451  0 reply = FileFormat.PDB;
452  0 break;
453    }
454  0 else if (data.matches("\\s*\\d+\\s+\\d+\\s*"))
455    {
456  0 reply = FileFormat.Phylip;
457  0 break;
458    }
459  0 else if (checkForAnnotations && !lineswereskipped
460    && data.equals(AnnotationFile.JALVIEW_ANNOTATION))
461    {
462  0 reply = FileFormat.JalviewAnnotation;
463    }
464    else
465    {
466  0 if (!lineswereskipped && looksLikeJnetData(data))
467    {
468  0 reply = FileFormat.Jnet;
469  0 break;
470    }
471    }
472   
473  0 lineswereskipped = true; // this means there was some junk before any
474    // key file signature
475    }
476  0 if (closeSource)
477    {
478  0 source.close();
479    }
480    else
481    {
482  0 source.reset(bytesRead); // so the file can be parsed from the mark
483    }
484    } catch (Exception ex)
485    {
486  0 Console.error("File Identification failed!\n" + ex);
487  0 throw new FileFormatException(source.errormessage);
488    }
489  0 if (trimmedLength == 0)
490    {
491  0 Console.error("File Identification failed! - Empty file was read.");
492  0 throw new FileFormatException("EMPTY DATA FILE");
493    }
494  0 Console.debug("File format identified as " + reply.toString());
495  0 return reply;
496    }
497   
498    /**
499    * Returns true if the data appears to be Jnet concise annotation format
500    *
501    * @param data
502    * @return
503    */
 
504  0 toggle protected boolean looksLikeJnetData(String data)
505    {
506  0 char firstChar = data.charAt(0);
507  0 int colonPos = data.indexOf(":");
508  0 int commaPos = data.indexOf(",");
509  0 boolean isJnet = firstChar != '*' && firstChar != ' ' && colonPos > -1
510    && commaPos > -1 && colonPos < commaPos;
511    // && data.indexOf(",")<data.indexOf(",", data.indexOf(","))) / ??
512  0 return isJnet;
513    }
514   
515    /**
516    * Returns true if the data has at least 6 tab-delimited fields _and_ fields 4
517    * and 5 are integer (start/end)
518    *
519    * @param data
520    * @return
521    */
 
522  0 toggle protected boolean looksLikeFeatureData(String data)
523    {
524  0 if (data == null)
525    {
526  0 return false;
527    }
528  0 String[] columns = data.split("\t");
529  0 if (columns.length < 6)
530    {
531  0 return false;
532    }
533  0 for (int col = 3; col < 5; col++)
534    {
535  0 try
536    {
537  0 Integer.parseInt(columns[col]);
538    } catch (NumberFormatException e)
539    {
540  0 return false;
541    }
542    }
543  0 return true;
544    }
545   
546    /**
547    *
548    * @param args
549    * @j2sIgnore
550    */
 
551  0 toggle public static void main(String[] args)
552    {
553  0 for (int i = 0; args != null && i < args.length; i++)
554    {
555  0 IdentifyFile ider = new IdentifyFile();
556  0 FileFormatI type = null;
557  0 try
558    {
559  0 type = ider.identify(args[i], DataSourceType.FILE);
560    } catch (FileNotFoundException e)
561    {
562  0 Console.error(String.format("Error '%s' fetching file %s", args[i],
563    e.getMessage()));
564    } catch (FileFormatException e)
565    {
566  0 Console.error(
567    String.format("Error '%s' identifying file type for %s",
568    args[i], e.getMessage()));
569    }
570  0 Console.info("Type of " + args[i] + " is " + type);
571    }
572  0 if (args == null || args.length == 0)
573    {
574  0 Console.error("Usage: <Filename> [<Filename> ...]");
575    }
576    }
577   
578    }