Clover icon

Coverage Report

  1. Project Clover database Thu Dec 4 2025 16:11:35 GMT
  2. Package jalview.io

File HMMFile.java

 

Coverage histogram

../../img/srcFileCovDistChart10.png
0% of files have more coverage

Code metrics

64
199
27
1
716
436
64
0.32
7.37
27
2.37

Classes

Class Line # Actions
HMMFile 23 199 64
0.9344827593.4%
 

Contributing tests

This file is covered by 14 tests. .

Source view

1    package jalview.io;
2   
3    import jalview.api.AlignExportSettingsI;
4    import jalview.api.AlignmentViewPanel;
5    import jalview.datamodel.HMMNode;
6    import jalview.datamodel.HiddenMarkovModel;
7    import jalview.datamodel.SequenceI;
8   
9    import java.io.BufferedReader;
10    import java.io.IOException;
11    import java.util.ArrayList;
12    import java.util.List;
13    import java.util.Scanner;
14   
15   
16    /**
17    * Adds capability to read in and write out HMMER3 files. .
18    *
19    *
20    * @author TZVanaalten
21    *
22    */
 
23    public class HMMFile extends AlignFile
24    implements AlignmentFileReaderI, AlignmentFileWriterI
25    {
26    private static final String TERMINATOR = "//";
27   
28    /*
29    * keys to data in HMM file, used to store as properties of the HiddenMarkovModel
30    */
31    public static final String HMM = "HMM";
32   
33    public static final String NAME = "NAME";
34   
35    public static final String ACCESSION_NUMBER = "ACC";
36   
37    public static final String DESCRIPTION = "DESC";
38   
39    public static final String LENGTH = "LENG";
40   
41    public static final String MAX_LENGTH = "MAXL";
42   
43    public static final String ALPHABET = "ALPH";
44   
45    public static final String DATE = "DATE";
46   
47    public static final String COMMAND_LOG = "COM";
48   
49    public static final String NUMBER_OF_SEQUENCES = "NSEQ";
50   
51    public static final String EFF_NUMBER_OF_SEQUENCES = "EFFN";
52   
53    public static final String CHECK_SUM = "CKSUM";
54   
55    public static final String STATISTICS = "STATS";
56   
57    public static final String COMPO = "COMPO";
58   
59    public static final String GATHERING_THRESHOLD = "GA";
60   
61    public static final String TRUSTED_CUTOFF = "TC";
62   
63    public static final String NOISE_CUTOFF = "NC";
64   
65    public static final String VITERBI = "VITERBI";
66   
67    public static final String MSV = "MSV";
68   
69    public static final String FORWARD = "FORWARD";
70   
71    public static final String MAP = "MAP";
72   
73    public static final String REFERENCE_ANNOTATION = "RF";
74   
75    public static final String CONSENSUS_RESIDUE = "CONS";
76   
77    public static final String CONSENSUS_STRUCTURE = "CS";
78   
79    public static final String MASKED_VALUE = "MM";
80   
81    private static final String ALPH_AMINO = "amino";
82   
83    private static final String ALPH_DNA = "DNA";
84   
85    private static final String ALPH_RNA = "RNA";
86   
87    private static final String ALPHABET_AMINO = "ACDEFGHIKLMNPQRSTVWY";
88   
89    private static final String ALPHABET_DNA = "ACGT";
90   
91    private static final String ALPHABET_RNA = "ACGU";
92   
93    private static final int NUMBER_OF_TRANSITIONS = 7;
94   
95    private static final String SPACE = " ";
96   
97    /*
98    * optional guide line added to an output HMMER file, purely for readability
99    */
100    private static final String TRANSITIONTYPELINE = " m->m m->i m->d i->m i->i d->m d->d";
101   
102    private static String NL = System.lineSeparator();
103   
104    private HiddenMarkovModel hmm;
105   
106    // number of symbols in the alphabet used in the hidden Markov model
107    private int numberOfSymbols;
108   
109    /**
110    * Constructor that parses immediately
111    *
112    * @param inFile
113    * @param type
114    * @throws IOException
115    */
 
116  7 toggle public HMMFile(String inFile, DataSourceType type) throws IOException
117    {
118  7 super(inFile, type);
119    }
120   
121    /**
122    * Constructor that parses immediately
123    *
124    * @param source
125    * @throws IOException
126    */
 
127  8 toggle public HMMFile(FileParse source) throws IOException
128    {
129  8 super(source);
130    }
131   
132    /**
133    * Default constructor
134    */
 
135  4 toggle public HMMFile()
136    {
137    }
138   
139    /**
140    * Constructor for HMMFile used for exporting
141    *
142    * @param hmm
143    */
 
144  2 toggle public HMMFile(HiddenMarkovModel markov)
145    {
146  2 hmm = markov;
147    }
148   
149    /**
150    * Returns the HMM produced by parsing a HMMER3 file
151    *
152    * @return
153    */
 
154  10 toggle public HiddenMarkovModel getHMM()
155    {
156  10 return hmm;
157    }
158   
159    /**
160    * Gets the name of the hidden Markov model
161    *
162    * @return
163    */
 
164  0 toggle public String getName()
165    {
166  0 return hmm.getName();
167    }
168   
169    /**
170    * Reads the data from HMM file into the HMM model
171    */
 
172  15 toggle @Override
173    public void parse()
174    {
175  15 try
176    {
177  15 hmm = new HiddenMarkovModel();
178  15 parseHeaderLines(dataIn);
179  15 parseModel(dataIn);
180    } catch (Exception e)
181    {
182  0 e.printStackTrace();
183    }
184    }
185   
186    /**
187    * Reads the header properties from a HMMER3 file and saves them in the
188    * HiddeMarkovModel. This method exits after reading the next line after the
189    * HMM line.
190    *
191    * @param input
192    * @throws IOException
193    */
 
194  17 toggle void parseHeaderLines(BufferedReader input) throws IOException
195    {
196  17 boolean readingHeaders = true;
197  17 hmm.setFileHeader(input.readLine());
198  17 String line = input.readLine();
199  327 while (readingHeaders && line != null)
200    {
201  310 Scanner parser = new Scanner(line);
202  310 String next = parser.next();
203  310 if (ALPHABET.equals(next))
204    {
205  16 String alphabetType = parser.next();
206  16 hmm.setProperty(ALPHABET, alphabetType);
207  16 String alphabet = ALPH_DNA.equalsIgnoreCase(alphabetType)
208    ? ALPHABET_DNA
209  13 : (ALPH_RNA.equalsIgnoreCase(alphabetType) ? ALPHABET_RNA
210    : ALPHABET_AMINO);
211  16 numberOfSymbols = hmm.setAlphabet(alphabet);
212    }
213  294 else if (HMM.equals(next))
214    {
215  17 readingHeaders = false;
216  17 String symbols = line.substring(line.indexOf(HMM) + HMM.length());
217  17 numberOfSymbols = hmm.setAlphabet(symbols);
218    }
219  277 else if (STATISTICS.equals(next))
220    {
221  48 parser.next();
222  48 String key;
223  48 String value;
224  48 key = parser.next();
225  48 value = parser.next() + SPACE + SPACE + parser.next();
226  48 hmm.setProperty(key, value);
227    }
228    else
229    {
230  229 String key = next;
231  229 String value = parser.next();
232  368 while (parser.hasNext())
233    {
234  139 value = value + SPACE + parser.next();
235    }
236  229 hmm.setProperty(key, value);
237    }
238  310 parser.close();
239  310 line = input.readLine();
240    }
241    }
242   
243    /**
244    * Parses the model data from the HMMER3 file. The input buffer should be
245    * positioned at the (optional) COMPO line if there is one, else at the insert
246    * emissions line for the BEGIN node of the model.
247    *
248    * @param input
249    * @throws IOException
250    */
 
251  16 toggle void parseModel(BufferedReader input) throws IOException
252    {
253    /*
254    * specification says there must always be an HMM header (already read)
255    * and one more header (guide headings) which is skipped here
256    */
257  16 int nodeNo = 0;
258  16 String line = input.readLine();
259  16 List<HMMNode> nodes = new ArrayList<>();
260   
261  2664 while (line != null && !TERMINATOR.equals(line))
262    {
263  2648 HMMNode node = new HMMNode();
264  2648 nodes.add(node);
265  2648 Scanner scanner = new Scanner(line);
266  2648 String next = scanner.next();
267   
268    /*
269    * expect COMPO (optional) for average match emissions
270    * or a node number followed by node's match emissions
271    */
272  2648 if (COMPO.equals(next) || nodeNo > 0)
273    {
274    /*
275    * parse match emissions
276    */
277  2647 double[] matches = parseDoubles(scanner, numberOfSymbols);
278  2647 node.setMatchEmissions(matches);
279  2647 if (!COMPO.equals(next))
280    {
281  2632 int resNo = parseAnnotations(scanner, node);
282  2632 if (resNo == 0)
283    {
284    /*
285    * no MAP annotation provided, just number off from 0 (begin node)
286    */
287  2 resNo = nodeNo;
288    }
289  2632 node.setResidueNumber(resNo);
290    }
291  2647 line = input.readLine();
292    }
293  2648 scanner.close();
294   
295    /*
296    * parse insert emissions
297    */
298  2648 scanner = new Scanner(line);
299  2648 double[] inserts = parseDoubles(scanner, numberOfSymbols);
300  2648 node.setInsertEmissions(inserts);
301  2648 scanner.close();
302   
303    /*
304    * parse state transitions
305    */
306  2648 line = input.readLine();
307  2648 scanner = new Scanner(line);
308  2648 double[] transitions = parseDoubles(scanner,
309    NUMBER_OF_TRANSITIONS);
310  2648 node.setStateTransitions(transitions);
311  2648 scanner.close();
312  2648 line = input.readLine();
313   
314  2648 nodeNo++;
315    }
316   
317  16 hmm.setNodes(nodes);
318    }
319   
320    /**
321    * Parses the annotations on the match emission line and add them to the node.
322    * (See p109 of the HMMER User Guide (V3.1b2) for the specification.) Returns
323    * the residue position that the node maps to, if provided, else zero.
324    *
325    * @param scanner
326    * @param node
327    */
 
328  2634 toggle int parseAnnotations(Scanner scanner, HMMNode node)
329    {
330  2634 int mapTo = 0;
331   
332    /*
333    * map from hmm node to sequence position, if provided
334    */
335  2634 if (scanner.hasNext())
336    {
337  2634 String value = scanner.next();
338  2634 if (!"-".equals(value))
339    {
340  2631 try
341    {
342  2631 mapTo = Integer.parseInt(value);
343  2631 node.setResidueNumber(mapTo);
344    } catch (NumberFormatException e)
345    {
346    // ignore
347    }
348    }
349    }
350   
351    /*
352    * hmm consensus residue if provided, else '-'
353    */
354  2634 if (scanner.hasNext())
355    {
356  2634 node.setConsensusResidue(scanner.next().charAt(0));
357    }
358   
359    /*
360    * RF reference annotation, if provided, else '-'
361    */
362  2634 if (scanner.hasNext())
363    {
364  2634 node.setReferenceAnnotation(scanner.next().charAt(0));
365    }
366   
367    /*
368    * 'm' for masked position, if provided, else '-'
369    */
370  2634 if (scanner.hasNext())
371    {
372  2374 node.setMaskValue(scanner.next().charAt(0));
373    }
374   
375    /*
376    * structure consensus symbol, if provided, else '-'
377    */
378  2634 if (scanner.hasNext())
379    {
380  2374 node.setConsensusStructure(scanner.next().charAt(0));
381    }
382   
383  2634 return mapTo;
384    }
385   
386    /**
387    * Fills an array of doubles parsed from an input line
388    *
389    * @param input
390    * @param numberOfElements
391    * @return
392    * @throws IOException
393    */
 
394  7945 toggle static double[] parseDoubles(Scanner input,
395    int numberOfElements) throws IOException
396    {
397  7945 double[] values = new double[numberOfElements];
398  124534 for (int i = 0; i < numberOfElements; i++)
399    {
400  116589 if (!input.hasNext())
401    {
402  0 throw new IOException("Incomplete data");
403    }
404  116589 String next = input.next();
405  116589 if (next.contains("*"))
406    {
407  45 values[i] = Double.NEGATIVE_INFINITY;
408    }
409    else
410    {
411  116544 double prob = Double.valueOf(next);
412  116544 prob = Math.pow(Math.E, -prob);
413  116544 values[i] = prob;
414    }
415    }
416  7945 return values;
417    }
418   
419    /**
420    * Returns a string to be added to the StringBuilder containing the entire
421    * output String.
422    *
423    * @param initialColumnSeparation
424    * The initial whitespace separation between the left side of the
425    * file and first character.
426    * @param columnSeparation
427    * The separation between subsequent data entries.
428    * @param data
429    * The list of data to be added to the String.
430    * @return
431    */
 
432  2385 toggle String addData(int initialColumnSeparation,
433    int columnSeparation, List<String> data)
434    {
435  2385 String line = "";
436  2385 boolean first = true;
437  2385 for (String value : data)
438    {
439  37365 int sep = first ? initialColumnSeparation : columnSeparation;
440  37365 line += String.format("%" + sep + "s", value);
441  37365 first = false;
442    }
443  2385 return line;
444    }
445   
446    /**
447    * Converts list of characters into a list of Strings.
448    *
449    * @param list
450    * @return Returns the list of Strings.
451    */
 
452  0 toggle List<String> charListToStringList(List<Character> list)
453    {
454  0 List<String> strList = new ArrayList<>();
455  0 for (char value : list)
456    {
457  0 String strValue = Character.toString(value);
458  0 strList.add(strValue);
459    }
460  0 return strList;
461    }
462   
463    /**
464    * Converts an array of doubles into a list of Strings, rounded to the nearest
465    * 5th decimal place
466    *
467    * @param doubles
468    * @param noOfDecimals
469    * @return
470    */
 
471  2385 toggle List<String> doublesToStringList(double[] doubles)
472    {
473  2385 List<String> strList = new ArrayList<>();
474  2385 for (double value : doubles)
475    {
476  37365 String strValue;
477  37365 if (value > 0)
478    {
479  37340 strValue = String.format("%.5f", value);
480    }
481  25 else if (value == -0.00000d)
482    {
483  10 strValue = "0.00000";
484    }
485    else
486    {
487  15 strValue = "*";
488    }
489  37365 strList.add(strValue);
490    }
491  2385 return strList;
492    }
493   
494    /**
495    * Appends model data in string format to the string builder
496    *
497    * @param output
498    */
 
499  5 toggle void appendModelAsString(StringBuilder output)
500    {
501  5 output.append(HMM).append(" ");
502  5 String charSymbols = hmm.getSymbols();
503  5 for (char c : charSymbols.toCharArray())
504    {
505  100 output.append(String.format("%9s", c));
506    }
507  5 output.append(NL).append(TRANSITIONTYPELINE);
508   
509  5 int length = hmm.getLength();
510   
511  800 for (int nodeNo = 0; nodeNo <= length; nodeNo++)
512    {
513  795 String matchLine = String.format("%7s",
514  795 nodeNo == 0 ? COMPO : Integer.toString(nodeNo));
515   
516  795 double[] doubleMatches = convertToLogSpace(
517    hmm.getNode(nodeNo).getMatchEmissions());
518  795 List<String> strMatches = doublesToStringList(doubleMatches);
519  795 matchLine += addData(10, 9, strMatches);
520   
521  795 if (nodeNo != 0)
522    {
523  790 matchLine += SPACE + (hmm.getNodeMapPosition(nodeNo));
524  790 matchLine += SPACE + hmm.getConsensusResidue(nodeNo);
525  790 matchLine += SPACE + hmm.getReferenceAnnotation(nodeNo);
526  790 if (hmm.getFileHeader().contains("HMMER3/f"))
527    {
528  530 matchLine += SPACE + hmm.getMaskedValue(nodeNo);
529  530 matchLine += SPACE + hmm.getConsensusStructure(nodeNo);
530    }
531    }
532   
533  795 output.append(NL).append(matchLine);
534   
535  795 String insertLine = "";
536   
537  795 double[] doubleInserts = convertToLogSpace(
538    hmm.getNode(nodeNo).getInsertEmissions());
539  795 List<String> strInserts = doublesToStringList(doubleInserts);
540  795 insertLine += addData(17, 9, strInserts);
541   
542  795 output.append(NL).append(insertLine);
543   
544  795 String transitionLine = "";
545  795 double[] doubleTransitions = convertToLogSpace(
546    hmm.getNode(nodeNo).getStateTransitions());
547  795 List<String> strTransitions = doublesToStringList(
548    doubleTransitions);
549  795 transitionLine += addData(17, 9, strTransitions);
550   
551  795 output.append(NL).append(transitionLine);
552    }
553    }
554   
555    /**
556    * Appends formatted HMM file properties to the string builder
557    *
558    * @param output
559    */
 
560  5 toggle void appendProperties(StringBuilder output)
561    {
562  5 output.append(hmm.getFileHeader());
563   
564  5 String format = "%n%-5s %1s";
565  5 appendProperty(output, format, NAME);
566  5 appendProperty(output, format, ACCESSION_NUMBER);
567  5 appendProperty(output, format, DESCRIPTION);
568  5 appendProperty(output, format, LENGTH);
569  5 appendProperty(output, format, MAX_LENGTH);
570  5 appendProperty(output, format, ALPHABET);
571  5 appendBooleanProperty(output, format, REFERENCE_ANNOTATION);
572  5 appendBooleanProperty(output, format, MASKED_VALUE);
573  5 appendBooleanProperty(output, format, CONSENSUS_RESIDUE);
574  5 appendBooleanProperty(output, format, CONSENSUS_STRUCTURE);
575  5 appendBooleanProperty(output, format, MAP);
576  5 appendProperty(output, format, DATE);
577  5 appendProperty(output, format, NUMBER_OF_SEQUENCES);
578  5 appendProperty(output, format, EFF_NUMBER_OF_SEQUENCES);
579  5 appendProperty(output, format, CHECK_SUM);
580  5 appendProperty(output, format, GATHERING_THRESHOLD);
581  5 appendProperty(output, format, TRUSTED_CUTOFF);
582  5 appendProperty(output, format, NOISE_CUTOFF);
583   
584  5 if (hmm.getMSV() != null)
585    {
586  5 format = "%n%-19s %18s";
587  5 output.append(String.format(format, "STATS LOCAL MSV", hmm.getMSV()));
588   
589  5 output.append(String.format(format, "STATS LOCAL VITERBI",
590    hmm.getViterbi()));
591   
592  5 output.append(String.format(format, "STATS LOCAL FORWARD",
593    hmm.getForward()));
594    }
595    }
596   
597    /**
598    * Appends 'yes' or 'no' for the given property, according to whether or not
599    * it is set in the HMM
600    *
601    * @param output
602    * @param format
603    * @param propertyName
604    */
 
605  25 toggle private void appendBooleanProperty(StringBuilder output, String format,
606    String propertyName)
607    {
608  25 boolean set = hmm.getBooleanProperty(propertyName);
609  25 output.append(String.format(format, propertyName,
610  25 set ? HiddenMarkovModel.YES : HiddenMarkovModel.NO));
611    }
612   
613    /**
614    * Appends the value of the given property to the output, if not null
615    *
616    * @param output
617    * @param format
618    * @param propertyName
619    */
 
620  65 toggle private void appendProperty(StringBuilder output, String format,
621    String propertyName)
622    {
623  65 String value = hmm.getProperty(propertyName);
624  65 if (value != null)
625    {
626  45 output.append(String.format(format, propertyName, value));
627    }
628    }
629   
 
630  1 toggle @Override
631    public String print(SequenceI[] sequences, boolean jvsuffix)
632    {
633  1 if (sequences[0].getHMM() != null)
634    {
635  1 hmm = sequences[0].getHMM();
636    }
637  1 return print();
638    }
639   
640    /**
641    * Prints the .hmm file to a String.
642    *
643    * @return
644    */
 
645  4 toggle public String print()
646    {
647  4 StringBuilder output = new StringBuilder();
648  4 appendProperties(output);
649  4 output.append(NL);
650  4 appendModelAsString(output);
651  4 output.append(NL).append(TERMINATOR).append(NL);
652  4 return output.toString();
653    }
654   
655    /**
656    * Converts the probabilities contained in an array into log space
657    *
658    * @param ds
659    */
 
660  2385 toggle double[] convertToLogSpace(double[] ds)
661    {
662  2385 double[] converted = new double[ds.length];
663  39750 for (int i = 0; i < ds.length; i++)
664    {
665  37365 double prob = ds[i];
666  37365 double logProb = -1 * Math.log(prob);
667   
668  37365 converted[i] = logProb;
669    }
670  2385 return converted;
671    }
672   
673    /**
674    * Returns the HMM sequence produced by reading a .hmm file.
675    */
 
676  5 toggle @Override
677    public SequenceI[] getSeqsAsArray()
678    {
679  5 SequenceI hmmSeq = hmm.getConsensusSequence();
680  5 SequenceI[] seq = new SequenceI[1];
681  5 seq[0] = hmmSeq;
682  5 return seq;
683    }
684   
 
685  1 toggle @Override
686    public void setNewlineString(String newLine)
687    {
688  1 NL = newLine;
689    }
690   
 
691  1 toggle @Override
692    public void setExportSettings(AlignExportSettingsI exportSettings)
693    {
694   
695    }
696   
 
697  1 toggle @Override
698    public void configureForView(AlignmentViewPanel viewpanel)
699    {
700   
701    }
702   
 
703  1 toggle @Override
704    public boolean hasWarningMessage()
705    {
706  1 return false;
707    }
708   
 
709  0 toggle @Override
710    public String getWarningMessage()
711    {
712  0 return "warning message";
713    }
714   
715    }
716