Clover icon

Coverage Report

  1. Project Clover database Mon Dec 1 2025 13:17:41 GMT
  2. Package jalview.ext.htsjdk

File HtsContigDb.java

 

Coverage histogram

../../../img/srcFileCovDistChart4.png
49% of files have more coverage

Code metrics

20
47
11
1
257
147
25
0.53
4.27
11
2.27

Classes

Class Line # Actions
HtsContigDb 50 47 25
0.3846154 38.5%
 

Contributing tests

This file is covered by 4 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.ext.htsjdk;
22   
23    import java.io.File;
24    import java.io.IOException;
25    import java.math.BigInteger;
26    import java.nio.file.Path;
27    import java.util.ArrayList;
28    import java.util.HashSet;
29    import java.util.List;
30    import java.util.Set;
31   
32    import htsjdk.samtools.SAMException;
33    import htsjdk.samtools.SAMSequenceDictionary;
34    import htsjdk.samtools.SAMSequenceRecord;
35    import htsjdk.samtools.reference.FastaSequenceIndexCreator;
36    import htsjdk.samtools.reference.ReferenceSequence;
37    import htsjdk.samtools.reference.ReferenceSequenceFile;
38    import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
39    import htsjdk.samtools.util.StringUtil;
40    import jalview.datamodel.Sequence;
41    import jalview.datamodel.SequenceI;
42    import jalview.util.DigestUtils;
43   
44    /**
45    * a source of sequence data accessed via the HTSJDK
46    *
47    * @author jprocter
48    *
49    */
 
50    public class HtsContigDb
51    {
// Label passed to the constructor; only assigned when the constructor's
// descriptor is an existing regular file.
52    private String name;
53   
// File the sequences are read from; left null when the constructor's
// descriptor fails the isFile() check.
54    private File dbLocation;
55   
// Handle on the opened sequence file; assigned in initSource() and closed
// (but not cleared) by close().
56    private htsjdk.samtools.reference.ReferenceSequenceFile refFile = null;
57   
 
58  2 toggle public static void createFastaSequenceIndex(Path path, boolean overwrite)
59    throws IOException
60    {
61  2 try
62    {
63  2 FastaSequenceIndexCreator.create(path, overwrite);
64    } catch (SAMException e)
65    {
66  1 throw new IOException(e.getMessage());
67    }
68    }
69   
 
70  7 toggle public HtsContigDb(String name, File descriptor)
71    {
72  7 if (descriptor.isFile())
73    {
74  7 this.name = name;
75  7 dbLocation = descriptor;
76    }
77  7 initSource();
78    }
79   
 
80  5 toggle public void close()
81    {
82  5 if (refFile != null)
83    {
84  5 try
85    {
86  5 refFile.close();
87    } catch (IOException e)
88    {
89    // ignore
90    }
91    }
92    }
93   
 
94  7 toggle private void initSource()
95    {
96  7 if (refFile != null)
97    {
98  0 return;
99    }
100   
101  7 refFile = ReferenceSequenceFileFactory
102    .getReferenceSequenceFile(dbLocation, true);
103  6 if (refFile == null || refFile.getSequenceDictionary() == null)
104    {
105    // refFile = initSequenceDictionaryFor(dbLocation);
106    }
107   
108    }
109   
// Sequence dictionary most recently computed by initSequenceDictionaryFor();
// remains null until that method has been run. Package-private.
110    SAMSequenceDictionary rrefDict = null;
111   
 
112  0 toggle private ReferenceSequenceFile initSequenceDictionaryFor(File dbLocation2)
113    throws Exception
114    {
115  0 rrefDict = getDictionary(dbLocation2, true);
116  0 if (rrefDict != null)
117    {
118  0 ReferenceSequenceFile rrefFile = ReferenceSequenceFileFactory
119    .getReferenceSequenceFile(dbLocation2, true);
120  0 return rrefFile;
121    }
122  0 return null;
123    }
124   
125    /**
126    * code below hacked out from picard ----
127    *
128    * picard/src/java/picard/sam/CreateSequenceDictionary.java
129    * https://github.com/
130    * broadinstitute/picard/commit/270580d3e28123496576f0b91b3433179bb5d876
131    */
132   
133    /*
134    * The MIT License
135    *
136    * Copyright (c) 2009 The Broad Institute
137    *
138    * Permission is hereby granted, free of charge, to any person obtaining a
139    * copy of this software and associated documentation files (the "Software"),
140    * to deal in the Software without restriction, including without limitation
141    * the rights to use, copy, modify, merge, publish, distribute, sublicense,
142    * and/or sell copies of the Software, and to permit persons to whom the
143    * Software is furnished to do so, subject to the following conditions:
144    *
145    * The above copyright notice and this permission notice shall be included in
146    * all copies or substantial portions of the Software.
147    *
148    * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
149    * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
150    * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
151    * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
152    * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
153    * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
154    * DEALINGS IN THE SOFTWARE.
155    */
156    /**
157    *
158    * @param f
159    * @param truncate
160    * @return
161    * @throws Exception
162    */
 
163  0 toggle SAMSequenceDictionary getDictionary(File f, boolean truncate)
164    throws Exception
165    {
166  0 final ReferenceSequenceFile refSeqFile = ReferenceSequenceFileFactory
167    .getReferenceSequenceFile(f, truncate);
168  0 ReferenceSequence refSeq;
169  0 List<SAMSequenceRecord> ret = new ArrayList<>();
170  0 Set<String> sequenceNames = new HashSet<>();
171  0 for (int numSequences = 0; (refSeq = refSeqFile
172    .nextSequence()) != null; ++numSequences)
173    {
174  0 if (sequenceNames.contains(refSeq.getName()))
175    {
176  0 throw new Exception(
177    "Sequence name appears more than once in reference: "
178    + refSeq.getName());
179    }
180  0 sequenceNames.add(refSeq.getName());
181  0 ret.add(makeSequenceRecord(refSeq));
182    }
183  0 return new SAMSequenceDictionary(ret);
184    }
185   
 
186  8 toggle public boolean isValid()
187    {
188  8 return dbLocation != null && refFile != null;
189    }
190   
191    /**
192    * Create one SAMSequenceRecord from a single fasta sequence
193    */
 
194  0 toggle private SAMSequenceRecord makeSequenceRecord(
195    final ReferenceSequence refSeq)
196    {
197   
198  0 final SAMSequenceRecord ret = new SAMSequenceRecord(refSeq.getName(),
199    refSeq.length());
200   
201    // Compute MD5 of upcased bases
202  0 final byte[] bases = refSeq.getBases();
203  0 for (int i = 0; i < bases.length; ++i)
204    {
205  0 bases[i] = StringUtil.toUpperCase(bases[i]);
206    }
207   
208  0 ret.setAttribute(SAMSequenceRecord.MD5_TAG, md5Hash(bases));
209    // if (GENOME_ASSEMBLY != null) {
210    // ret.setAttribute(SAMSequenceRecord.ASSEMBLY_TAG, GENOME_ASSEMBLY);
211    // }
212    // ret.setAttribute(SAMSequenceRecord.URI_TAG, URI);
213    // if (SPECIES != null) {
214    // ret.setAttribute(SAMSequenceRecord.SPECIES_TAG, SPECIES);
215    // }
216  0 return ret;
217    }
218   
 
219  0 toggle private String md5Hash(final byte[] bytes)
220    {
221  0 String s = new BigInteger(1, DigestUtils.computeMD5(bytes)).toString(16);
222  0 if (s.length() != 32)
223    {
224  0 final String zeros = "00000000000000000000000000000000";
225  0 s = zeros.substring(0, 32 - s.length()) + s;
226    }
227  0 return s;
228    }
229   
230    // ///// end of hts bits.
231   
232    /**
233    * Reads the contig with the given id and returns as a Jalview SequenceI
234    * object. Note the database must be indexed for this operation to succeed.
235    *
236    * @param id
237    * @return
238    */
 
239  5 toggle public SequenceI getSequenceProxy(String id)
240    {
241  5 if (!isValid() || !refFile.isIndexed())
242    {
243  0 jalview.bin.Console.errPrintln(
244    "Cannot read contig as file is invalid or not indexed");
245  0 return null;
246    }
247   
248  5 ReferenceSequence sseq = refFile.getSequence(id);
249  5 return new Sequence(sseq.getName(), new String(sseq.getBases()));
250    }
251   
 
252  3 toggle public boolean isIndexed()
253    {
254  3 return refFile != null && refFile.isIndexed();
255    }
256   
257    }