1. Project Clover database Wed Nov 13 2024 18:27:33 GMT
  2. Package jalview.ext.htsjdk

File HtsContigDb.java

 

Coverage histogram

../../../img/srcFileCovDistChart4.png
48% of files have more coverage

Code metrics

22
54
12
1
278
165
28
0.52
4.5
12
2.33

Classes

Class
Line #
Actions
HtsContigDb 52 54 28
0.340909134.1%
 

Contributing tests

This file is covered by 4 tests. .

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.ext.htsjdk;
22   
23    import jalview.datamodel.Sequence;
24    import jalview.datamodel.SequenceI;
25   
26    import java.io.File;
27    import java.io.IOException;
28    import java.math.BigInteger;
29    import java.nio.file.Path;
30    import java.security.MessageDigest;
31    import java.security.NoSuchAlgorithmException;
32    import java.util.ArrayList;
33    import java.util.HashSet;
34    import java.util.List;
35    import java.util.Set;
36   
37    import htsjdk.samtools.SAMException;
38    import htsjdk.samtools.SAMSequenceDictionary;
39    import htsjdk.samtools.SAMSequenceRecord;
40    import htsjdk.samtools.reference.FastaSequenceIndexCreator;
41    import htsjdk.samtools.reference.ReferenceSequence;
42    import htsjdk.samtools.reference.ReferenceSequenceFile;
43    import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
44    import htsjdk.samtools.util.StringUtil;
45   
46    /**
47    * a source of sequence data accessed via the HTSJDK
48    *
49    * @author jprocter
50    *
51    */
 
52    public class HtsContigDb
53    {
54    private String name;
55   
56    private File dbLocation;
57   
58    private htsjdk.samtools.reference.ReferenceSequenceFile refFile = null;
59   
 
60  2 toggle public static void createFastaSequenceIndex(Path path, boolean overwrite)
61    throws IOException
62    {
63  2 try
64    {
65  2 FastaSequenceIndexCreator.create(path, overwrite);
66    } catch (SAMException e)
67    {
68  1 throw new IOException(e.getMessage());
69    }
70    }
71   
 
72  7 toggle public HtsContigDb(String name, File descriptor)
73    {
74  7 if (descriptor.isFile())
75    {
76  7 this.name = name;
77  7 dbLocation = descriptor;
78    }
79  7 initSource();
80    }
81   
 
82  5 toggle public void close()
83    {
84  5 if (refFile != null)
85    {
86  5 try
87    {
88  5 refFile.close();
89    } catch (IOException e)
90    {
91    // ignore
92    }
93    }
94    }
95   
 
96  7 toggle private void initSource()
97    {
98  7 if (refFile != null)
99    {
100  0 return;
101    }
102   
103  7 refFile = ReferenceSequenceFileFactory
104    .getReferenceSequenceFile(dbLocation, true);
105  6 if (refFile == null || refFile.getSequenceDictionary() == null)
106    {
107    // refFile = initSequenceDictionaryFor(dbLocation);
108    }
109   
110    }
111   
112    SAMSequenceDictionary rrefDict = null;
113   
 
114  0 toggle private ReferenceSequenceFile initSequenceDictionaryFor(File dbLocation2)
115    throws Exception
116    {
117  0 rrefDict = getDictionary(dbLocation2, true);
118  0 if (rrefDict != null)
119    {
120  0 ReferenceSequenceFile rrefFile = ReferenceSequenceFileFactory
121    .getReferenceSequenceFile(dbLocation2, true);
122  0 return rrefFile;
123    }
124  0 return null;
125    }
126   
127    /**
128    * code below hacked out from picard ----
129    *
130    * picard/src/java/picard/sam/CreateSequenceDictionary.java
131    * https://github.com/
132    * broadinstitute/picard/commit/270580d3e28123496576f0b91b3433179bb5d876
133    */
134   
135    /*
136    * The MIT License
137    *
138    * Copyright (c) 2009 The Broad Institute
139    *
140    * Permission is hereby granted, free of charge, to any person obtaining a
141    * copy of this software and associated documentation files (the "Software"),
142    * to deal in the Software without restriction, including without limitation
143    * the rights to use, copy, modify, merge, publish, distribute, sublicense,
144    * and/or sell copies of the Software, and to permit persons to whom the
145    * Software is furnished to do so, subject to the following conditions:
146    *
147    * The above copyright notice and this permission notice shall be included in
148    * all copies or substantial portions of the Software.
149    *
150    * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
151    * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
152    * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
153    * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
154    * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
155    * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
156    * DEALINGS IN THE SOFTWARE.
157    */
158    /**
159    *
160    * @param f
161    * @param truncate
162    * @return
163    * @throws Exception
164    */
 
165  0 toggle SAMSequenceDictionary getDictionary(File f, boolean truncate)
166    throws Exception
167    {
168  0 if (md5 == null)
169    {
170  0 initCreateSequenceDictionary();
171    }
172  0 final ReferenceSequenceFile refSeqFile = ReferenceSequenceFileFactory
173    .getReferenceSequenceFile(f, truncate);
174  0 ReferenceSequence refSeq;
175  0 List<SAMSequenceRecord> ret = new ArrayList<>();
176  0 Set<String> sequenceNames = new HashSet<>();
177  0 for (int numSequences = 0; (refSeq = refSeqFile
178    .nextSequence()) != null; ++numSequences)
179    {
180  0 if (sequenceNames.contains(refSeq.getName()))
181    {
182  0 throw new Exception(
183    "Sequence name appears more than once in reference: "
184    + refSeq.getName());
185    }
186  0 sequenceNames.add(refSeq.getName());
187  0 ret.add(makeSequenceRecord(refSeq));
188    }
189  0 return new SAMSequenceDictionary(ret);
190    }
191   
 
192  8 toggle public boolean isValid()
193    {
194  8 return dbLocation != null && refFile != null;
195    }
196   
197    /**
198    * Create one SAMSequenceRecord from a single fasta sequence
199    */
 
200  0 toggle private SAMSequenceRecord makeSequenceRecord(
201    final ReferenceSequence refSeq)
202    {
203   
204  0 final SAMSequenceRecord ret = new SAMSequenceRecord(refSeq.getName(),
205    refSeq.length());
206   
207    // Compute MD5 of upcased bases
208  0 final byte[] bases = refSeq.getBases();
209  0 for (int i = 0; i < bases.length; ++i)
210    {
211  0 bases[i] = StringUtil.toUpperCase(bases[i]);
212    }
213   
214  0 ret.setAttribute(SAMSequenceRecord.MD5_TAG, md5Hash(bases));
215    // if (GENOME_ASSEMBLY != null) {
216    // ret.setAttribute(SAMSequenceRecord.ASSEMBLY_TAG, GENOME_ASSEMBLY);
217    // }
218    // ret.setAttribute(SAMSequenceRecord.URI_TAG, URI);
219    // if (SPECIES != null) {
220    // ret.setAttribute(SAMSequenceRecord.SPECIES_TAG, SPECIES);
221    // }
222  0 return ret;
223    }
224   
225    private MessageDigest md5;
226   
 
227  0 toggle public void initCreateSequenceDictionary() throws Exception
228    {
229  0 try
230    {
231  0 md5 = MessageDigest.getInstance("MD5");
232    } catch (NoSuchAlgorithmException e)
233    {
234  0 throw new Exception("MD5 algorithm not found", e);
235    }
236    }
237   
 
238  0 toggle private String md5Hash(final byte[] bytes)
239    {
240  0 md5.reset();
241  0 md5.update(bytes);
242  0 String s = new BigInteger(1, md5.digest()).toString(16);
243  0 if (s.length() != 32)
244    {
245  0 final String zeros = "00000000000000000000000000000000";
246  0 s = zeros.substring(0, 32 - s.length()) + s;
247    }
248  0 return s;
249    }
250   
251    // ///// end of hts bits.
252   
253    /**
254    * Reads the contig with the given id and returns as a Jalview SequenceI
255    * object. Note the database must be indexed for this operation to succeed.
256    *
257    * @param id
258    * @return
259    */
 
260  5 toggle public SequenceI getSequenceProxy(String id)
261    {
262  5 if (!isValid() || !refFile.isIndexed())
263    {
264  0 jalview.bin.Console.errPrintln(
265    "Cannot read contig as file is invalid or not indexed");
266  0 return null;
267    }
268   
269  5 ReferenceSequence sseq = refFile.getSequence(id);
270  5 return new Sequence(sseq.getName(), new String(sseq.getBases()));
271    }
272   
 
273  3 toggle public boolean isIndexed()
274    {
275  3 return refFile != null && refFile.isIndexed();
276    }
277   
278    }