1 |
|
|
2 |
|
|
3 |
|
|
4 |
|
|
5 |
|
|
6 |
|
|
7 |
|
|
8 |
|
|
9 |
|
|
10 |
|
|
11 |
|
|
12 |
|
|
13 |
|
|
14 |
|
|
15 |
|
|
16 |
|
|
17 |
|
|
18 |
|
|
19 |
|
|
20 |
|
|
21 |
|
package jalview.ext.htsjdk; |
22 |
|
|
23 |
|
import jalview.datamodel.Sequence; |
24 |
|
import jalview.datamodel.SequenceI; |
25 |
|
|
26 |
|
import java.io.File; |
27 |
|
import java.io.IOException; |
28 |
|
import java.math.BigInteger; |
29 |
|
import java.nio.file.Path; |
30 |
|
import java.security.MessageDigest; |
31 |
|
import java.security.NoSuchAlgorithmException; |
32 |
|
import java.util.ArrayList; |
33 |
|
import java.util.HashSet; |
34 |
|
import java.util.List; |
35 |
|
import java.util.Set; |
36 |
|
|
37 |
|
import htsjdk.samtools.SAMException; |
38 |
|
import htsjdk.samtools.SAMSequenceDictionary; |
39 |
|
import htsjdk.samtools.SAMSequenceRecord; |
40 |
|
import htsjdk.samtools.reference.FastaSequenceIndexCreator; |
41 |
|
import htsjdk.samtools.reference.ReferenceSequence; |
42 |
|
import htsjdk.samtools.reference.ReferenceSequenceFile; |
43 |
|
import htsjdk.samtools.reference.ReferenceSequenceFileFactory; |
44 |
|
import htsjdk.samtools.util.StringUtil; |
45 |
|
|
46 |
|
|
47 |
|
|
48 |
|
|
49 |
|
@author |
50 |
|
|
51 |
|
|
|
|
| 34.1% |
Uncovered Elements: 58 (88) |
Complexity: 28 |
Complexity Density: 0.52 |
|
52 |
|
public class HtsContigDb |
53 |
|
{ |
54 |
|
private String name; |
55 |
|
|
56 |
|
private File dbLocation; |
57 |
|
|
58 |
|
private htsjdk.samtools.reference.ReferenceSequenceFile refFile = null; |
59 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (3) |
Complexity: 2 |
Complexity Density: 0.67 |
|
60 |
2 |
public static void createFastaSequenceIndex(Path path, boolean overwrite)... |
61 |
|
throws IOException |
62 |
|
{ |
63 |
2 |
try |
64 |
|
{ |
65 |
2 |
FastaSequenceIndexCreator.create(path, overwrite); |
66 |
|
} catch (SAMException e) |
67 |
|
{ |
68 |
1 |
throw new IOException(e.getMessage()); |
69 |
|
} |
70 |
|
} |
71 |
|
|
|
|
| 83.3% |
Uncovered Elements: 1 (6) |
Complexity: 2 |
Complexity Density: 0.5 |
|
72 |
7 |
public HtsContigDb(String name, File descriptor)... |
73 |
|
{ |
74 |
7 |
if (descriptor.isFile()) |
75 |
|
{ |
76 |
7 |
this.name = name; |
77 |
7 |
dbLocation = descriptor; |
78 |
|
} |
79 |
7 |
initSource(); |
80 |
|
} |
81 |
|
|
|
|
| 80% |
Uncovered Elements: 1 (5) |
Complexity: 3 |
Complexity Density: 1 |
|
82 |
5 |
public void close()... |
83 |
|
{ |
84 |
5 |
if (refFile != null) |
85 |
|
{ |
86 |
5 |
try |
87 |
|
{ |
88 |
5 |
refFile.close(); |
89 |
|
} catch (IOException e) |
90 |
|
{ |
91 |
|
|
92 |
|
} |
93 |
|
} |
94 |
|
} |
95 |
|
|
|
|
| 62.5% |
Uncovered Elements: 3 (8) |
Complexity: 4 |
Complexity Density: 1 |
|
96 |
7 |
private void initSource()... |
97 |
|
{ |
98 |
7 |
if (refFile != null) |
99 |
|
{ |
100 |
0 |
return; |
101 |
|
} |
102 |
|
|
103 |
7 |
refFile = ReferenceSequenceFileFactory |
104 |
|
.getReferenceSequenceFile(dbLocation, true); |
105 |
6 |
if (refFile == null || refFile.getSequenceDictionary() == null) |
106 |
|
{ |
107 |
|
|
108 |
|
} |
109 |
|
|
110 |
|
} |
111 |
|
|
112 |
|
SAMSequenceDictionary rrefDict = null; |
113 |
|
|
|
|
| 0% |
Uncovered Elements: 7 (7) |
Complexity: 2 |
Complexity Density: 0.4 |
|
114 |
0 |
private ReferenceSequenceFile initSequenceDictionaryFor(File dbLocation2)... |
115 |
|
throws Exception |
116 |
|
{ |
117 |
0 |
rrefDict = getDictionary(dbLocation2, true); |
118 |
0 |
if (rrefDict != null) |
119 |
|
{ |
120 |
0 |
ReferenceSequenceFile rrefFile = ReferenceSequenceFileFactory |
121 |
|
.getReferenceSequenceFile(dbLocation2, true); |
122 |
0 |
return rrefFile; |
123 |
|
} |
124 |
0 |
return null; |
125 |
|
} |
126 |
|
|
127 |
|
|
128 |
|
|
129 |
|
|
130 |
|
|
131 |
|
|
132 |
|
|
133 |
|
|
134 |
|
|
135 |
|
|
136 |
|
|
137 |
|
|
138 |
|
|
139 |
|
|
140 |
|
|
141 |
|
|
142 |
|
|
143 |
|
|
144 |
|
|
145 |
|
|
146 |
|
|
147 |
|
|
148 |
|
|
149 |
|
|
150 |
|
|
151 |
|
|
152 |
|
|
153 |
|
|
154 |
|
|
155 |
|
|
156 |
|
|
157 |
|
|
158 |
|
|
159 |
|
|
160 |
|
@param |
161 |
|
@param |
162 |
|
@return |
163 |
|
@throws |
164 |
|
|
|
|
| 0% |
Uncovered Elements: 18 (18) |
Complexity: 4 |
Complexity Density: 0.33 |
|
165 |
0 |
SAMSequenceDictionary getDictionary(File f, boolean truncate)... |
166 |
|
throws Exception |
167 |
|
{ |
168 |
0 |
if (md5 == null) |
169 |
|
{ |
170 |
0 |
initCreateSequenceDictionary(); |
171 |
|
} |
172 |
0 |
final ReferenceSequenceFile refSeqFile = ReferenceSequenceFileFactory |
173 |
|
.getReferenceSequenceFile(f, truncate); |
174 |
0 |
ReferenceSequence refSeq; |
175 |
0 |
List<SAMSequenceRecord> ret = new ArrayList<>(); |
176 |
0 |
Set<String> sequenceNames = new HashSet<>(); |
177 |
0 |
for (int numSequences = 0; (refSeq = refSeqFile |
178 |
|
.nextSequence()) != null; ++numSequences) |
179 |
|
{ |
180 |
0 |
if (sequenceNames.contains(refSeq.getName())) |
181 |
|
{ |
182 |
0 |
throw new Exception( |
183 |
|
"Sequence name appears more than once in reference: " |
184 |
|
+ refSeq.getName()); |
185 |
|
} |
186 |
0 |
sequenceNames.add(refSeq.getName()); |
187 |
0 |
ret.add(makeSequenceRecord(refSeq)); |
188 |
|
} |
189 |
0 |
return new SAMSequenceDictionary(ret); |
190 |
|
} |
191 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
192 |
8 |
public boolean isValid()... |
193 |
|
{ |
194 |
8 |
return dbLocation != null && refFile != null; |
195 |
|
} |
196 |
|
|
197 |
|
|
198 |
|
|
199 |
|
|
|
|
| 0% |
Uncovered Elements: 8 (8) |
Complexity: 2 |
Complexity Density: 0.33 |
|
200 |
0 |
private SAMSequenceRecord makeSequenceRecord(... |
201 |
|
final ReferenceSequence refSeq) |
202 |
|
{ |
203 |
|
|
204 |
0 |
final SAMSequenceRecord ret = new SAMSequenceRecord(refSeq.getName(), |
205 |
|
refSeq.length()); |
206 |
|
|
207 |
|
|
208 |
0 |
final byte[] bases = refSeq.getBases(); |
209 |
0 |
for (int i = 0; i < bases.length; ++i) |
210 |
|
{ |
211 |
0 |
bases[i] = StringUtil.toUpperCase(bases[i]); |
212 |
|
} |
213 |
|
|
214 |
0 |
ret.setAttribute(SAMSequenceRecord.MD5_TAG, md5Hash(bases)); |
215 |
|
|
216 |
|
|
217 |
|
|
218 |
|
|
219 |
|
|
220 |
|
|
221 |
|
|
222 |
0 |
return ret; |
223 |
|
} |
224 |
|
|
225 |
|
private MessageDigest md5; |
226 |
|
|
|
|
| 0% |
Uncovered Elements: 3 (3) |
Complexity: 2 |
Complexity Density: 0.67 |
|
227 |
0 |
public void initCreateSequenceDictionary() throws Exception... |
228 |
|
{ |
229 |
0 |
try |
230 |
|
{ |
231 |
0 |
md5 = MessageDigest.getInstance("MD5"); |
232 |
|
} catch (NoSuchAlgorithmException e) |
233 |
|
{ |
234 |
0 |
throw new Exception("MD5 algorithm not found", e); |
235 |
|
} |
236 |
|
} |
237 |
|
|
|
|
| 0% |
Uncovered Elements: 9 (9) |
Complexity: 2 |
Complexity Density: 0.29 |
|
238 |
0 |
private String md5Hash(final byte[] bytes)... |
239 |
|
{ |
240 |
0 |
md5.reset(); |
241 |
0 |
md5.update(bytes); |
242 |
0 |
String s = new BigInteger(1, md5.digest()).toString(16); |
243 |
0 |
if (s.length() != 32) |
244 |
|
{ |
245 |
0 |
final String zeros = "00000000000000000000000000000000"; |
246 |
0 |
s = zeros.substring(0, 32 - s.length()) + s; |
247 |
|
} |
248 |
0 |
return s; |
249 |
|
} |
250 |
|
|
251 |
|
|
252 |
|
|
253 |
|
|
254 |
|
|
255 |
|
|
256 |
|
|
257 |
|
@param |
258 |
|
@return |
259 |
|
|
|
|
| 57.1% |
Uncovered Elements: 3 (7) |
Complexity: 3 |
Complexity Density: 0.6 |
|
260 |
5 |
public SequenceI getSequenceProxy(String id)... |
261 |
|
{ |
262 |
5 |
if (!isValid() || !refFile.isIndexed()) |
263 |
|
{ |
264 |
0 |
jalview.bin.Console.errPrintln( |
265 |
|
"Cannot read contig as file is invalid or not indexed"); |
266 |
0 |
return null; |
267 |
|
} |
268 |
|
|
269 |
5 |
ReferenceSequence sseq = refFile.getSequence(id); |
270 |
5 |
return new Sequence(sseq.getName(), new String(sseq.getBases())); |
271 |
|
} |
272 |
|
|
|
|
| 100% |
Uncovered Elements: 0 (1) |
Complexity: 1 |
Complexity Density: 1 |
|
273 |
3 |
public boolean isIndexed()... |
274 |
|
{ |
275 |
3 |
return refFile != null && refFile.isIndexed(); |
276 |
|
} |
277 |
|
|
278 |
|
} |