Clover icon

Coverage Report

  1. Project Clover database Thu Dec 4 2025 16:11:35 GMT
  2. Package jalview.analysis

File SeqsetUtils.java

 

Coverage histogram

../../img/srcFileCovDistChart8.png
20% of files have more coverage

Code metrics

56
96
9
2
349
221
41
0.43
10.67
4.5
4.56

Classes

Class Line # Actions
SeqsetUtils 46 93 40
0.7324841 (73.2%)
SeqsetUtils.SequenceInfo 48 3 1
1.0 (100%)
 

Contributing tests

This file is covered by 105 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.analysis;
22   
23    import jalview.bin.Cache;
24    import jalview.bin.Console;
25    import jalview.datamodel.AlignmentAnnotation;
26    import jalview.datamodel.HiddenMarkovModel;
27    import jalview.datamodel.PDBEntry;
28    import jalview.datamodel.Sequence;
29    import jalview.datamodel.SequenceFeature;
30    import jalview.datamodel.SequenceI;
31   
32    import java.util.ArrayList;
33    import java.util.BitSet;
34    import java.util.Enumeration;
35    import java.util.HashMap;
36    import java.util.Hashtable;
37    import java.util.Iterator;
38    import java.util.List;
39    import java.util.Map;
40    import java.util.Optional;
41    import java.util.Vector;
42    import static java.lang.String.format;
43   
44    import java.nio.CharBuffer;
45   
 
46    public class SeqsetUtils
47    {
 
48    public static class SequenceInfo {
49    private String name;
50    private int start;
51    private int end;
52    private Optional<String> description = Optional.empty();
53    private Optional<List<SequenceFeature>> features = Optional.empty();
54    private Optional<List<PDBEntry>> pdbId = Optional.empty();
55    private Optional<SequenceI> dataset = Optional.empty();
56    private Optional<HiddenMarkovModel> hmm = Optional.empty();
57    private Optional<AlignmentAnnotation[]> searchScores = Optional.empty();
58   
 
59  493 toggle private SequenceInfo(String name, int start, int end) {
60  493 this.name = name;
61  493 this.start = start;
62  493 this.end = end;
63    }
64    }
65   
66    /**
67    * Store essential properties of a sequence in a hashtable for later recovery
68    * Keys are Name, Start, End, SeqFeatures, PdbId, HMM
69    *
70    * @param seq
71    * SequenceI
72    * @return Hashtable
73    */
 
74  493 toggle public static SequenceInfo SeqCharacterHash(SequenceI seq)
75    {
76  493 SequenceInfo sqinfo = new SequenceInfo(seq.getName(), seq.getStart(), seq.getEnd());
77  493 sqinfo.description = Optional.ofNullable(seq.getDescription());
78  493 sqinfo.dataset = Optional.ofNullable(seq.getDatasetSequence());
79  493 if (!sqinfo.dataset.isPresent())
80    {
81  134 ArrayList<SequenceFeature> feats = new ArrayList<>(
82    seq.getFeatures().getAllFeatures());
83  134 sqinfo.features = Optional.of(feats);
84  134 var pdbEntries = seq.getAllPDBEntries();
85  134 sqinfo.pdbId = Optional.of(pdbEntries != null ? pdbEntries : new ArrayList<>());
86    }
87  493 if (seq.hasHMMProfile())
88    {
89  0 sqinfo.hmm = Optional.of(seq.getHMM());
90    }
91  493 sqinfo.searchScores = Optional.ofNullable(seq.getAnnotation("Search Scores"));
92  493 return sqinfo;
93    }
94   
95    /**
96    * Filter the sequence through the mask leaving only characters at positions
97    * where the mask value was true. The length of the resulting array is
98    * the cardinality of the mask from 0 to sequence length.
99    *
100    * @param sequence
101    * input sequence
102    * @param mask
103    * mask used to filter the sequence characters
104    * @return input array filtered through the mask
105    */
 
106  43 toggle public static char[] filterSequence(char[] sequence, BitSet mask)
107    {
108  43 mask = mask.get(0, sequence.length);
109  43 char[] result = new char[mask.cardinality()];
110  493 for (int i = mask.nextSetBit(0), j = 0; i >= 0;)
111    {
112  450 result[j++] = sequence[i];
113  450 if (i == Integer.MAX_VALUE)
114    // prevents integer overflow of (i + 1)
115  0 break;
116  450 i = mask.nextSetBit(i + 1);
117    }
118  43 return result;
119    }
120   
121    /**
122    * Recover essential properties of a sequence from a hashtable TODO: replace
123    * these methods with something more elegant.
124    *
125    * @param sq
126    * SequenceI
127    * @param sqinfo
128    * Hashtable
129    * @return boolean true if name was not updated from sqinfo Name entry
130    */
 
131  325 toggle public static boolean SeqCharacterUnhash(SequenceI sq, SequenceInfo sqinfo)
132    {
133  325 if (sqinfo == null)
134    {
135  0 return false;
136    }
137  325 if (sqinfo.name != null)
138    {
139  325 sq.setName(sqinfo.name);
140    }
141  325 sq.setStart(sqinfo.start);
142  325 sq.setEnd(sqinfo.end);
143  325 if (sqinfo.pdbId.isPresent() && !sqinfo.pdbId.get().isEmpty())
144  0 sq.setPDBId(new Vector<>(sqinfo.pdbId.get()));
145  325 if (sqinfo.features.isPresent() && !sqinfo.features.get().isEmpty())
146  0 sq.setSequenceFeatures(sqinfo.features.get());
147  325 if (sqinfo.description.isPresent())
148  4 sq.setDescription(sqinfo.description.get());
149  325 if (sqinfo.dataset.isPresent())
150    {
151  197 if (sqinfo.features.isPresent())
152    {
153  0 Console.warn("Setting dataset sequence for a sequence which has " +
154    "sequence features. Dataset sequence features will not be visible.");
155    assert false;
156    }
157  197 sq.setDatasetSequence(sqinfo.dataset.get());
158    }
159  325 if (sqinfo.hmm.isPresent())
160  0 sq.setHMM(new HiddenMarkovModel(sqinfo.hmm.get(), sq));
161  325 if (sqinfo.searchScores.isPresent())
162    {
163  0 for (AlignmentAnnotation score : sqinfo.searchScores.get())
164    {
165  0 sq.addAlignmentAnnotation(score);
166    }
167    }
168  325 return sqinfo.name != null;
169    }
170   
171    /**
172    * Form of the unique name used in uniquify for the i'th sequence in an
173    * ordered vector of sequences.
174    *
175    * @param i
176    * int
177    * @return String
178    */
 
179  252 toggle public static String unique_name(int i)
180    {
181  252 return String.format("Sequence%d", i);
182    }
183   
184    /**
185    * Generates a hash of SeqCharacterHash properties for each sequence in a
186    * sequence set, and optionally renames the sequences to an unambiguous 'safe'
187    * name.
188    *
189    * @param sequences
190    * SequenceI[]
191    * @param write_names
192    * boolean set this to rename each of the sequences to its
193    * unique_name(index) name
194    * @return Hashtable to be passed to
195    * @see deuniquify to recover original names (and properties) for renamed
196    * sequences
197    */
 
198  14 toggle public static Map<String, SequenceInfo> uniquify(SequenceI[] sequences,
199    boolean write_names)
200    {
201    // Generate a safely named sequence set and a hash to recover the sequence
202    // names
203  14 HashMap<String, SequenceInfo> map = new HashMap<>();
204    // String[] un_names = new String[sequences.length];
205   
206  142 for (int i = 0; i < sequences.length; i++)
207    {
208  128 String safename = unique_name(i);
209  128 map.put(safename, SeqCharacterHash(sequences[i]));
210   
211  128 if (write_names)
212    {
213  128 sequences[i].setName(safename);
214    }
215    }
216   
217  14 return map;
218    }
219   
220    /**
221    * recover unsafe sequence names and original properties for a sequence set
222    * using a map generated by
223    *
224    * @see uniquify(sequences,true)
225    * @param map
226    * Hashtable
227    * @param sequences
228    * SequenceI[]
229    * @return boolean
230    */
 
231  9 toggle public static boolean deuniquify(Map<String, SequenceInfo> map,
232    SequenceI[] sequences)
233    {
234  9 return deuniquify(map, sequences, true);
235    }
236   
237    /**
238    * recover unsafe sequence names and original properties for a sequence set
239    * using a map generated by
240    *
241    * @see uniquify(sequences,true)
242    * @param map
243    * Hashtable
244    * @param sequences
245    * SequenceI[]
246    * @param quiet
247    * when false, don't complain about sequences without any data in the
248    * map.
249    * @return boolean
250    */
 
251  9 toggle public static boolean deuniquify(Map<String, SequenceInfo> map,
252    SequenceI[] sequences, boolean quiet)
253    {
254  9 jalview.analysis.SequenceIdMatcher matcher = new SequenceIdMatcher(
255    sequences);
256  9 SequenceI msq = null;
257  9 Iterator<String> keys = map.keySet().iterator();
258  9 Vector<SequenceI> unmatched = new Vector<>();
259  35 for (int i = 0, j = sequences.length; i < j; i++)
260    {
261  26 unmatched.addElement(sequences[i]);
262    }
263  35 while (keys.hasNext())
264    {
265  26 String key = keys.next();
266  26 try {
267  ? if ((msq = matcher.findIdMatch((String) key)) != null)
268    {
269  26 SequenceInfo sqinfo = map.get(key);
270  26 unmatched.removeElement(msq);
271  26 SeqCharacterUnhash(msq, sqinfo);
272    }
273    else
274    {
275  0 if (!quiet)
276    {
277  0 Console.warn(format("Can't find '%s' in uniquified alignment",
278    key));
279    }
280    }
281    } catch (ClassCastException ccastex) {
282  0 if (!quiet)
283    {
284  0 Console.error("Unexpected object in SeqSet map : "+ key.getClass());
285    }
286    }
287    }
288  9 if (unmatched.size() > 0 && !quiet)
289    {
290  0 StringBuilder sb = new StringBuilder("Did not find match for sequences: ");
291  0 Enumeration<SequenceI> i = unmatched.elements();
292  0 sb.append(i.nextElement().getName());
293  0 for (; i.hasMoreElements();)
294    {
295  0 sb.append(", " + i.nextElement().getName());
296    }
297  0 Console.warn(sb.toString());
298  0 return false;
299    }
300   
301  9 return true;
302    }
303   
304    /**
305    * returns a subset of the sequenceI seuqences, including only those that
306    * contain at least one residue.
307    *
308    * @param sequences
309    * SequenceI[]
310    * @return SequenceI[]
311    */
 
312  10 toggle public static SequenceI[] getNonEmptySequenceSet(SequenceI[] sequences)
313    {
314    // Identify first row of alignment with residues for prediction
315  10 boolean ungapped[] = new boolean[sequences.length];
316  10 int msflen = 0;
317  37 for (int i = 0, j = sequences.length; i < j; i++)
318    {
319  27 String tempseq = jalview.analysis.AlignSeq.extractGaps(
320    jalview.util.Comparison.GapChars,
321    sequences[i].getSequenceAsString());
322   
323  27 if (tempseq.length() == 0)
324    {
325  1 ungapped[i] = false;
326    }
327    else
328    {
329  26 ungapped[i] = true;
330  26 msflen++;
331    }
332    }
333  10 if (msflen == 0)
334    {
335  0 return null; // no minimal set
336    }
337    // compose minimal set
338  10 SequenceI[] mset = new SequenceI[msflen];
339  37 for (int i = 0, j = sequences.length, k = 0; i < j; i++)
340    {
341  27 if (ungapped[i])
342    {
343  26 mset[k++] = sequences[i];
344    }
345    }
346  10 ungapped = null;
347  10 return mset;
348    }
349    }