Class | Line # | Actions | ||||
---|---|---|---|---|---|---|
SequenceI | 40 | 0 | 0 | 0 |
1 | /* | |
2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
4 | * | |
5 | * This file is part of Jalview. | |
6 | * | |
7 | * Jalview is free software: you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License | |
9 | * as published by the Free Software Foundation, either version 3 | |
10 | * of the License, or (at your option) any later version. | |
11 | * | |
12 | * Jalview is distributed in the hope that it will be useful, but | |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
15 | * PURPOSE. See the GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
20 | */ | |
21 | package jalview.datamodel; | |
22 | ||
23 | import jalview.datamodel.features.SequenceFeaturesI; | |
24 | import jalview.util.MapList; | |
25 | ||
26 | import java.util.BitSet; | |
27 | import java.util.Iterator; | |
28 | import java.util.List; | |
29 | import java.util.Vector; | |
30 | ||
31 | import fr.orsay.lri.varna.models.rna.RNA; | |
32 | ||
33 | /** | |
34 | * Methods for manipulating a sequence, its metadata and related annotation in | |
35 | * an alignment or dataset. | |
36 | * | |
37 | * @author $author$ | |
38 | * @version $Revision$ | |
39 | */ | |
40 | public interface SequenceI extends ASequenceI | |
41 | { | |
42 | /** | |
43 | * Set the display name for the sequence | |
44 | * | |
45 | * @param name | |
46 | */ | |
47 | public void setName(String name); | |
48 | ||
49 | /** | |
50 | * Get the display name | |
51 | */ | |
52 | public String getName(); | |
53 | ||
54 | /** | |
55 | * Set start position of first non-gapped symbol in sequence | |
56 | * | |
57 | * @param start | |
58 | * new start position | |
59 | */ | |
60 | public void setStart(int start); | |
61 | ||
62 | /** | |
63 | * get start position of first non-gapped residue in sequence | |
64 | * | |
65 | * @return start position of first non-gapped residue | |
66 | */ | |
67 | public int getStart(); | |
68 | ||
69 | /** | |
70 | * get the displayed id of the sequence | |
71 | * | |
72 | * @param jvsuffix true means the id is returned as DisplayName/Start-End | |
73 | * @return the displayed id of the sequence | |
74 | */ | |
75 | public String getDisplayId(boolean jvsuffix); | |
76 | ||
77 | /** | |
78 | * set end position for last residue in sequence | |
79 | * | |
80 | * @param end | |
81 | */ | |
82 | public void setEnd(int end); | |
83 | ||
84 | /** | |
85 | * get end position for last residue in sequence; getEnd()>getStart() unless | |
86 | * sequence only consists of gap characters | |
87 | * | |
88 | * @return end position of last residue | |
89 | */ | |
90 | public int getEnd(); | |
91 | ||
92 | /** | |
93 | * @return length of sequence including gaps | |
94 | * | |
95 | */ | |
96 | public int getLength(); | |
97 | ||
98 | /** | |
99 | * Replace the sequence with the given string | |
100 | * | |
101 | * @param sequence | |
102 | * new sequence string | |
103 | */ | |
104 | public void setSequence(String sequence); | |
105 | ||
106 | /** | |
107 | * @return sequence as string | |
108 | */ | |
109 | public String getSequenceAsString(); | |
110 | ||
111 | /** | |
112 | * get a range on the sequence as a string | |
113 | * | |
114 | * @param start | |
115 | * position relative to start of sequence including gaps (from 0) | |
116 | * @param end | |
117 | * position relative to start of sequence including gaps (from 0) | |
118 | * | |
119 | * @return String containing all gaps and symbols in specified range | |
120 | */ | |
121 | public String getSequenceAsString(int start, int end); | |
122 | ||
123 | /** | |
124 | * Answers a copy of the sequence as a character array | |
125 | * | |
126 | * @return copy of the sequence as a char array | |
127 | */ | |
128 | public char[] getSequence(); | |
129 | ||
130 | /** | |
131 | * get stretch of sequence characters in an array | |
132 | * | |
133 | * @param start | |
134 | * absolute index into getSequence() | |
135 | * @param end | |
136 | * exclusive index of last position in segment to be returned. | |
137 | * | |
138 | * @return char[max(0,end-start)]; | |
139 | */ | |
140 | public char[] getSequence(int start, int end); | |
141 | ||
142 | /** | |
143 | * create a new sequence object with a subsequence of this one but sharing the | |
144 | * same dataset sequence | |
145 | * | |
146 | * @param start | |
147 | * int index for start position (base 0, inclusive) | |
148 | * @param end | |
149 | * int index for end position (base 0, exclusive) | |
150 | * | |
151 | * @return SequenceI | |
152 | * @note implementations may use getSequence to get the sequence data | |
153 | */ | |
154 | public SequenceI getSubSequence(int start, int end); | |
155 | ||
156 | /** | |
157 | * get the i'th character in this sequence's local reference frame (ie from | |
158 | * 0-number of characters lying from start-end) | |
159 | * | |
160 | * @param i | |
161 | * index | |
162 | * @return character or ' ' | |
163 | */ | |
164 | public char getCharAt(int i); | |
165 | ||
166 | /** | |
167 | * Set the description for the sequence | |
168 | * | |
169 | * @param desc | |
170 | * the new description | |
171 | */ | |
172 | public void setDescription(String desc); | |
173 | ||
174 | /** | |
175 | * Get the description for the sequence | |
176 | * | |
177 | * @return the description | |
178 | */ | |
179 | public String getDescription(); | |
180 | ||
181 | /** | |
182 | * Return the alignment column (from 1..) for a sequence position | |
183 | * | |
184 | * @param pos | |
185 | * lying from start to end | |
186 | * | |
187 | * @return aligned column for residue (0 if residue is upstream from | |
188 | * alignment, -1 if residue is downstream from alignment) note. | |
189 | * Sequence object returns sequence.getEnd() for positions upstream | |
190 | * currently. TODO: change sequence for | |
191 | * assert(findIndex(seq.getEnd()+1)==-1) and fix incremental bugs | |
192 | * | |
193 | */ | |
194 | public int findIndex(int pos); | |
195 | ||
196 | /** | |
197 | * Returns the sequence position for an alignment (column) position. If at a | |
198 | * gap, returns the position of the next residue to the right. If beyond the | |
199 | * end of the sequence, returns 1 more than the last residue position. | |
200 | * | |
201 | * @param i | |
202 | * column index in alignment (from 0..<length) | |
203 | * | |
204 | * @return sequence position for the given column | |
205 | */ | |
206 | public int findPosition(int i); | |
207 | ||
208 | /** | |
209 | * Returns the from-to sequence positions (start..) for the given column | |
210 | * positions (1..), or null if no residues are included in the range | |
211 | * | |
212 | * @param fromColum | |
213 | * @param toColumn | |
214 | * @return range of sequence positions, or null if no residues are in range | |
215 | */ | |
216 | public Range findPositions(int fromColum, int toColumn); | |
217 | ||
218 | /** | |
219 | * Returns an int array where indices correspond to each residue in the | |
220 | * sequence and the element value gives its position in the alignment | |
221 | * | |
222 | * @return int[SequenceI.getEnd()-SequenceI.getStart()+1] or null if no | |
223 | * residues in SequenceI object | |
224 | */ | |
225 | public int[] gapMap(); | |
226 | ||
227 | /** | |
228 | * Build a bitset corresponding to sequence gaps | |
229 | * | |
230 | * @return a BitSet where set values correspond to gaps in the sequence | |
231 | */ | |
232 | public BitSet gapBitset(); | |
233 | ||
234 | /** | |
235 | * Returns an int array where indices correspond to each position in sequence | |
236 | * char array and the element value gives the result of findPosition for that | |
237 | * index in the sequence. | |
238 | * | |
239 | * @return int[SequenceI.getLength()] | |
240 | */ | |
241 | public int[] findPositionMap(); | |
242 | ||
243 | /** | |
244 | * Answers true if the sequence is composed of amino acid characters. Note | |
245 | * that implementations may use heuristic methods which are not guaranteed to | |
246 | * give the biologically 'right' answer. | |
247 | * | |
248 | * @return true if the sequence is judged to be protein | |
249 | */ | |
250 | public boolean isProtein(); | |
251 | ||
252 | /** | |
253 | * Delete a range of aligned sequence columns, creating a new dataset sequence | |
254 | * if necessary and adjusting start and end positions accordingly. | |
255 | * | |
256 | * @param i | |
257 | * first column in range to delete (inclusive) | |
258 | * @param j | |
259 | * last column in range to delete (exclusive) | |
260 | */ | |
261 | public void deleteChars(int i, int j); | |
262 | ||
263 | /** | |
264 | * insert given character at alignment column position | |
265 | * | |
266 | * @param i | |
267 | * alignment column number | |
268 | * @param c | |
269 | * character to insert | |
270 | */ | |
271 | public void insertCharAt(int i, char c); | |
272 | ||
273 | /** | |
274 | * insert given character at alignment column position | |
275 | * | |
276 | * @param position | |
277 | * alignment column number | |
278 | * @param count | |
279 | * length of insert | |
280 | * @param ch | |
281 | * character to insert | |
282 | */ | |
283 | public void insertCharAt(int position, int count, char ch); | |
284 | ||
285 | /** | |
286 | * Answers a list of all sequence features associated with this sequence. The | |
287 | * list may be held by the sequence's dataset sequence if that is defined. | |
288 | * | |
289 | * @return list of sequence features | |
290 | */ | |
291 | public List<SequenceFeature> getSequenceFeatures(); | |
292 | ||
293 | /** | |
294 | * Answers the object holding features for the sequence | |
295 | * | |
296 | * @return object holding the sequence's features | |
297 | */ | |
298 | SequenceFeaturesI getFeatures(); | |
299 | ||
300 | /** | |
301 | * Replaces the sequence features associated with this sequence with the given | |
302 | * features. If this sequence has a dataset sequence, then this method will | |
303 | * update the dataset sequence's features instead. | |
304 | * | |
305 | * @param features | |
306 | */ | |
307 | public void setSequenceFeatures(List<SequenceFeature> features); | |
308 | ||
309 | /** | |
310 | * Set the PDB entries for this sequence | |
311 | * | |
312 | * @param ids | |
313 | * vector of PDB entries | |
314 | */ | |
315 | public void setPDBId(Vector<PDBEntry> ids); | |
316 | ||
317 | /** | |
318 | * Returns the list of PDB entries for this sequence | |
319 | * | |
320 | * @return vector of PDB entries (TODO confirm whether null is possible) | |
321 | */ | |
322 | public Vector<PDBEntry> getAllPDBEntries(); | |
323 | ||
324 | /** | |
325 | * Adds the entry to the *normalised* list of PDBIds. | |
326 | * | |
327 | * If a PDBEntry is passed with the same entry.getID() string as one already | |
328 | * in the list, or one is added that appears to be the same but has a chain ID | |
329 | * appended, then the existing PDBEntry will be updated with the new | |
330 | * attributes instead, unless the entries have distinct chain codes or | |
331 | * associated structure files. | |
332 | * | |
333 | * @param entry | |
334 | * @return true if the entry was added, false if updated | |
335 | */ | |
336 | public boolean addPDBId(PDBEntry entry); | |
337 | ||
338 | /** | |
339 | * update the list of PDBEntrys to include any DBRefEntrys citing structural | |
340 | * databases | |
341 | * | |
342 | * @return true if PDBEntry list was modified | |
343 | */ | |
344 | public boolean updatePDBIds(); | |
345 | ||
346 | public String getVamsasId(); | |
347 | ||
348 | public void setVamsasId(String id); | |
349 | ||
350 | /** | |
351 | * set the array of Database references for the sequence. | |
352 | * | |
353 | * @param dbs | |
354 | * @deprecated - use is discouraged since side-effects may occur if DBRefEntry | |
355 | * set are not normalised. | |
356 | */ | |
357 | @Deprecated | |
358 | public void setDBRefs(DBRefEntry[] dbs); | |
359 | ||
360 | public DBRefEntry[] getDBRefs(); | |
361 | ||
362 | /** | |
363 | * add the given entry to the list of DBRefs for this sequence, or replace a | |
364 | * similar one if entry contains a map object and the existing one doesn't. | |
365 | * | |
366 | * @param entry | |
367 | */ | |
368 | public void addDBRef(DBRefEntry entry); | |
369 | ||
370 | /** | |
371 | * Adds the given sequence feature and returns true, or returns false if it is | |
372 | * already present on the sequence, or if the feature type is null. | |
373 | * | |
374 | * @param sf | |
375 | * @return true if the feature was added, else false | |
376 | */ | |
377 | public boolean addSequenceFeature(SequenceFeature sf); | |
378 | ||
379 | public void deleteFeature(SequenceFeature sf); | |
380 | ||
381 | public void setDatasetSequence(SequenceI seq); | |
382 | ||
383 | public SequenceI getDatasetSequence(); | |
384 | ||
385 | /** | |
386 | * Returns a new array containing this sequence's annotations, or null. | |
387 | */ | |
388 | public AlignmentAnnotation[] getAnnotation(); | |
389 | ||
390 | /** | |
391 | * Returns true if this sequence has the given annotation (by object | |
392 | * identity). | |
393 | */ | |
394 | public boolean hasAnnotation(AlignmentAnnotation ann); | |
395 | ||
396 | /** | |
397 | * Add the given annotation, if not already added, and set its sequence ref to | |
398 | * be this sequence. Does nothing if this sequence's annotations already | |
399 | * include this annotation (by identical object reference). | |
400 | */ | |
401 | public void addAlignmentAnnotation(AlignmentAnnotation annotation); | |
402 | ||
403 | public void removeAlignmentAnnotation(AlignmentAnnotation annotation); | |
404 | ||
405 | /** | |
406 | * Derive a sequence (using this one's dataset or as the dataset) | |
407 | * | |
408 | * @return duplicate sequence with valid dataset sequence | |
409 | */ | |
410 | public SequenceI deriveSequence(); | |
411 | ||
412 | /** | |
413 | * set the array of associated AlignmentAnnotation for this sequenceI | |
414 | * | |
415 | * @param annotation | |
416 | */ | |
417 | public void setAlignmentAnnotation(AlignmentAnnotation[] annotation); | |
418 | ||
419 | /** | |
420 | * Get one or more alignment annotations with a particular label. | |
421 | * | |
422 | * @param label | |
423 | * string which each returned annotation must have as a label. | |
424 | * @return null or array of annotations. | |
425 | */ | |
426 | public AlignmentAnnotation[] getAnnotation(String label); | |
427 | ||
428 | /** | |
429 | * Returns a (possibly empty) list of any annotations that match on given | |
430 | * calcId (source) and label (type). Null values do not match. | |
431 | * | |
432 | * @param calcId | |
433 | * @param label | |
434 | */ | |
435 | public List<AlignmentAnnotation> getAlignmentAnnotations(String calcId, | |
436 | String label); | |
437 | ||
438 | /** | |
439 | * create a new dataset sequence (if necessary) for this sequence and sets | |
440 | * this sequence to refer to it. This call will move any features or | |
441 | * references on the sequence onto the dataset. It will also make a duplicate | |
442 | * of existing annotation rows for the dataset sequence, rather than relocate | |
443 | * them in order to preserve external references (since 2.8.2). | |
444 | * | |
445 | * @return dataset sequence for this sequence | |
446 | */ | |
447 | public SequenceI createDatasetSequence(); | |
448 | ||
449 | /** | |
450 | * Transfer any database references or annotation from entry under a sequence | |
451 | * mapping. <br/> | |
452 | * <strong>Note: DOES NOT transfer sequence associated alignment annotation | |
453 | * </strong><br/> | |
454 | * | |
455 | * @param entry | |
456 | * @param mp | |
457 | * null or mapping from entry's numbering to local start/end | |
458 | */ | |
459 | public void transferAnnotation(SequenceI entry, Mapping mp); | |
460 | ||
461 | /** | |
462 | * @return The RNA of the sequence in the alignment | |
463 | */ | |
464 | ||
465 | public RNA getRNA(); | |
466 | ||
467 | /** | |
468 | * @param rna | |
469 | * The RNA. | |
470 | */ | |
471 | public void setRNA(RNA rna); | |
472 | ||
473 | /** | |
474 | * | |
475 | * @return list of insertions (gap characters) in sequence | |
476 | */ | |
477 | public List<int[]> getInsertions(); | |
478 | ||
479 | /** | |
480 | * Given a pdbId String, return the equivalent PDBEntry if available in the | |
481 | * given sequence | |
482 | * | |
483 | * @param pdbId | |
484 | * @return the equivalent PDBEntry, if available | |
485 | */ | |
486 | public PDBEntry getPDBEntry(String pdbId); | |
487 | ||
488 | /** | |
489 | * Get all primary database/accessions for this sequence's data. These | |
490 | * DBRefEntry are expected to resolve to a valid record in the associated | |
491 | * external database, either directly or via a provided 1:1 Mapping. | |
492 | * | |
493 | * @return just the primary references (if any) for this sequence, or an empty | |
494 | * list | |
495 | */ | |
496 | public List<DBRefEntry> getPrimaryDBRefs(); | |
497 | ||
498 | /** | |
499 | * Returns a (possibly empty) list of sequence features that overlap the given | |
500 | * alignment column range, optionally restricted to one or more specified | |
501 | * feature types. If the range is all gaps, then features which enclose it are | |
502 | * included (but not contact features). | |
503 | * | |
504 | * @param fromCol | |
505 | * start column of range inclusive (1..) | |
506 | * @param toCol | |
507 | * end column of range inclusive (1..) | |
508 | * @param types | |
509 | * optional feature types to restrict results to | |
510 | * @return list of overlapping features (possibly empty) | |
511 | */ | |
512 | List<SequenceFeature> findFeatures(int fromCol, int toCol, String... types); | |
513 | ||
514 | /** | |
515 | * Method to call to indicate that the sequence (characters or alignment/gaps) | |
516 | * has been modified. Provided to allow any cursors on residue/column | |
517 | * positions to be invalidated. | |
518 | */ | |
519 | void sequenceChanged(); | |
520 | ||
521 | /** | |
522 | * | |
523 | * @return BitSet corresponding to index [0,length) where Comparison.isGap() | |
524 | * returns true. | |
525 | */ | |
526 | BitSet getInsertionsAsBits(); | |
527 | ||
528 | /** | |
529 | * Replaces every occurrence of c1 in the sequence with c2 and returns the | |
530 | * number of characters changed | |
531 | * | |
532 | * @param c1 | |
533 | * @param c2 | |
534 | */ | |
535 | public int replace(char c1, char c2); | |
536 | ||
537 | /** | |
538 | * Answers the GeneLociI, or null if not known | |
539 | * | |
540 | * @return the GeneLociI, or null if not known | |
541 | */ | |
542 | GeneLociI getGeneLoci(); | |
543 | ||
544 | /** | |
545 | * Sets the mapping to gene loci for the sequence | |
546 | * | |
547 | * @param speciesId | |
548 | * @param assemblyId | |
549 | * @param chromosomeId | |
550 | * @param map | |
551 | */ | |
552 | void setGeneLoci(String speciesId, String assemblyId, | |
553 | String chromosomeId, MapList map); | |
554 | ||
555 | ||
556 | /** | |
557 | * Returns the sequence string constructed from the substrings of a sequence | |
558 | * defined by the int[] ranges provided by an iterator. E.g. the iterator | |
559 | * could iterate over all visible regions of the alignment | |
560 | * | |
561 | * @param it | |
562 | * the iterator to use | |
563 | * @return a String corresponding to the sequence | |
564 | */ | |
565 | public String getSequenceStringFromIterator(Iterator<int[]> it); | |
566 | ||
567 | /** | |
568 | * Locate the first position in this sequence which is not contained in an | |
569 | * iterator region. If no such position exists, return 0 | |
570 | * | |
571 | * @param it | |
572 | * iterator over regions | |
573 | * @return first residue not contained in regions | |
574 | */ | |
575 | public int firstResidueOutsideIterator(Iterator<int[]> it); | |
576 | } |