| Class | Line # | Actions | |||
|---|---|---|---|---|---|
| SequenceI | 41 | 0 | 0 |
| 1 | /* | |
| 2 | * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) | |
| 3 | * Copyright (C) $$Year-Rel$$ The Jalview Authors | |
| 4 | * | |
| 5 | * This file is part of Jalview. | |
| 6 | * | |
| 7 | * Jalview is free software: you can redistribute it and/or | |
| 8 | * modify it under the terms of the GNU General Public License | |
| 9 | * as published by the Free Software Foundation, either version 3 | |
| 10 | * of the License, or (at your option) any later version. | |
| 11 | * | |
| 12 | * Jalview is distributed in the hope that it will be useful, but | |
| 13 | * WITHOUT ANY WARRANTY; without even the implied warranty | |
| 14 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR | |
| 15 | * PURPOSE. See the GNU General Public License for more details. | |
| 16 | * | |
| 17 | * You should have received a copy of the GNU General Public License | |
| 18 | * along with Jalview. If not, see <http://www.gnu.org/licenses/>. | |
| 19 | * The Jalview Authors are detailed in the 'AUTHORS' file. | |
| 20 | */ | |
| 21 | package jalview.datamodel; | |
| 22 | ||
| 23 | import java.util.BitSet; | |
| 24 | import java.util.Iterator; | |
| 25 | import java.util.List; | |
| 26 | import java.util.Vector; | |
| 27 | ||
| 28 | import fr.orsay.lri.varna.models.rna.RNA; | |
| 29 | import jalview.datamodel.Sequence.DBModList; | |
| 30 | import jalview.datamodel.features.SequenceFeaturesI; | |
| 31 | import jalview.util.MapList; | |
| 32 | import jalview.ws.params.InvalidArgumentException; | |
| 33 | ||
| 34 | /** | |
| 35 | * Methods for manipulating a sequence, its metadata and related annotation in | |
| 36 | * an alignment or dataset. | |
| 37 | * | |
| 38 | * @author $author$ | |
| 39 | * @version $Revision$ | |
| 40 | */ | |
| 41 | public interface SequenceI extends ASequenceI, ContactMapHolderI | |
| 42 | { | |
| 43 | /** | |
| 44 | * Set the display name for the sequence | |
| 45 | * | |
| 46 | * @param name the new display name | |
| 47 | */ | |
| 48 | public void setName(String name); | |
| 49 || |
| 50 | /** | |
| 51 | * Get the display name | |
| 52 | */ | |
| 53 | public String getName(); | |
| 54 || |
| 55 | /** | |
| 56 | * Set start position of first non-gapped symbol in sequence | |
| 57 | * | |
| 58 | * @param start | |
| 59 | * new start position | |
| 60 | */ | |
| 61 | public void setStart(int start); | |
| 62 || |
| 63 | /** | |
| 64 | * Get start position of first non-gapped residue in sequence | |
| 65 | * | |
| 66 | * @return start position of the first non-gapped residue | |
| 67 | */ | |
| 68 | public int getStart(); | |
| 69 || |
| 70 | /** | |
| 71 | * Get the displayed id of the sequence | |
| 72 | * | |
| 73 | * @param jvsuffix true means the id will be returned in the form | |
| 74 | * DisplayName/Start-End | |
| 75 | */ | |
| 76 | public String getDisplayId(boolean jvsuffix); | |
| 77 || |
| 78 | /** | |
| 79 | * Set end position for last residue in sequence | |
| 80 | * | |
| 81 | * @param end | |
| 82 | */ | |
| 83 | public void setEnd(int end); | |
| 84 || |
| 85 | /** | |
| 86 | * Get end position for last residue in sequence. getEnd()>getStart() unless | |
| 87 | * sequence only consists of gap characters | |
| 88 | * | |
| 89 | * @return end position of the last residue | |
| 90 | */ | |
| 91 | public int getEnd(); | |
| 92 || |
| 93 | /** | |
| 94 | * @return length of sequence including gaps | |
| 95 | * | |
| 96 | */ | |
| 97 | public int getLength(); | |
| 98 || |
| 99 | /** | |
| 100 | * Replace the sequence with the given string | |
| 101 | * | |
| 102 | * @param sequence | |
| 103 | * new sequence string | |
| 104 | */ | |
| 105 | public void setSequence(String sequence); | |
| 106 || |
| 107 | /** | |
| 108 | * @return sequence as string | |
| 109 | */ | |
| 110 | public String getSequenceAsString(); | |
| 111 || |
| 112 | /** | |
| 113 | * Get a range on the sequence as a string | |
| 114 | * | |
| 115 | * @param start | |
| 116 | * (inclusive) position relative to start of sequence including gaps | |
| 117 | * (from 0) | |
| 118 | * @param end | |
| 119 | * (exclusive) position relative to start of sequence including gaps | |
| 120 | * (from 0) | |
| 121 | * | |
| 122 | * @return String containing all gaps and symbols in specified range | |
| 123 | */ | |
| 124 | public String getSequenceAsString(int start, int end); | |
| 125 || |
| 126 | /** | |
| 127 | * Answers a copy of the sequence as a character array | |
| 128 | * | |
| 129 | * @return a copy of the sequence characters | |
| 130 | */ | |
| 131 | public char[] getSequence(); | |
| 132 || |
| 133 | /** | |
| 134 | * Get stretch of sequence characters in an array | |
| 135 | * | |
| 136 | * @param start | |
| 137 | * absolute index into getSequence() | |
| 138 | * @param end | |
| 139 | * exclusive index of last position in segment to be returned. | |
| 140 | * | |
| 141 | * @return char[max(0,end-start)]; | |
| 142 | */ | |
| 143 | public char[] getSequence(int start, int end); | |
| 144 || |
| 145 | /** | |
| 146 | * Create a new sequence object with a subsequence of this one but sharing the | |
| 147 | * same dataset sequence | |
| 148 | * | |
| 149 | * @param start | |
| 150 | * int index for start position (base 0, inclusive) | |
| 151 | * @param end | |
| 152 | * int index for end position (base 0, exclusive) | |
| 153 | * | |
| 154 | * @return SequenceI | |
| 155 | * @note implementations may use getSequence to get the sequence data | |
| 156 | */ | |
| 157 | public SequenceI getSubSequence(int start, int end); | |
| 158 || |
| 159 | /** | |
| 160 | * Get the i'th character in this sequence's local reference frame (ie from | |
| 161 | * 0-number of characters lying from start-end) | |
| 162 | * | |
| 163 | * @param i | |
| 164 | * index | |
| 165 | * @return character or ' ' | |
| 166 | */ | |
| 167 | public char getCharAt(int i); | |
| 168 || |
| 169 | /** | |
| 170 | * Set the description of the sequence | |
| 171 | * | |
| 172 | * @param desc | |
| 173 | * the description to set | |
| 174 | */ | |
| 175 | public void setDescription(String desc); | |
| 176 || |
| 177 | /** | |
| 178 | * Get the description of the sequence | |
| 179 | * | |
| 180 | * @return the description | |
| 181 | */ | |
| 182 | public String getDescription(); | |
| 183 || |
| 184 | /** | |
| 185 | * Return the alignment column (from 1..) for a sequence position | |
| 186 | * | |
| 187 | * @param pos | |
| 188 | * lying from start to end | |
| 189 | * | |
| 190 | * @return aligned column for residue (0 if residue is upstream from | |
| 191 | * alignment, -1 if residue is downstream from alignment). Note: | |
| 192 | * Sequence object currently returns sequence.getEnd() for positions | |
| 193 | * upstream. TODO: change sequence for | |
| 194 | * assert(findIndex(seq.getEnd()+1)==-1) and fix incremental bugs | |
| 195 | * | |
| 196 | */ | |
| 197 | public int findIndex(int pos); | |
| 198 || |
| 199 | /** | |
| 200 | * Returns the sequence position for an alignment (column) position. If at a | |
| 201 | * gap, returns the position of the next residue to the right. If beyond the | |
| 202 | * end of the sequence, returns 1 more than the last residue position. | |
| 203 | * | |
| 204 | * @param i | |
| 205 | * column index in alignment (from 0..<length) | |
| 206 | * | |
| 207 | * @return the sequence position for the column | |
| 208 | */ | |
| 209 | public int findPosition(int i); | |
| 210 || |
| 211 | /** | |
| 212 | * Returns the sequence positions for first and last residues lying within the | |
| 213 | * given column positions [fromColum,toColumn] (where columns are numbered | |
| 214 | * from 1), or null if no residues are included in the range | |
| 215 | * | |
| 216 | * @param fromColum | |
| 217 | * - first column base 1 | |
| 218 | * @param toColumn | |
| 219 | * - last column, base 1 | |
| 220 | * @return first and last residue positions, or null if none lie in the range | |
| 221 | */ | |
| 222 | public ContiguousI findPositions(int fromColum, int toColumn); | |
| 223 || |
| 224 | /** | |
| 225 | * Returns an int array where indices correspond to each residue in the | |
| 226 | * sequence and the element value gives its position in the alignment | |
| 227 | * | |
| 228 | * @return int[SequenceI.getEnd()-SequenceI.getStart()+1] or null if no | |
| 229 | * residues in SequenceI object | |
| 230 | */ | |
| 231 | public int[] gapMap(); | |
| 232 || |
| 233 | /** | |
| 234 | * Build a bitset corresponding to sequence gaps | |
| 235 | * | |
| 236 | * @return a BitSet where set values correspond to gaps in the sequence | |
| 237 | */ | |
| 238 | public BitSet gapBitset(); | |
| 239 || |
| 240 | /** | |
| 241 | * Returns an int array where indices correspond to each position in sequence | |
| 242 | * char array and the element value gives the result of findPosition for that | |
| 243 | * index in the sequence. | |
| 244 | * | |
| 245 | * @return int[SequenceI.getLength()] | |
| 246 | */ | |
| 247 | public int[] findPositionMap(); | |
| 248 || |
| 249 | /** | |
| 250 | * Answers true if the sequence is composed of amino acid characters. Note | |
| 251 | * that implementations may use heuristic methods which are not guaranteed to | |
| 252 | * give the biologically 'right' answer. | |
| 253 | * | |
| 254 | * @return true if the sequence is (heuristically) composed of amino acids | |
| 255 | */ | |
| 256 | public boolean isProtein(); | |
| 257 || |
| 258 | /** | |
| 259 | * Delete a range of aligned sequence columns, creating a new dataset sequence | |
| 260 | * if necessary and adjusting start and end positions accordingly. | |
| 261 | * | |
| 262 | * @param i | |
| 263 | * first column in range to delete (inclusive) | |
| 264 | * @param j | |
| 265 | * last column in range to delete (exclusive) | |
| 266 | */ | |
| 267 | public void deleteChars(int i, int j); | |
| 268 || |
| 269 | /** | |
| 270 | * Insert the given character at an alignment column position | |
| 271 | * | |
| 272 | * @param i | |
| 273 | * alignment column number | |
| 274 | * @param c | |
| 275 | * character to insert | |
| 276 | */ | |
| 277 | public void insertCharAt(int i, char c); | |
| 278 || |
| 279 | /** | |
| 280 | * Insert given character at alignment column position | |
| 281 | * | |
| 282 | * @param position | |
| 283 | * alignment column number | |
| 284 | * @param count | |
| 285 | * length of insert | |
| 286 | * @param ch | |
| 287 | * character to insert | |
| 288 | */ | |
| 289 | public void insertCharAt(int position, int count, char ch); | |
| 290 || |
| 291 | /** | |
| 292 | * Answers a list of all sequence features associated with this sequence. The | |
| 293 | * list may be held by the sequence's dataset sequence if that is defined. | |
| 294 | * | |
| 295 | * @return list of all sequence features associated with this sequence | |
| 296 | */ | |
| 297 | public List<SequenceFeature> getSequenceFeatures(); | |
| 298 || |
| 299 | /** | |
| 300 | * Answers the object holding features for the sequence | |
| 301 | * | |
| 302 | * @return the object holding features for the sequence | |
| 303 | */ | |
| 304 | SequenceFeaturesI getFeatures(); | |
| 305 || |
| 306 | /** | |
| 307 | * Replaces the sequence features associated with this sequence with the given | |
| 308 | * features. If this sequence has a dataset sequence, then this method will | |
| 309 | * update the dataset sequence's features instead. | |
| 310 | * | |
| 311 | * @param features | |
| 312 | */ | |
| 313 | public void setSequenceFeatures(List<SequenceFeature> features); | |
| 314 || |
| 315 | /** | |
| 316 | * Set the PDB entries for the sequence | |
| 317 | * | |
| 318 | * @param ids | |
| 319 | * the PDB entries to set | |
| 320 | */ | |
| 321 | public void setPDBId(Vector<PDBEntry> ids); | |
| 322 || |
| 323 | /** | |
| 324 | * Returns the PDB entries held for the sequence | |
| 325 | * | |
| 326 | * @return the PDB entries | |
| 327 | */ | |
| 328 | public Vector<PDBEntry> getAllPDBEntries(); | |
| 329 || |
| 330 | /** | |
| 331 | * Adds the entry to the *normalised* list of PDBIds. | |
| 332 | * | |
| 333 | * If a PDBEntry is passed with the same entry.getID() string as one already | |
| 334 | * in the list, or one is added that appears to be the same but has a chain ID | |
| 335 | * appended, then the existing PDBEntry will be updated with the new | |
| 336 | * attributes instead, unless the entries have distinct chain codes or | |
| 337 | * associated structure files. | |
| 338 | * | |
| 339 | * @param entry | |
| 340 | * @return true if the entry was added, false if updated | |
| 341 | */ | |
| 342 | public boolean addPDBId(PDBEntry entry); | |
| 343 || |
| 344 | /** | |
| 345 | * Update the list of PDBEntrys to include any DBRefEntrys citing structural | |
| 346 | * databases | |
| 347 | * | |
| 348 | * @return true if PDBEntry list was modified | |
| 349 | */ | |
| 350 | public boolean updatePDBIds(); | |
| 351 || |
| 352 | public String getVamsasId(); | |
| 353 || |
| 354 | public void setVamsasId(String id); | |
| 355 || |
| 356 | /** | |
| 357 | * Set the list of database references for the sequence. | |
| 358 | * | |
| 359 | * BH 2019.02.04 changes param to DBModList | |
| 360 | * | |
| 361 | * @param dbs | |
| 362 | * @deprecated - use is discouraged since side-effects may occur if DBRefEntry | |
| 363 | * set are not normalised. | |
| 364 | * @throws InvalidArgumentException | |
| 365 | * if dbs is not one created by Sequence itself | |
| 366 | */ | |
| 367 | @Deprecated | |
| 368 | public void setDBRefs(DBModList<DBRefEntry> dbs); | |
| 369 || |
| 370 | public DBModList<DBRefEntry> getDBRefs(); | |
| 371 || |
| 372 | /** | |
| 373 | * Add the given entry to the list of DBRefs for this sequence, or replace a | |
| 374 | * similar one if entry contains a map object and the existing one doesn't. | |
| 375 | * | |
| 376 | * @param entry | |
| 377 | */ | |
| 378 | public void addDBRef(DBRefEntry entry); | |
| 379 || |
| 380 | /** | |
| 381 | * Adds the given sequence feature and returns true, or returns false if it is | |
| 382 | * already present on the sequence, or if the feature type is null. | |
| 383 | * | |
| 384 | * @param sf | |
| 385 | * @return true if the feature was added, otherwise false | |
| 386 | */ | |
| 387 | public boolean addSequenceFeature(SequenceFeature sf); | |
| 388 || |
| 389 | public void deleteFeature(SequenceFeature sf); | |
| 390 || |
| 391 | public void setDatasetSequence(SequenceI seq); | |
| 392 || |
| 393 | public SequenceI getDatasetSequence(); | |
| 394 || |
| 395 | /** | |
| 396 | * Returns a new array containing this sequence's annotations, or null. | |
| 397 | */ | |
| 398 | public AlignmentAnnotation[] getAnnotation(); | |
| 399 || |
| 400 | /** | |
| 401 | * Returns true if this sequence has the given annotation (by object | |
| 402 | * identity). | |
| 403 | */ | |
| 404 | public boolean hasAnnotation(AlignmentAnnotation ann); | |
| 405 || |
| 406 | /** | |
| 407 | * Add the given annotation, if not already added, and set its sequence ref to | |
| 408 | * be this sequence. Does nothing if this sequence's annotations already | |
| 409 | * include this annotation (by identical object reference). | |
| 410 | */ | |
| 411 | public void addAlignmentAnnotation(AlignmentAnnotation annotation); | |
| 412 || |
| 413 | public void removeAlignmentAnnotation(AlignmentAnnotation annotation); | |
| 414 || |
| 415 | /** | |
| 416 | * Derive a sequence (using this one's dataset or as the dataset) | |
| 417 | * | |
| 418 | * @return duplicate sequence and any annotation present with valid dataset | |
| 419 | * sequence | |
| 420 | */ | |
| 421 | public SequenceI deriveSequence(); | |
| 422 || |
| 423 | /** | |
| 424 | * Set the array of associated AlignmentAnnotation for this sequenceI | |
| 425 | * | |
| 426 | * @param annotation | |
| 427 | */ | |
| 428 | public void setAlignmentAnnotation(AlignmentAnnotation[] annotation); | |
| 429 || |
| 430 | /** | |
| 431 | * Get one or more alignment annotations with a particular label. | |
| 432 | * | |
| 433 | * @param label | |
| 434 | * string which each returned annotation must have as a label. | |
| 435 | * @return null or array of annotations. | |
| 436 | */ | |
| 437 | public AlignmentAnnotation[] getAnnotation(String label); | |
| 438 || |
| 439 | /** | |
| 440 | * Returns a (possibly empty) list of any annotations that match on given | |
| 441 | * calcId (source) and label (type). Null values do not match. | |
| 442 | * | |
| 443 | * @param calcId | |
| 444 | * @param label | |
| 445 | */ | |
| 446 | public List<AlignmentAnnotation> getAlignmentAnnotations(String calcId, | |
| 447 | String label); | |
| 448 || |
| 449 | /** | |
| 450 | * Returns a (possibly empty) list of any annotations that match on given | |
| 451 | * calcId (source), label (type) and description (observation instance). Null | |
| 452 | * values do not match. | |
| 453 | * | |
| 454 | * @param calcId | |
| 455 | * @param label | |
| 456 | * @param description | |
| 457 | */ | |
| 458 | public List<AlignmentAnnotation> getAlignmentAnnotations(String calcId, | |
| 459 | String label, String description); | |
| 460 || |
| 461 | /** | |
| 462 | * Create a new dataset sequence (if necessary) for this sequence and sets | |
| 463 | * this sequence to refer to it. This call will move any features or | |
| 464 | * references on the sequence onto the dataset. It will also make a duplicate | |
| 465 | * of existing annotation rows for the dataset sequence, rather than relocate | |
| 466 | * them in order to preserve external references (since 2.8.2). | |
| 467 | * | |
| 468 | * @return dataset sequence for this sequence | |
| 469 | */ | |
| 470 | public SequenceI createDatasetSequence(); | |
| 471 || |
| 472 | /** | |
| 473 | * Transfer any database references or annotation from entry under a sequence | |
| 474 | * mapping. <br/> | |
| 475 | * <strong>Note: DOES NOT transfer sequence associated alignment annotation | |
| 476 | * </strong><br/> | |
| 477 | * | |
| 478 | * @param entry | |
| 479 | * @param mp | |
| 480 | * null or mapping from entry's numbering to local start/end | |
| 481 | */ | |
| 482 | public void transferAnnotation(SequenceI entry, Mapping mp); | |
| 483 || |
| 484 | /** | |
| 485 | * @return The RNA of the sequence in the alignment | |
| 486 | */ | |
| 487 || |
| 488 | public RNA getRNA(); | |
| 489 || |
| 490 | /** | |
| 491 | * @param rna | |
| 492 | * The RNA. | |
| 493 | */ | |
| 494 | public void setRNA(RNA rna); | |
| 495 || |
| 496 | /** | |
| 497 | * | |
| 498 | * @return list of insertions (gap characters) in sequence | |
| 499 | */ | |
| 500 | public List<int[]> getInsertions(); | |
| 501 || |
| 502 | /** | |
| 503 | * Given a pdbId String, return the equivalent PDBEntry if available in the | |
| 504 | * given sequence | |
| 505 | * | |
| 506 | * @param pdbId | |
| 507 | * @return the equivalent PDBEntry, if available | |
| 508 | */ | |
| 509 | public PDBEntry getPDBEntry(String pdbId); | |
| 510 || |
| 511 | /** | |
| 512 | * Get all primary database/accessions for this sequence's data. These | |
| 513 | * DBRefEntry are expected to resolve to a valid record in the associated | |
| 514 | * external database, either directly or via a provided 1:1 Mapping. | |
| 515 | * | |
| 516 | * @return just the primary references (if any) for this sequence, or an empty | |
| 517 | * list | |
| 518 | */ | |
| 519 | public List<DBRefEntry> getPrimaryDBRefs(); | |
| 520 || |
| 521 | /** | |
| 522 | * Returns a (possibly empty) list of sequence features that overlap the given | |
| 523 | * alignment column range, optionally restricted to one or more specified | |
| 524 | * feature types. If the range is all gaps, then features which enclose it are | |
| 525 | * included (but not contact features). | |
| 526 | * | |
| 527 | * @param fromCol | |
| 528 | * start column of range inclusive (1..) | |
| 529 | * @param toCol | |
| 530 | * end column of range inclusive (1..) | |
| 531 | * @param types | |
| 532 | * optional feature types to restrict results to | |
| 533 | * @return list of overlapping features (possibly empty) | |
| 534 | */ | |
| 535 | List<SequenceFeature> findFeatures(int fromCol, int toCol, | |
| 536 | String... types); | |
| 537 || |
| 538 | /** | |
| 539 | * Method to call to indicate that the sequence (characters or alignment/gaps) | |
| 540 | * has been modified. Provided to allow any cursors on residue/column | |
| 541 | * positions to be invalidated. | |
| 542 | */ | |
| 543 | void sequenceChanged(); | |
| 544 || |
| 545 | /** | |
| 546 | * | |
| 547 | * @return BitSet corresponding to index [0,length) where Comparison.isGap() | |
| 548 | * returns true. | |
| 549 | */ | |
| 550 | BitSet getInsertionsAsBits(); | |
| 551 || |
| 552 | /** | |
| 553 | * Replaces every occurrence of c1 in the sequence with c2 and returns the | |
| 554 | * number of characters changed | |
| 555 | * | |
| 556 | * @param c1 | |
| 557 | * @param c2 | |
| 558 | */ | |
| 559 | public int replace(char c1, char c2); | |
| 560 || |
| 561 | /** | |
| 562 | * Answers the GeneLociI, or null if not known | |
| 563 | * | |
| 564 | * @return the GeneLociI, or null if not known | |
| 565 | */ | |
| 566 | GeneLociI getGeneLoci(); | |
| 567 || |
| 568 | /** | |
| 569 | * Sets the mapping to gene loci for the sequence | |
| 570 | * | |
| 571 | * @param speciesId | |
| 572 | * @param assemblyId | |
| 573 | * @param chromosomeId | |
| 574 | * @param map | |
| 575 | */ | |
| 576 | void setGeneLoci(String speciesId, String assemblyId, String chromosomeId, | |
| 577 | MapList map); | |
| 578 || |
| 579 | /** | |
| 580 | * Returns the sequence string constructed from the substrings of a sequence | |
| 581 | * defined by the int[] ranges provided by an iterator. E.g. the iterator | |
| 582 | * could iterate over all visible regions of the alignment | |
| 583 | * | |
| 584 | * @param it | |
| 585 | * the iterator to use | |
| 586 | * @return a String corresponding to the sequence | |
| 587 | */ | |
| 588 | public String getSequenceStringFromIterator(Iterator<int[]> it); | |
| 589 || |
| 590 | /** | |
| 591 | * Locate the first position in this sequence which is not contained in an | |
| 592 | * iterator region. If no such position exists, return 0 | |
| 593 | * | |
| 594 | * @param it | |
| 595 | * iterator over regions | |
| 596 | * @return first residue not contained in regions | |
| 597 | */ | |
| 598 | public int firstResidueOutsideIterator(Iterator<int[]> it); | |
| 599 || |
| 600 | public void addContactListFor(AlignmentAnnotation annotation, | |
| 601 | ContactMatrixI cm); | |
| 602 || |
| 603 | } | |