Clover icon

Coverage Report

  1. Project Clover database Thu Dec 4 2025 14:43:25 GMT
  2. Package jalview.analysis

File AlignmentGenerator.java

 

Coverage histogram

../../img/srcFileCovDistChart8.png
20% of files have more coverage

Code metrics

38
84
8
1
319
176
28
0.33
10.5
8
3.5

Classes

Class Line # Actions
AlignmentGenerator 47 84 28
0.7307692 (73.1%)
 

Contributing tests

This file is covered by 17 tests.

Source view

1    /*
2    * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3    * Copyright (C) $$Year-Rel$$ The Jalview Authors
4    *
5    * This file is part of Jalview.
6    *
7    * Jalview is free software: you can redistribute it and/or
8    * modify it under the terms of the GNU General Public License
9    * as published by the Free Software Foundation, either version 3
10    * of the License, or (at your option) any later version.
11    *
12    * Jalview is distributed in the hope that it will be useful, but
13    * WITHOUT ANY WARRANTY; without even the implied warranty
14    * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15    * PURPOSE. See the GNU General Public License for more details.
16    *
17    * You should have received a copy of the GNU General Public License
18    * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19    * The Jalview Authors are detailed in the 'AUTHORS' file.
20    */
21    package jalview.analysis;
22   
23    import java.util.Locale;
24   
25    import java.io.File;
26    import java.io.FileNotFoundException;
27    import java.io.PrintStream;
28    import java.util.Arrays;
29    import java.util.Random;
30   
31    import jalview.datamodel.Alignment;
32    import jalview.datamodel.AlignmentI;
33    import jalview.datamodel.Sequence;
34    import jalview.datamodel.SequenceI;
35    import jalview.io.FastaFile;
36   
37    /**
38    * Generates, and outputs in Fasta format, a random peptide or nucleotide
39    * alignment for given sequence length and count. Will regenerate the same
40    * alignment each time if the same random seed is used (so may be used for
41    * reproducible unit tests). Not guaranteed to reproduce the same results
42    * between versions, as the rules may get tweaked to produce more 'realistic'
43    * results.
44    *
45    * @author gmcarstairs
46    */
 
47    public class AlignmentGenerator
48    {
49    private static final char GAP = '-';
50   
51    private static final char ZERO = '0';
52   
53    private static final char[] NUCS = "GTCA".toCharArray();
54   
55    private static final char[] PEPS = "MILVFYWHKRDEQNTCGASNP".toCharArray();
56   
57    private static char[] BASES;
58   
59    private Random random;
60   
61    private PrintStream ps;
62   
63    /**
64    * Outputs a pseudo-randomly generated nucleotide or peptide alignment
65    * Arguments:
66    * <ul>
67    * <li>n (for nucleotide) or p (for peptide)</li>
68    * <li>length (number of bases in each sequence)</li>
69    * <li>height (number of sequences)</li>
70    * <li>a whole number random seed</li>
71    * <li>percentage of gaps to include (0-100)</li>
72    * <li>percentage chance of variation of each position (0-100)</li>
73    * <li>(optional) path to a file to write the alignment to</li>
74    * </ul>
75    *
76    *
77    * @param args
78    * @throws FileNotFoundException
79    */
 
80  0 toggle public static void main(String[] args) throws FileNotFoundException
81    {
82  0 if (args.length != 6 && args.length != 7)
83    {
84  0 usage();
85  0 return;
86    }
87   
88  0 PrintStream ps = System.out;
89  0 if (args.length == 7)
90    {
91  0 ps = new PrintStream(new File(args[6]));
92    }
93   
94  0 boolean nucleotide = args[0].toLowerCase(Locale.ROOT).startsWith("n");
95  0 int width = Integer.parseInt(args[1]);
96  0 int height = Integer.parseInt(args[2]);
97  0 long randomSeed = Long.valueOf(args[3]);
98  0 int gapPercentage = Integer.valueOf(args[4]);
99  0 int changePercentage = Integer.valueOf(args[5]);
100   
101  0 ps.println("; " + height + " sequences of " + width + " bases with "
102    + gapPercentage + "% gaps and " + changePercentage
103    + "% mutations (random seed = " + randomSeed + ")");
104   
105  0 new AlignmentGenerator(nucleotide, ps).generate(width, height,
106    randomSeed, gapPercentage, changePercentage);
107   
108  0 if (ps != System.out)
109    {
110  0 ps.close();
111    }
112    }
113   
114    /**
115    * Prints parameter help
116    */
 
117  0 toggle private static void usage()
118    {
119  0 System.out.println("Usage:");
120  0 System.out.println("arg0: n (for nucleotide) or p (for peptide)");
121  0 System.out.println("arg1: number of (non-gap) bases per sequence");
122  0 System.out.println("arg2: number of sequences");
123  0 System.out.println(
124    "arg3: an integer as random seed (same seed = same results)");
125  0 System.out.println("arg4: percentage of gaps to (randomly) generate");
126  0 System.out.println(
127    "arg5: percentage of 'mutations' to (randomly) generate");
128  0 System.out.println(
129    "arg6: (optional) path to output file (default is sysout)");
130  0 System.out.println("Example: AlignmentGenerator n 12 15 387 10 5");
131  0 System.out.println(
132    "- 15 nucleotide sequences of 12 bases each, approx 10% gaps and 5% mutations, random seed = 387");
133   
134    }
135   
136    /**
137    * Constructor that sets nucleotide or peptide symbol set, and also writes the
138    * generated alignment to sysout
139    */
 
140  20 toggle public AlignmentGenerator(boolean nuc)
141    {
142  20 this(nuc, System.out);
143    }
144   
145    /**
146    * Constructor that sets nucleotide or peptide symbol set, and also writes the
147    * generated alignment to the specified output stream (if not null). This can
148    * be used to write the alignment to a file or sysout.
149    */
 
150  20 toggle public AlignmentGenerator(boolean nucleotide, PrintStream printStream)
151    {
152  20 BASES = nucleotide ? NUCS : PEPS;
153  20 ps = printStream;
154    }
155   
156    /**
157    * Outputs an 'alignment' of given width and height, where each position is a
158    * random choice from the symbol alphabet, or - for gap
159    *
160    * @param width
161    * @param height
162    * @param randomSeed
163    * @param changePercentage
164    * @param gapPercentage
165    */
 
166  36 toggle public AlignmentI generate(int width, int height, long randomSeed,
167    int gapPercentage, int changePercentage)
168    {
169  36 SequenceI[] seqs = new SequenceI[height];
170  36 random = new Random(randomSeed);
171  36 seqs[0] = generateSequence(1, width, gapPercentage);
172  1688 for (int seqno = 1; seqno < height; seqno++)
173    {
174  1652 seqs[seqno] = generateAnotherSequence(seqs[0].getSequence(),
175    seqno + 1, width, changePercentage);
176    }
177  36 AlignmentI al = new Alignment(seqs);
178   
179  36 if (ps != null)
180    {
181  36 ps.println(new FastaFile().print(al.getSequencesArray(), true));
182    }
183   
184  36 return al;
185    }
186   
187    /**
188    * Outputs a DNA 'sequence' of given length, with some random gaps included.
189    *
190    * @param seqno
191    * @param length
192    * @param gapPercentage
193    */
 
194  36 toggle private SequenceI generateSequence(int seqno, int length,
195    int gapPercentage)
196    {
197  36 StringBuilder seq = new StringBuilder(length);
198   
199    /*
200    * Loop till we've added 'length' bases (excluding gaps)
201    */
202  1699 for (int count = 0; count < length;)
203    {
204  1663 boolean addGap = random.nextInt(100) < gapPercentage;
205  1663 char c = addGap ? GAP
206    : BASES[random.nextInt(Integer.MAX_VALUE) % BASES.length];
207  1663 seq.append(c);
208  1663 if (!addGap)
209    {
210  1618 count++;
211    }
212    }
213  36 final String seqName = "SEQ" + seqno;
214  36 final String seqString = seq.toString();
215  36 SequenceI sq = new Sequence(seqName, seqString);
216  36 sq.createDatasetSequence();
217  36 return sq;
218    }
219   
220    /**
221    * Generate a sequence approximately aligned to the first one.
222    *
223    * @param ds
224    * @param seqno
225    * @param width
226    * number of bases
227    * @param changePercentage
228    * @return
229    */
 
230  1652 toggle private SequenceI generateAnotherSequence(char[] ds, int seqno, int width,
231    int changePercentage)
232    {
233  1652 int length = ds.length;
234  1652 char[] seq = new char[length];
235  1652 Arrays.fill(seq, ZERO);
236  1652 int gapsWanted = length - width;
237  1652 int gapsAdded = 0;
238   
239    /*
240    * First 'randomly' mimic gaps in model sequence.
241    */
242  200600 for (int pos = 0; pos < length; pos++)
243    {
244  198948 if (ds[pos] == GAP)
245    {
246    /*
247    * Add a gap at the same position with changePercentage likelihood
248    */
249  7945 seq[pos] = randomCharacter(GAP, changePercentage);
250  7945 if (seq[pos] == GAP)
251    {
252  7564 gapsAdded++;
253    }
254    }
255    }
256   
257    /*
258    * Next scatter any remaining gaps (if any) at random. This gives an even
259    * distribution.
260    */
261  2033 while (gapsAdded < gapsWanted)
262    {
263  381 boolean added = false;
264  772 while (!added)
265    {
266  391 int pos = random.nextInt(length);
267  391 if (seq[pos] != GAP)
268    {
269  381 seq[pos] = GAP;
270  381 added = true;
271  381 gapsAdded++;
272    }
273    }
274    }
275   
276    /*
277    * Finally fill in the rest with randomly mutated bases.
278    */
279  200600 for (int pos = 0; pos < length; pos++)
280    {
281  198948 if (seq[pos] == ZERO)
282    {
283  190632 char c = randomCharacter(ds[pos], changePercentage);
284  190632 seq[pos] = c;
285    }
286    }
287  1652 final String seqName = "SEQ" + seqno;
288  1652 final String seqString = new String(seq);
289  1652 SequenceI sq = new Sequence(seqName, seqString);
290  1652 sq.createDatasetSequence();
291  1652 return sq;
292    }
293   
294    /**
295    * Returns a random character that is changePercentage% likely to match the
296    * given type (as base or gap).
297    *
298    * @param changePercentage
299    *
300    * @param c
301    * @return
302    */
 
303  198577 toggle private char randomCharacter(char c, int changePercentage)
304    {
305  198577 final boolean mutation = random.nextInt(100) < changePercentage;
306   
307  198577 if (!mutation)
308    {
309  188969 return c;
310    }
311   
312  9608 char newchar = c;
313  19733 while (newchar == c)
314    {
315  10125 newchar = BASES[random.nextInt(Integer.MAX_VALUE) % BASES.length];
316    }
317  9608 return newchar;
318    }
319    }