/*
 * Decompiled with CFR 0.152.
 */
package compbio.data.sequence;

import compbio.data.sequence.Alignment;
import compbio.data.sequence.AlignmentMetadata;
import compbio.data.sequence.ClustalAlignmentUtil;
import compbio.data.sequence.ConservationMethod;
import compbio.data.sequence.DisemblResult;
import compbio.data.sequence.DisorderMethod;
import compbio.data.sequence.FastaReader;
import compbio.data.sequence.FastaSequence;
import compbio.data.sequence.GlobProtResult;
import compbio.data.sequence.IUPredResult;
import compbio.data.sequence.Program;
import compbio.data.sequence.Range;
import compbio.data.sequence.Score;
import compbio.data.sequence.UnknownFileFormatException;
import compbio.util.Util;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;
import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Static helpers for classifying and cleaning biological sequences, for
 * reading and writing FASTA / Clustal data, and for parsing the result files
 * of several prediction programs (IUPred, JRonn, Disembl, GlobPlot, AACon).
 *
 * The class is not instantiable.
 */
public final class SequenceUtil {
    /** Matches a single whitespace character. */
    public static final Pattern WHITE_SPACE = Pattern.compile("\\s");
    /** Matches a single digit. */
    public static final Pattern DIGIT = Pattern.compile("\\d");
    /** Matches a single non-word character (anything but letters, digits and underscore). */
    public static final Pattern NONWORD = Pattern.compile("\\W");
    /** Matches a run of the 20 standard amino acid one-letter codes, ignoring case. */
    public static final Pattern AA = Pattern.compile("[ARNDCQEGHILKMFPSTWYV]+", Pattern.CASE_INSENSITIVE);
    /** Matches a run of characters that are not standard amino acid codes, ignoring case. */
    public static final Pattern NON_AA = Pattern.compile("[^ARNDCQEGHILKMFPSTWYV]+", Pattern.CASE_INSENSITIVE);
    /** Matches a run of standard amino acid codes extended with {@code X} and {@code U}, ignoring case. */
    public static final Pattern AMBIGUOUS_AA = Pattern.compile("[ARNDCQEGHILKMFPSTWYVXU]+", Pattern.CASE_INSENSITIVE);
    /** Matches a run of the nucleotide codes {@code A, G, T, C, U}, ignoring case. */
    public static final Pattern NUCLEOTIDE = Pattern.compile("[AGTCU]+", Pattern.CASE_INSENSITIVE);
    /** Matches a run of nucleotide codes including the ambiguity codes listed in the pattern, ignoring case. */
    public static final Pattern AMBIGUOUS_NUCLEOTIDE = Pattern.compile("[AGTCRYMKSWHBVDNU]+", Pattern.CASE_INSENSITIVE);
    /** Matches a run of characters other than {@code A, G, T, C, U}, ignoring case. */
    public static final Pattern NON_NUCLEOTIDE = Pattern.compile("[^AGTCU]+", Pattern.CASE_INSENSITIVE);
    /** Underscores and dashes stripped by {@link #deepCleanSequence(String)}; compiled once instead of per call. */
    private static final Pattern NON_SEQUENCE_CHARS = Pattern.compile("[_-]+");
    private static final String JRONN_WRONG_FORMAT_MESSAGE = "Jronn file must be in the following format:\n>sequence_name\n M\tV\tS\n0.43\t0.22\t0.65\nWhere first line is the sequence name,\nsecond line is the tab delimited sequence,\nthird line contains tab delimited disorder prediction values.\nNo lines are allowed between these three. Additionally, the number of  sequence residues must be equal to the number of the disorder values.";

    /** Utility class; not meant to be instantiated. */
    private SequenceUtil() {
    }

    /**
     * Tells whether the sequence held by {@code s} is an unambiguous
     * nucleotide sequence.
     *
     * @param s the sequence to test
     * @return the result of {@link #isNonAmbNucleotideSequence(String)} on {@code s.getSequence()}
     */
    public static boolean isNucleotideSequence(FastaSequence s) {
        return SequenceUtil.isNonAmbNucleotideSequence(s.getSequence());
    }

    /**
     * Tells whether the sequence, once whitespace is removed, is non-empty
     * and consists only of the letters A, G, T, C and U (any case).
     *
     * @param sequence the raw sequence
     * @return {@code true} if the cleaned sequence contains nucleotide codes only
     */
    public static boolean isNonAmbNucleotideSequence(String sequence) {
        String cleaned = SequenceUtil.cleanSequence(sequence);
        if (DIGIT.matcher(cleaned).find()) {
            return false;
        }
        if (NON_NUCLEOTIDE.matcher(cleaned).find()) {
            return false;
        }
        // No foreign characters are left, so any match means the sequence is non-empty.
        return NUCLEOTIDE.matcher(cleaned).find();
    }

    /**
     * Removes every whitespace character and converts the rest to upper case.
     *
     * @param sequence the raw sequence; must not be {@code null}
     * @return the sequence without whitespace, in upper case
     */
    public static String cleanSequence(String sequence) {
        assert (sequence != null);
        return WHITE_SPACE.matcher(sequence).replaceAll("").toUpperCase();
    }

    /**
     * Applies {@link #cleanSequence(String)} and then additionally removes
     * digits, non-word characters, underscores and dashes.
     *
     * @param sequence the raw sequence
     * @return the upper-cased sequence containing letters only
     */
    public static String deepCleanSequence(String sequence) {
        String cleaned = SequenceUtil.cleanSequence(sequence);
        cleaned = DIGIT.matcher(cleaned).replaceAll("");
        cleaned = NONWORD.matcher(cleaned).replaceAll("");
        // '_' counts as a word character, so it survives NONWORD and is removed here.
        return NON_SEQUENCE_CHARS.matcher(cleaned).replaceAll("");
    }

    /**
     * Removes every character that is not a standard amino acid code.
     * The case of the remaining characters is left untouched.
     *
     * @param sequence the raw sequence
     * @return the sequence with all non amino acid characters removed
     */
    public static String cleanProteinSequence(String sequence) {
        return NON_AA.matcher(sequence).replaceAll("");
    }

    /**
     * Tells whether the sequence, once whitespace is removed, is a non-empty
     * run of standard amino acid codes that is not also a valid unambiguous
     * nucleotide sequence.
     *
     * @param sequence the raw sequence
     * @return {@code true} if the sequence is classified as protein
     */
    public static boolean isProteinSequence(String sequence) {
        String cleaned = SequenceUtil.cleanSequence(sequence);
        if (SequenceUtil.isNonAmbNucleotideSequence(cleaned)) {
            return false;
        }
        if (DIGIT.matcher(cleaned).find()) {
            return false;
        }
        if (NON_AA.matcher(cleaned).find()) {
            return false;
        }
        return AA.matcher(cleaned).find();
    }

    /**
     * Tells whether the sequence is a protein sequence that needs the
     * ambiguous alphabet: after whitespace removal it must consist only of
     * {@link #AMBIGUOUS_AA} characters, must not be an unambiguous nucleotide
     * sequence, and must contain at least one symbol outside the standard
     * amino acid alphabet (that is, {@code X} or {@code U}).
     *
     * The previous implementation rejected every input: sequences containing
     * {@code X}/{@code U} failed the {@code NON_AA} check, and everything
     * else failed the {@code AA} check.
     *
     * @param sequence the raw sequence
     * @return {@code true} if the sequence is an ambiguous protein sequence
     */
    public static boolean isAmbiguosProtein(String sequence) {
        String cleaned = SequenceUtil.cleanSequence(sequence);
        if (SequenceUtil.isNonAmbNucleotideSequence(cleaned)) {
            return false;
        }
        // matches() demands a non-empty sequence made only of ambiguous-alphabet
        // letters, which also rules out digits and punctuation.
        if (!AMBIGUOUS_AA.matcher(cleaned).matches()) {
            return false;
        }
        // A sequence with standard residues only is a plain protein, not an ambiguous one.
        return NON_AA.matcher(cleaned).find();
    }

    /**
     * Writes the sequences in FASTA format and closes the stream, also when
     * writing fails.
     *
     * @param outstream the stream to write to; closed by this method
     * @param sequences the sequences to write
     * @param width passed to {@code FastaSequence.getFormatedSequence(int)}
     * @throws IOException if writing or closing fails
     */
    public static void writeFasta(OutputStream outstream, List<FastaSequence> sequences, int width) throws IOException {
        try {
            SequenceUtil.writeFastaKeepTheStream(outstream, sequences, width);
        } finally {
            outstream.close();
        }
    }

    /**
     * Writes the sequences in FASTA format, flushes, and leaves the stream
     * open. Text is encoded with the platform default charset.
     *
     * @param outstream the stream to write to; not closed by this method
     * @param sequences the sequences to write
     * @param width passed to {@code FastaSequence.getFormatedSequence(int)}
     * @throws IOException if writing fails
     */
    public static void writeFastaKeepTheStream(OutputStream outstream, List<FastaSequence> sequences, int width) throws IOException {
        BufferedWriter fastawriter = new BufferedWriter(new OutputStreamWriter(outstream));
        for (FastaSequence fs : sequences) {
            fastawriter.write(">" + fs.getId() + "\n");
            fastawriter.write(fs.getFormatedSequence(width));
            fastawriter.write("\n");
        }
        // Flushing the buffered writer also flushes the wrapped stream writer.
        fastawriter.flush();
    }

    /**
     * Reads all sequences a {@code FastaReader} yields for the stream and
     * closes the stream, also when reading fails.
     *
     * @param inStream the stream to read; closed by this method
     * @return the sequences in reading order
     * @throws IOException if reading or closing fails
     */
    public static List<FastaSequence> readFasta(InputStream inStream) throws IOException {
        ArrayList<FastaSequence> seqs = new ArrayList<FastaSequence>();
        try {
            FastaReader reader = new FastaReader(inStream);
            while (reader.hasNext()) {
                seqs.add(reader.next());
            }
        } finally {
            inStream.close();
        }
        return seqs;
    }

    /**
     * Writes the {@code getOnelineFasta()} form of every sequence and closes
     * the stream, also when writing fails. Text is encoded with the platform
     * default charset.
     *
     * @param os the stream to write to; closed by this method
     * @param sequences the sequences to write
     * @throws IOException if writing or closing fails
     */
    public static void writeFasta(OutputStream os, List<FastaSequence> sequences) throws IOException {
        BufferedWriter fastaOut = new BufferedWriter(new OutputStreamWriter(os));
        try {
            for (FastaSequence fs : sequences) {
                fastaOut.write(fs.getOnelineFasta());
            }
        } finally {
            // Closing the buffered writer flushes it and closes the wrapped writer and stream.
            fastaOut.close();
        }
    }

    /**
     * Reads an IUPred result file. The result type is derived from the file
     * via {@code IUPredResult.getType(File)}.
     *
     * @param result the IUPred output file
     * @return scores keyed by the name line of each entry
     * @throws IOException if the file cannot be opened or closed
     * @throws UnknownFileFormatException if the content cannot be parsed
     */
    public static Map<String, Score> readIUPred(File result) throws IOException, UnknownFileFormatException {
        FileInputStream input = new FileInputStream(result);
        try {
            return SequenceUtil.readIUPred(input, IUPredResult.getType(result));
        } finally {
            input.close();
        }
    }

    /**
     * Parses IUPred output. The input is split on {@code '#'}; the first line
     * of each part is used as the sequence name and the remainder is parsed
     * as domain ranges ({@code IUPredResult.Glob}) or as per-residue scores
     * (any other type). Parts without any content are skipped.
     *
     * @param input the stream to parse; closed by this method
     * @param type the kind of IUPred result contained in the stream
     * @return scores keyed by the trimmed name line
     * @throws IOException declared for callers; not thrown by this code directly
     * @throws UnknownFileFormatException if a score line cannot be parsed
     */
    private static Map<String, Score> readIUPred(InputStream input, IUPredResult type) throws IOException, UnknownFileFormatException {
        HashMap<String, Score> seqs = new HashMap<String, Score>();
        Scanner scan = new Scanner(input);
        try {
            scan.useDelimiter("#");
            while (scan.hasNext()) {
                Scanner entry = new Scanner(scan.next());
                try {
                    if (!entry.hasNextLine()) {
                        // Two adjacent '#' produce an empty token; there is nothing to parse.
                        continue;
                    }
                    String name = entry.nextLine().trim();
                    Score score;
                    if (IUPredResult.Glob == type) {
                        TreeSet<Range> ranges = SequenceUtil.parseIUPredDomains(entry);
                        score = new Score(type, ranges);
                    } else {
                        float[] scores = SequenceUtil.parseIUPredScores(entry);
                        score = new Score(type, scores);
                    }
                    seqs.put(name, score);
                } finally {
                    entry.close();
                }
            }
        } finally {
            scan.close();
        }
        return seqs;
    }

    /**
     * Parses the globular domain section of an IUPred entry: a line starting
     * with "Number of globular domains:" followed by that many lines, each
     * carrying a {@code from-to} range after the first {@code '.'}.
     *
     * @param scan scanner positioned on the domain count line
     * @return the parsed ranges; empty if the declared count is zero
     */
    private static TreeSet<Range> parseIUPredDomains(Scanner scan) {
        String header = "Number of globular domains:";
        String domainPref = "globular domain";
        TreeSet<Range> ranges = new TreeSet<Range>();
        String countLine = scan.nextLine().trim();
        assert (countLine.startsWith(header));
        int domainNum = Integer.parseInt(countLine.substring(header.length()).trim());
        for (int i = 0; i < domainNum; ++i) {
            assert (scan.hasNextLine());
            String line = scan.nextLine();
            assert (line.trim().startsWith(domainPref));
            // Everything after the first '.' is the "from - to" part of the line.
            String rangeText = line.substring(line.indexOf(".") + 1).trim();
            ranges.add(new Range(rangeText.split("-")));
        }
        return ranges;
    }

    /**
     * Parses the per-residue section of an IUPred entry. Every non-blank
     * line must hold at least three whitespace separated columns; the third
     * one is taken as the score.
     *
     * @param scan scanner positioned on the first score line
     * @return the scores in line order
     * @throws UnknownFileFormatException if a line has fewer than three
     *         columns or its third column is not a number
     */
    private static float[] parseIUPredScores(Scanner scan) throws UnknownFileFormatException {
        ArrayList<String> annotation = new ArrayList<String>();
        while (scan.hasNextLine()) {
            String line = scan.nextLine().trim();
            if (line.isEmpty()) {
                // A blank line between entries used to fail with ArrayIndexOutOfBoundsException.
                continue;
            }
            String[] val = line.split("\\s+");
            if (val.length < 3) {
                throw new UnknownFileFormatException("IUPred score line must contain at least three whitespace delimited columns, but was: '" + line + "'");
            }
            annotation.add(val[2]);
        }
        return SequenceUtil.convertToNumber(annotation.toArray(new String[annotation.size()]));
    }

    /**
     * Reads a JRonn result file.
     *
     * @param result the JRonn output file
     * @return scores keyed by sequence name
     * @throws IOException if the file cannot be read or closed
     * @throws UnknownFileFormatException if the content is not in the expected format
     */
    public static Map<String, Score> readJRonn(File result) throws IOException, UnknownFileFormatException {
        FileInputStream input = new FileInputStream(result);
        try {
            return SequenceUtil.readJRonn(input);
        } finally {
            input.close();
        }
    }

    /**
     * Parses horizontally formatted JRonn output: a {@code >name} line, a
     * tab delimited sequence line and a tab delimited line of disorder
     * values. Lines outside such a record that do not start with
     * {@code '>'} are ignored. The stream is decoded as UTF-8.
     *
     * @param inStream the stream to parse; closed by this method
     * @return scores keyed by sequence name
     * @throws IOException if reading or closing fails
     * @throws UnknownFileFormatException if a record is incomplete, a value
     *         is not a number, or the number of values differs from the
     *         number of residues
     */
    public static Map<String, Score> readJRonn(InputStream inStream) throws IOException, UnknownFileFormatException {
        HashMap<String, Score> seqs = new HashMap<String, Score>();
        BufferedReader infasta = new BufferedReader(new InputStreamReader(inStream, "UTF8"), 16000);
        try {
            String line;
            while ((line = infasta.readLine()) != null) {
                if (!line.startsWith(">")) {
                    continue;
                }
                String sname = line.trim().substring(1);
                String sequenceLine = infasta.readLine();
                String valuesLine = infasta.readLine();
                if (sequenceLine == null || valuesLine == null) {
                    // A record cut short used to surface as a NullPointerException.
                    throw new UnknownFileFormatException("File does not look like Jronn horizontally formatted output file!\n" + JRONN_WRONG_FORMAT_MESSAGE);
                }
                String sequence = sequenceLine.replace("\t", "");
                float[] annotation = SequenceUtil.convertToNumber(valuesLine.split("\t"));
                if (annotation.length != sequence.length()) {
                    throw new UnknownFileFormatException("File does not look like Jronn horizontally formatted output file!\n" + JRONN_WRONG_FORMAT_MESSAGE);
                }
                seqs.put(sname, new Score(DisorderMethod.JRonn, annotation));
            }
        } finally {
            infasta.close();
        }
        return seqs;
    }

    /**
     * Converts every element with {@link Float#parseFloat(String)}.
     *
     * @param annotValues the textual values
     * @return the parsed values, in the same order
     * @throws UnknownFileFormatException if any element is not a valid
     *         float; the {@link NumberFormatException} is attached as cause
     */
    private static float[] convertToNumber(String[] annotValues) throws UnknownFileFormatException {
        float[] annotation = new float[annotValues.length];
        try {
            for (int i = 0; i < annotation.length; ++i) {
                annotation[i] = Float.parseFloat(annotValues[i]);
            }
        }
        catch (NumberFormatException e) {
            // Pass the exception itself: NumberFormatException has no cause of its own.
            throw new UnknownFileFormatException(JRONN_WRONG_FORMAT_MESSAGE, e);
        }
        return annotation;
    }

    /**
     * Closes the stream and logs, rather than propagates, any
     * {@link IOException} raised while closing.
     *
     * @param log the logger that receives a warning on failure; if
     *        {@code null} the failure is dropped silently
     * @param stream the resource to close; {@code null} is ignored
     */
    public static final void closeSilently(Logger log, Closeable stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        }
        catch (IOException e) {
            if (log != null) {
                // Log the exception itself so that its stack trace is recorded.
                log.log(Level.WARNING, e.getLocalizedMessage(), e);
            }
        }
    }

    /**
     * Parses Disembl output. The input is split on {@code '>'}; each part
     * holds the sequence name, three range lines (COILS, REM465, HOTLOOPS),
     * a column title line and then rows of
     * {@code residue coils rem465 hotloops}. Numbers are parsed with
     * {@link Locale#US} so that the result does not depend on the JVM
     * default locale. Malformed rows surface as the unchecked exceptions
     * thrown by {@link Scanner}.
     *
     * @param input the stream to parse; closed by this method
     * @return the three scores of every sequence, keyed by sequence name
     * @throws IOException declared for callers; not thrown by this code directly
     * @throws UnknownFileFormatException if the input contains no record at all
     */
    public static HashMap<String, Set<Score>> readDisembl(InputStream input) throws IOException, UnknownFileFormatException {
        Scanner scan = new Scanner(input);
        try {
            scan.useDelimiter(">");
            if (!scan.hasNext()) {
                throw new UnknownFileFormatException("In Disembl score format each sequence score is expected to start from the line: >Sequence name  No such line was found!");
            }
            HashMap<String, Set<Score>> results = new HashMap<String, Set<Score>>();
            while (scan.hasNext()) {
                Scanner scansingle = new Scanner(scan.next());
                // Scores use '.' as the decimal separator whatever the platform locale is.
                scansingle.useLocale(Locale.US);
                try {
                    if (!scansingle.hasNextLine()) {
                        throw new RuntimeException("The input looks like an incomplete disembl file - cannot parse!");
                    }
                    ArrayList<Float> coils = new ArrayList<Float>();
                    ArrayList<Float> rem = new ArrayList<Float>();
                    ArrayList<Float> hotloops = new ArrayList<Float>();
                    String sequenceName = scansingle.nextLine().trim();
                    TreeSet<Range> coilsR = SequenceUtil.parseRanges(DisemblResult.COILS, scansingle.nextLine());
                    TreeSet<Range> rem465R = SequenceUtil.parseRanges(DisemblResult.REM465, scansingle.nextLine());
                    TreeSet<Range> loopsR = SequenceUtil.parseRanges(DisemblResult.HOTLOOPS, scansingle.nextLine());
                    String title = scansingle.nextLine();
                    assert (title.startsWith("# RESIDUE COILS REM465 HOTLOOPS")) : ">Sequence_name must follow column title: # RESIDUE COILS REM465 HOTLOOPS!";
                    while (scansingle.hasNext()) {
                        // The first column is the residue letter; it is consumed but not used.
                        scansingle.next();
                        coils.add(Float.valueOf(scansingle.nextFloat()));
                        rem.add(Float.valueOf(scansingle.nextFloat()));
                        hotloops.add(Float.valueOf(scansingle.nextFloat()));
                    }
                    HashSet<Score> scores = new HashSet<Score>();
                    scores.add(new Score(DisemblResult.COILS, coils, coilsR));
                    scores.add(new Score(DisemblResult.REM465, rem, rem465R));
                    scores.add(new Score(DisemblResult.HOTLOOPS, hotloops, loopsR));
                    results.put(sequenceName, scores);
                } finally {
                    scansingle.close();
                }
            }
            return results;
        } finally {
            // Closing the scanner also closes the wrapped input stream.
            scan.close();
        }
    }

    /**
     * Parses one range line of the form
     * {@code # TYPE from-to, from-to, ...}.
     *
     * @param resultType the result type the line is expected to announce;
     *        only verified by an assertion
     * @param lines the line to parse
     * @return the ranges found on the line; empty elements are skipped
     */
    private static TreeSet<Range> parseRanges(Enum<?> resultType, String lines) {
        TreeSet<Range> ranges = new TreeSet<Range>();
        Scanner scan = new Scanner(lines);
        try {
            assert (scan.hasNext());
            String del = scan.next();
            assert ("#".equals(del));
            String type = scan.next();
            assert (resultType.toString().equalsIgnoreCase(type)) : "Unknown result type: " + resultType.toString();
            // The rest of the line is a comma separated list of ranges.
            scan.useDelimiter(",");
            while (scan.hasNext()) {
                String range = scan.next();
                if (Util.isEmpty(range)) {
                    continue;
                }
                ranges.add(new Range(range.split("-")));
            }
        } finally {
            scan.close();
        }
        return ranges;
    }

    /**
     * Parses GlobPlot output. The input is split on {@code '>'}; each part
     * holds the sequence name, two range lines (GlobDoms, Disorder), a
     * column title line and then rows of {@code residue dydx raw smoothed}.
     * Numbers are parsed with {@link Locale#US} so that the result does not
     * depend on the JVM default locale. Malformed rows surface as the
     * unchecked exceptions thrown by {@link Scanner}.
     *
     * @param input the stream to parse; closed by this method
     * @return the five scores of every sequence, keyed by sequence name
     * @throws IOException declared for callers; not thrown by this code directly
     * @throws UnknownFileFormatException if the input contains no record at all
     */
    public static HashMap<String, Set<Score>> readGlobPlot(InputStream input) throws IOException, UnknownFileFormatException {
        Scanner scan = new Scanner(input);
        try {
            scan.useDelimiter(">");
            if (!scan.hasNext()) {
                throw new UnknownFileFormatException("In GlobPlot score format each sequence score is expected to start from the line: >Sequence name  No such line was found!");
            }
            HashMap<String, Set<Score>> results = new HashMap<String, Set<Score>>();
            while (scan.hasNext()) {
                Scanner scansingle = new Scanner(scan.next());
                // Scores use '.' as the decimal separator whatever the platform locale is.
                scansingle.useLocale(Locale.US);
                try {
                    if (!scansingle.hasNextLine()) {
                        throw new RuntimeException("The input looks like an incomplete GlobPlot file - cannot parse!");
                    }
                    ArrayList<Float> dydxScore = new ArrayList<Float>();
                    ArrayList<Float> rawScore = new ArrayList<Float>();
                    ArrayList<Float> smoothedScore = new ArrayList<Float>();
                    String sequenceName = scansingle.nextLine().trim();
                    TreeSet<Range> domsR = SequenceUtil.parseRanges(GlobProtResult.GlobDoms, scansingle.nextLine());
                    TreeSet<Range> disorderR = SequenceUtil.parseRanges(GlobProtResult.Disorder, scansingle.nextLine());
                    String title = scansingle.nextLine();
                    assert (title.startsWith("# RESIDUE\tDYDX")) : ">Sequence_name must follow column title: # RESIDUE DYDX RAW SMOOTHED!";
                    while (scansingle.hasNext()) {
                        // The first column is the residue letter; it is consumed but not used.
                        scansingle.next();
                        dydxScore.add(Float.valueOf(scansingle.nextFloat()));
                        rawScore.add(Float.valueOf(scansingle.nextFloat()));
                        smoothedScore.add(Float.valueOf(scansingle.nextFloat()));
                    }
                    TreeSet<Score> scores = new TreeSet<Score>();
                    scores.add(new Score(GlobProtResult.Disorder, disorderR));
                    scores.add(new Score(GlobProtResult.GlobDoms, domsR));
                    scores.add(new Score(GlobProtResult.Dydx, dydxScore));
                    scores.add(new Score(GlobProtResult.RawScore, rawScore));
                    scores.add(new Score(GlobProtResult.SmoothedScore, smoothedScore));
                    results.put(sequenceName, scores);
                } finally {
                    scansingle.close();
                }
            }
            return results;
        } finally {
            // Closing the scanner also closes the wrapped input stream.
            scan.close();
        }
    }

    /**
     * Parses AACon results. The input is split on {@code '#'}; in each part
     * the text before the first space names the conservation method and the
     * numbers after it are its values. Reading of values stops at the first
     * token that is not a number. Numbers are parsed with {@link Locale#US}
     * so that the result does not depend on the JVM default locale.
     *
     * The stream is not closed by this method; closing it remains the
     * caller's responsibility.
     *
     * @param results the stream to parse; must not be {@code null}
     * @return one score per method found in the input
     * @throws NullPointerException if {@code results} is {@code null}
     */
    public static HashSet<Score> readAAConResults(InputStream results) {
        if (results == null) {
            throw new NullPointerException("InputStream with results must be provided");
        }
        HashSet<Score> annotations = new HashSet<Score>();
        // Not closed on purpose: closing the scanner would close the caller's stream.
        Scanner sc = new Scanner(results);
        sc.useDelimiter("#");
        while (sc.hasNext()) {
            String line = sc.next();
            int spacePos = line.indexOf(" ");
            assert (spacePos > 0) : "Space is expected as delimited between method name and values!";
            String methodLine = line.substring(0, spacePos);
            ConservationMethod method = ConservationMethod.getMethod(methodLine);
            assert (method != null) : "Method " + methodLine + " is not recognized! ";
            ArrayList<Float> values = new ArrayList<Float>();
            Scanner valuesScanner = new Scanner(line.substring(spacePos));
            // With a comma-decimal default locale hasNextDouble() would reject "0.5"
            // and the values would be silently lost.
            valuesScanner.useLocale(Locale.US);
            try {
                while (valuesScanner.hasNextDouble()) {
                    values.add(Float.valueOf((float) valuesScanner.nextDouble()));
                }
            } finally {
                valuesScanner.close();
            }
            annotations.add(new Score(method, values));
        }
        return annotations;
    }

    /**
     * Reads sequences from a file that is either a Clustal alignment or a
     * FASTA file. The file is opened twice: once for
     * {@code ClustalAlignmentUtil.isValidClustalFile} and once for the
     * actual reading. Both streams are closed before returning, also when
     * reading fails.
     *
     * @param inFilePath path of the file to read
     * @return the sequences found in the file
     * @throws IOException if the file cannot be opened, read or closed
     * @throws UnknownFileFormatException if the content cannot be parsed
     */
    public static List<FastaSequence> openInputStream(String inFilePath) throws IOException, UnknownFileFormatException {
        FileInputStream inStrForValidation = new FileInputStream(inFilePath);
        try {
            FileInputStream inStr = new FileInputStream(inFilePath);
            try {
                if (ClustalAlignmentUtil.isValidClustalFile(inStrForValidation)) {
                    Alignment al = ClustalAlignmentUtil.readClustalFile(inStr);
                    return al.getSequences();
                }
                return SequenceUtil.readFasta(inStr);
            } finally {
                // readFasta has already closed the stream; closing it again is harmless.
                inStr.close();
            }
        } finally {
            inStrForValidation.close();
        }
    }

    /**
     * Writes the sequences as a Clustal alignment by delegating to
     * {@code ClustalAlignmentUtil.writeClustalAlignment}. Text is encoded
     * with the platform default charset.
     *
     * @param outStream the stream to write to
     * @param sequences the aligned sequences
     * @param gapChar the gap character recorded in the alignment metadata
     * @throws IOException if writing fails
     */
    public static void writeClustal(OutputStream outStream, List<FastaSequence> sequences, char gapChar) throws IOException {
        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(outStream));
        AlignmentMetadata al = new AlignmentMetadata(Program.CLUSTAL, gapChar);
        // NOTE(review): the buffered writer is neither flushed nor closed here;
        // presumably writeClustalAlignment does that - confirm, otherwise output may be lost.
        ClustalAlignmentUtil.writeClustalAlignment(writer, new Alignment(sequences, al));
    }
}

