# ------------------------------------------------------------------
# The Computer Language Shootout
# http://shootout.alioth.debian.org/
#
# Contributed by Leo Osvald
# ------------------------------------------------------------------

# Regular expressions whose matches are counted in the cleaned sequence.
# Every entry is "motif|reverse-complement-of-motif": the first is the exact
# 8-mer, the remaining eight each replace one position by a character class.
pattern1 <- c(
    "agggtaaa|tttaccct",
    "[cgt]gggtaaa|tttaccc[acg]", "a[act]ggtaaa|tttacc[agt]t",
    "ag[act]gtaaa|tttac[agt]ct", "agg[act]taaa|ttta[agt]cct",
    "aggg[acg]aaa|ttt[cgt]ccct", "agggt[cgt]aa|tt[acg]accct",
    "agggta[cgt]a|t[acg]taccct", "agggtaa[cgt]|[acg]ttaccct"
)

# Substitution table applied to the sequence, one row per rule:
# column 1 is the upper-case code to search for, column 2 is the
# parenthesised alternation of lower-case bases it is replaced with.
pattern2 <- rbind(
    c("B", "(c|g|t)"),
    c("D", "(a|g|t)"),
    c("H", "(a|c|t)"),
    c("K", "(g|t)"),
    c("M", "(a|c)"),
    c("N", "(a|c|g|t)"),
    c("R", "(a|g)"),
    c("S", "(c|g)"),
    c("V", "(a|c|g)"),
    c("W", "(a|t)"),
    c("Y", "(c|t)")
)

# Count the matches reported by gregexpr() for a single input string.
#
# Args:
#   ms: the list returned by gregexpr(); only its first element is inspected.
#
# Returns:
#   The number of matches as an integer, 0L when there are none.
#
# gregexpr() signals "no match" with a single -1 rather than an empty vector,
# so the vector length alone cannot be used. A lone match must count as 1:
# its value is the match *position*, not a count.
match_count <- function(ms) {
    hits <- ms[[1L]]
    if (length(hits) == 0L || hits[[1L]] == -1L) 0L else length(hits)
}

# Run the regex-dna benchmark on one input file and print the results.
#
# Args:
#   args: character vector (or list) whose first element is the input path.
#
# Output, written with cat():
#   * one line per entry of `pattern1`: the pattern followed by the value
#     match_count() reports for it on the cleaned sequence;
#   * a blank line, then the length of the text as read, the length after
#     cleaning, and the length after applying every `pattern2` substitution.
#
# Returns NULL invisibly. Stops with an error when no file name is supplied.
regexdna <- function(args) {
    if (length(args) < 1L)
        stop("regexdna: expected the input file name as first argument",
             call. = FALSE)
    in_filename <- args[[1]]

    f <- file(in_filename, "r")
    # Release the connection even if reading fails part-way through.
    on.exit(close(f), add = TRUE)
    # readLines() strips line terminators; the extra "" puts a newline back
    # after the last line so it is included in the first reported length.
    seq_text <- paste(c(readLines(f), ""), collapse="\n")

    len1 <- nchar(seq_text)
    # Remove everything from a ">" to the end of its line, plus all newlines.
    seq_text <- gsub(">.*\n|\n", "", seq_text, perl=TRUE, useBytes=TRUE)
    len2 <- nchar(seq_text)

    for (pat in pattern1)
        cat(pat, match_count(gregexpr(pat, seq_text, useBytes=TRUE)), "\n")

    # Rules are applied one after another, each on the previous result.
    for (i in seq_len(nrow(pattern2)))
        seq_text <- gsub(pattern2[[i, 1]], pattern2[[i, 2]], seq_text,
                         perl=TRUE, useBytes=TRUE)

    cat("", len1, len2, nchar(seq_text), sep="\n")
    invisible(NULL)
}

# Run the benchmark on the command-line arguments, unless an object named
# `i_am_wrapper` already exists (in which case nothing is executed here).
if (!exists("i_am_wrapper")) {
    regexdna(commandArgs(trailingOnly=TRUE))
}