Skip to content

Commit

Permalink
Added the missing build files for last
Browse files Browse the repository at this point in the history
  • Loading branch information
Dfupa committed Oct 18, 2023
1 parent eb22439 commit 1a4f4ca
Show file tree
Hide file tree
Showing 5 changed files with 195 additions and 0 deletions.
20 changes: 20 additions & 0 deletions bin/last/build/gc-inc.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#! /bin/sh

# Emit the C definition of geneticCodes[], built from the genetic-code
# files named on the command line (standard input when none are given).
# Each array element pairs the value of an "id" line with a multi-line
# text made from the "ncbieaa" and "-- Base" lines that follow it.

# Opening of the C declaration.
printf '%s\n' \
    'const struct {' \
    'const char *name;' \
    'const char *text;' \
    '} geneticCodes[] = {'

# Delete double quotes and commas so that awk sees plain
# whitespace-separated fields, then print one C initializer per record.
cat "$@" | tr -d '",' | awk '
BEGIN {
    quote = "\""       # a literal double quote
    cont = "\\"        # a backslash: continues the C string literal
    eol = "\\n" cont   # the two characters \n, then a continuation
}
$1 == "id"      { print "{" quote $2 quote ", " quote cont }
$1 == "ncbieaa" { print " AAs = " $2 eol }
/-- Base/       { print $2 " = " $3 eol }
/-- Base3/      { print quote "}," }
'

# Closing of the C declaration.
printf '%s\n' '};'
29 changes: 29 additions & 0 deletions bin/last/build/mat-doc.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#! /bin/sh

# This generates text documentation from substitution score matrices.
#
# Usage: mat-doc.sh FILE.mat...
#
# Lines of each matrix file are used as follows:
#   "#nickname X"   X is shown in the heading as an alternative name
#   "# text"        text is copied to the output as the description
#   "#last..."      listed as default lastal parameter values
#   no leading "#"  non-blank lines are shown as the matrix itself

cat <<EOF
LAST built-in scoring schemes
=============================
EOF

for i in "$@"
do
    # Expansions are quoted so that paths containing whitespace or glob
    # characters are handled as single file names.
    name=$(basename "$i" .mat)
    # Join several nicknames with spaces, so the heading stays on one line.
    nick=$(grep '^#nickname' "$i" | cut -d' ' -f2 | paste -s -d' ' -)
    test "$nick" && name="$name or $nick"
    printf '%s\n' "$name"
    printf '%s\n' "$name" | sed 's/./-/g'  # underline
    echo
    grep '^# ' "$i" | cut -d' ' -f2-
    echo It uses this matrix::
    echo
    grep -v '^#' "$i" | awk NF | sed 's/^/ /'
    echo
    # "if" instead of "&&": a final matrix without "#last" lines must not
    # leave the script with a non-zero exit status.
    if grep -q '^#last' "$i"
    then
        echo It sets these default lastal parameter values:
        grep '^#last' "$i" | cut -d' ' -f2-
        echo
    fi
done
33 changes: 33 additions & 0 deletions bin/last/build/mat-inc.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#! /bin/sh

# This generates source code from substitution score matrices.

# First table: one {nickname, realname} entry per "#nickname" line, where
# the real name is the matrix file's base name without ".mat".
cat <<EOF
const struct {
const char *nickname;
const char *realname;
} scoreMatrixNicknames [] = {
EOF
for i in "$@"
do
    name=$(basename "$i" .mat)
    # Format with printf rather than splicing $name into a sed program, so
    # that names containing characters special to sed or to the shell
    # (spaces, "&", "/", ...) are emitted unchanged.
    grep '^#nickname' "$i" | cut -d' ' -f2 |
        while IFS= read -r nick
        do
            printf '{"%s", "%s"},\n' "$nick" "$name"
        done
done
echo "};"

echo

# Second table: one {name, text} entry per matrix file.  The text is the
# file's content without blank lines and without lines starting with "#n"
# or "# ".  Each kept line gets the two characters \n appended, plus a
# backslash that continues the C string literal onto the next line.
cat <<EOF
const struct {
const char *name;
const char *text;
} scoreMatrices[] = {
EOF
for i in "$@"
do
    # "$i" is quoted so that paths containing whitespace or glob characters
    # are handled as single file names.
    basename "$i" .mat | sed 's/.*/{"&", "\\/'
    grep -v '^#[n ]' "$i" | awk NF | sed 's/$/\\n\\/'
    echo '"},'
    echo
done
echo "};"
80 changes: 80 additions & 0 deletions bin/last/build/seed-doc.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#! /bin/sh

# This generates text documentation from subset seeds.

# Introductory section, written in reStructuredText.  Paragraphs are
# separated by blank lines, and each example after a "::" marker is an
# indented literal block set off by blank lines; without that layout the
# whole text is read as a single paragraph.  The final blank line separates
# this section from whatever heading is printed next.
cat <<'EOF'
LAST seeding schemes
====================

LAST's critical first step is to find *seeds*, i.e. initial matches
between query and reference sequences. It can use various seeding
schemes, which allow different kinds of mismatches at different seed
positions.

A seeding scheme consists of a seed alphabet, such as::

  1 A C G T
  0 ACGT
  T AG CT

and one or more patterns, such as this one::

  1T1T10T1101101

Each symbol in a pattern represents a grouping of sequence letters:
in this example, ``T`` represents the grouping ``AG CT``. At each
position in an initial match, mismatches are allowed between
letters that are grouped at that position in the pattern.

Although the patterns have fixed lengths, LAST's initial matches do
not. LAST finds shorter matches by using a prefix of the pattern,
and longer matches by cyclically repeating the pattern.

A *restricted* symbol omits letters of the main sequence alphabet,
which are then forbidden at those positions::

  r AG

An *exact* symbol groups no letters::

  Y C T

In 2nd and subsequent cycles, restricted symbols are made
unrestricted: if it is exact then the omitted letters are added as
separate groups, else they are added as one group.

EOF

# Selects the pattern lines of a seed file: non-comment lines whose first
# field is longer than one character, or that consist of a single field.
# The parentheses matter: awk parses "A && B || C" as "(A && B) || C", so
# without them a one-field comment line would be taken for a pattern.
pattern_filter='!/^#/ && (length($1) > 1 || NF == 1)'

# One documentation section per seed file given on the command line.
for i in "$@"
do
    # Expansions are quoted so that paths containing whitespace or glob
    # characters are handled as single file names.
    name=$(basename "$i" .seed)
    # Join several abbreviations with spaces, so the heading stays on one
    # line.
    abbr=$(grep '^#abbreviation' "$i" | cut -d' ' -f2 | paste -s -d' ' -)
    test "$abbr" && name="$name (abbreviation: $abbr)"
    printf '%s\n' "$name"
    printf '%s\n' "$name" | sed 's/./-/g'  # underline
    echo
    grep '^# ' "$i" | cut -d' ' -f2-
    echo It uses this seed alphabet::
    echo
    # Alphabet lines: non-comment lines with a one-character first field
    # followed by at least one more field.
    awk '!/^#/ && NF > 1 && length($1) == 1' "$i" | sed 's/^/ /'
    echo
    # $((...)) also discards any padding that wc may print around the number.
    count=$(( $(awk "$pattern_filter" "$i" | wc -w) ))
    if [ "$count" -eq 1 ]
    then echo And this pattern::
    else echo And these patterns::
    fi
    echo
    awk "$pattern_filter" "$i" | sed 's/^/ /'
    echo
    if grep -q '^#lastdb' "$i"
    then
        echo It sets this lastdb default:
        grep '^#lastdb' "$i" | cut -d' ' -f2-
        echo
    fi
    if grep -q '^#lastal' "$i"
    then
        echo It sets this lastal default:
        grep '^#lastal' "$i" | cut -d' ' -f2-
        echo
    fi
done

exit 0
33 changes: 33 additions & 0 deletions bin/last/build/seed-inc.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#! /bin/sh

# This generates source code from subset seeds.

# First table: one {nickname, realname} entry per "#abbreviation" line,
# where the real name is the seed file's base name without ".seed".
cat <<EOF
const struct {
const char *nickname;
const char *realname;
} subsetSeedNicknames [] = {
EOF
for i in "$@"
do
    name=$(basename "$i" .seed)
    # Format with printf rather than splicing $name into a sed program, so
    # that names containing characters special to sed or to the shell
    # (spaces, "&", "/", ...) are emitted unchanged.
    grep '^#abbreviation' "$i" | cut -d' ' -f2 |
        while IFS= read -r nick
        do
            printf '{"%s", "%s"},\n' "$nick" "$name"
        done
done
echo "};"

echo

# Second table: one {name, text} entry per seed file.  The text is the
# file's content without blank lines and without lines starting with "#a"
# or "# ".  Each kept line gets the two characters \n appended, plus a
# backslash that continues the C string literal onto the next line.
cat <<EOF
const struct {
const char *name;
const char *text;
} subsetSeeds[] = {
EOF
for i in "$@"
do
    # "$i" is quoted so that paths containing whitespace or glob characters
    # are handled as single file names.
    basename "$i" .seed | sed 's/.*/{"&", "\\/'
    grep -v '^#[a ]' "$i" | awk NF | sed 's/$/\\n\\/'
    echo '"},'
    echo
done
echo "};"

0 comments on commit 1a4f4ca

Please sign in to comment.