#!/bin/sh
#
# NAME
#
#   letter_freq.sh - analyze letter frequency in a string
#
# SYNOPSIS
#
#   letter_freq.sh -f filename
#   letter_freq.sh "string to analyze"
#
# INSTALLATION
#
#   Just copy this into a file called letter_freq.sh, and then:
#
#     $ chmod +x ./letter_freq.sh
#
# CAVEATS
#
#   Shell is terrible and Brennen is terrible at shell, so this
#   is doubly likely to contain traps for the unwary.
#
# AUTHORS
#
#   https://mspsocial.net/@earthtopus
#   https://mastodon.social/@brennen
#
# A function for squawking at the user with some helptext.
#
# Takes no arguments and writes the usage text to stdout; the caller decides
# where that output is redirected and which exit status to use afterwards.
print_usage () {
  # The delimiter is quoted so the text is taken literally: the "$ " shell
  # prompts in the examples can never be treated as parameter expansions.
  cat <<'END'

USAGE

# Examine a string directly:
$ ./letter_freq.sh "Quick zephyrs blow, vexing daft Jim."

# Examine a string from a file:
$ ./letter_freq.sh -f pangram.txt

END
}

# Encapsulate the sort operation in a function for less cruft below.
#
# Reads one item per line on stdin and writes one "<count> <item>" line per
# distinct item to stdout, highest count first.
do_sort () {
  # uniq -c only merges adjacent duplicate lines, so the input is sorted
  # first; the second sort orders the tallies numerically, largest first.
  sort | uniq -c | sort -rn
}

# The script really starts here.

# First, we'll handle arguments, hackily. $1 is the first parameter to the
# command, $2 is the second, and so on. There's a trap here - if you say
# something like
#
#   ./letter_freq.sh foo bar
#
# ...then $1 will be 'foo' and $2 will be 'bar'. To have an entire string with
# spaces come in as the first param, use quotes like so:
#
#   ./letter_freq.sh "foo bar"
#
# The same sort of thing applies all over the place. When in doubt, put double
# quotes around variables to avoid weirdness.

# Decide where the text to analyze comes from: the file named after "-f", or
# the first argument itself.  Error messages and the usage text are sent to
# stderr so they never mix with the frequency table on stdout.
#
# These arbitrary-seeming exit codes are taken from:
# https://www.freebsd.org/cgi/man.cgi?query=sysexits&sektion=3
#   64 (EX_USAGE)   - the command was invoked incorrectly
#   66 (EX_NOINPUT) - the input file is missing or could not be read
# (I don't actually know whether this is a good idea.)
if [ "$1" = '-f' ]; then
  if [ -z "$2" ]; then
    # Zero-length string for second param - didn't get a filename
    echo "Expected a filename to read string from." >&2
    print_usage >&2
    exit 64
  fi

  if [ ! -f "$2" ]; then
    # printf rather than echo: the filename is arbitrary user data.
    printf '%s\n' "No such file: $2" >&2
    print_usage >&2
    exit 66
  fi

  # Read the file into a variable.  Redirecting instead of passing the name
  # to cat keeps a filename starting with "-" from being parsed as an option;
  # a failed read (e.g. no permission) stops the script instead of silently
  # analyzing an empty string.
  string=$(cat < "$2") || exit 66
elif [ -z "$1" ]; then
  echo "Expected a string to analyze." >&2
  print_usage >&2
  exit 64
else
  # Use the provided string:
  string="$1"
fi

# Next, we actually Do the Stuff:

# One lowercase letter per line.  printf is used instead of echo because echo
# may treat a string such as "-n", or one containing backslashes, specially.
letters=$(printf '%s\n' "$string" \
  | grep -oE '[[:alpha:]]' \
  | tr '[:upper:]' '[:lower:]')

# The total is counted from the same letter stream the table is built from,
# so the total and the per-letter counts cannot disagree.  awk prints NR
# without the padding that some wc implementations add.
count=$(printf '%s' "$letters" | awk 'END { print NR }')
printf '%s total chars\n' "$count"

# With no letters there is nothing to tabulate; skipping the pipeline also
# guarantees awk never divides by zero.
if [ "$count" -gt 0 ]; then
  # Output columns: fraction of all letters, occurrence count, letter.
  printf '%s\n' "$letters" \
    | do_sort \
    | awk -v count="$count" '{ print ($1 / count), $1, $2; }'
fi

# Success!
exit 0