Dotfiles, utilities, and other apparatus.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

102 lines
2.3 KiB

  1. #!/bin/sh
  2. # NAME
  3. #
  4. # letter_freq.sh - analyze letter frequency in a string
  5. #
  6. # SYNOPSIS
  7. #
  8. # letter_freq.sh -f filename
  9. # letter_freq.sh "string to analyze"
  10. #
  11. # INSTALLATION
  12. #
  13. # Just copy this into a file called letter_freq.sh, and then:
  14. #
  15. # $ chmod +x ./letter_freq.sh
  16. #
  17. # CAVEATS
  18. #
  19. # Shell is terrible and Brennen is terrible at shell, so this
  20. # is doubly likely to contain traps for the unwary.
  21. #
  22. # AUTHORS
  23. #
  24. # https://mspsocial.net/@earthtopus
  25. # https://mastodon.social/@brennen
  26. # A function for squawking at the user with some helptext:
  27. print_usage () {
  28. cat <<END
  29. USAGE
  30. # Examine a string directly:
  31. $ ./letter_freq.sh "Quick zephyrs blow, vexing daft Jim."
  32. # Examine a string from a file:
  33. $ ./letter_freq.sh -f pangram.txt
  34. END
  35. }
  36. # Encapsulate the sort operation in a function for less cruft below:
  37. do_sort () {
  38. sort | uniq -c | sort -rn;
  39. }
  40. # The script really starts here.
  41. # First, we'll handle arguments, hackily. $1 is the first parameter to the
  42. # command, $2 is the second, and so on. There's a trap here - if you say
  43. # something like
  44. #
  45. # ./letter_freq.sh foo bar
  46. #
  47. # ...then $1 will be 'foo' and $2 will be 'bar'. To have an entire string with
  48. # spaces come in as the first param, use quotes like so:
  49. #
  50. # ./letter_freq.sh "foo bar"
  51. #
  52. # The same sort of thing applies all over the place. When in doubt, put double
  53. # quotes around variables to avoid weirdness.
  54. if [ "$1" = '-f' ]; then
  55. if [ -z "$2" ]; then
  56. # Zero-length string for second param - didn't get a filename
  57. echo "Expected a filename to read string from."
  58. print_usage
  59. # These arbitrary-seeming exit codes are taken from:
  60. # https://www.freebsd.org/cgi/man.cgi?query=sysexits&sektion=3
  61. # (I don't actually know whether this is a good idea.)
  62. exit 64
  63. fi
  64. if [ ! -f "$2" ]; then
  65. echo "No such file: $2"
  66. print_usage
  67. exit 64
  68. fi
  69. # Read the file into a variable:
  70. string=$(cat "$2")
  71. elif [ -z "$1" ]; then
  72. echo "Expected a string to analyze."
  73. print_usage
  74. exit 66
  75. else
  76. # Use the provided string:
  77. string="$1"
  78. fi
  79. # Next, we actually Do the Stuff:
  80. count=$(echo "$string" | tr -cd '[:alpha:]' | wc -m)
  81. echo "$count total chars"
  82. echo "$string" | grep -oE '[[:alpha:]]' | \
  83. tr '[:upper:]' '[:lower:]' | \
  84. do_sort | \
  85. awk -v count="$count" '{ print ($1 / count), $1, $2; }'
  86. # Success!
  87. exit 0