#!/bin/bash
#
# AUTONYM
#
# disapparator.sh - fifthglyphful word disapparator
#
# INSTRUCTION
#
# chmod +x ./disapparator.sh
# ./disapparator.sh $1 $2 (input.txt output.txt)
#
# AUTHORS
#
# Quinapalus
# Vurbositor
#
# MODIFICATION LOG
#
# 0.2 chain of individual commands multiplying output .txts in profusion
# 0.4 script unification, dash handling
# 0.6 coauthor awk translation of main command
# 0.A juggling. Ignoring dash-handling and smart quotation. Latin-unum, Latin-plus A and Latin-plus B support

## Starting two hash mark annotations by Vurbositor

## Pull this out to aid visibility:
## NB (to confirm): U+01D8 to U+01DC, all u's with marks, got cut, but
## U+01DD, a flipped fifth glyph, got in. A slip?
allow_xtra="ÀÁÂÃÄÅÇÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåçìíîïðñòóôõö÷øùúûüýþÿĀāĂăĄąĆćĈĉĊċČčĎďĐđĜĝĞğĠġĢģĤĥĦħĨĩĪīĬĭĮįİıIJijĴĵĶķĸĹĺĻļĽľĿŀŁłŃńŅņŇňŉŊŋŌōŎŏŐőŔŕŖŗŘřŚśŜŝŞşŠšŢţŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵŶŷŸŹźŻżŽžſƀƁƂƃƄƅƆƇƈƉƊƋƌƍƏƑƒƓƔƕƖƗƘƙƚƛƜƝƞƟƠơƢƣƤƥƦƧƨƩƪƫƬƭƮƯưƱƲƳƴƵƶƷƸƹƺƻƼƽƿǀǁǂǃDŽDždžLJLjljNJNjnjǍǎǏǐǑǒǓǔǕǖǗǝǞǟǠǡǤǥǦǧǨǩǪǫǬǭǮǯǰDZDzdzǴǵǶǷǸǹǺǻǾǿȀȁȂȃȈȉȊȋȌȍȎȏȐȑȒȓȔȕȖȗȘșȚțȜȝȞȟȠȡȢȣȤȥȦȧȪȫȬȭȮȯȰȱȲȳȴȵȶȷȸȹȺȻȼȽȾȿɀɁɂɃɄɅɈɉɊɋɌɍɎɏ"

## Non-ASCII marks for unpunctuation. awk (not tr) drops such marks:
## GNU tr works on 8-bit units and would rip a UTF-8 char apart
## (a word such as 'mañana' would turn into junk).
drop_xtra='¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿•'

# disapparator IN OUT
#   IN  - path of a UTF-8 .txt to scan
#   OUT - path for word totals, wiping any prior OUT
#   Status is non-0 if you pass no IN or no OUT, or if bash cannot
#   attach IN or OUT to its group of commands.
# Body is a ( ) group, a child bash: a missing path halts only this
# function, not a calling script.
# NB: non-ASCII matching counts on an awk that knows UTF-8
# (gawk with a UTF-8 LANG, say).
disapparator() (
  : "${1:?input path missing}" "${2:?output path missing}"

  # IN and OUT bind to this group as a unit: a bad IN stops it all,
  # with OUT still intact
  {
    # unpunctuation (saving - and '), ASCII marks only, plus
    # disdigitification up front: an all-digit word is blank by now and
    # cannot match, so no blank "word" shows up in totals
    tr -d '[:digit:]!"%()*,./:;<=>?[\\]^_`{|}' |

    ## -v drop is a char class too: non-ASCII marks that gsub rubs out

    ## -v assigns a variabl - allow, for this 'un
    ## a rgular xprssion,
    ## ^ for start
    ## a char class which might match various alpha-things
    ## put $allow_xtra bash var in it
    ## and a blank char
    ## + says 1 or mo' of that char class
    ## $ for nd of lin
    ## (no - or ' in that class: a word with such marks will not match)

    ## -v RS says what splits things (look in `man awk`)
    ## this is a rgx too, 1 or mo' blank chars
    ## (blank chars put down 1 by 1: \s is a gawk-only notation)

    ## -- says no mo' args, just awk program txt

    ## 'gsub(drop, "")' rubs marks out of a word
    ## '$0 ~ allow' will print things what match

    # annihilation of fifthglyphful words - awk magic by Vurbositor
    awk -v drop="[$drop_xtra]" \
        -v allow="^[A-DF-Za-df-z$allow_xtra ]+$" \
        -v RS='[ \t\n\r\f\v]+' \
        -- '{ gsub(drop, "") } $0 ~ allow' |

    # uncapitalization (RIP onomastics for now)
    tr "A-Z" "a-z" |

    # sort and count totals, top to bottom
    sort | uniq -c | sort -bnr
  } < "$1" > "${2}"
)

disapparator "$@"

# FUZZY PLANS
# 'shakspar' singular-quotation-hug bug
# chomping £ = "pound symbol", µ = "micro-", ¶ = "pilcrow" and so on (with nonfifthglyphful long forms?)