#!/bin/bash
#
# AUTONYM
#
#   disapparator.sh - fifthglyphful word disapparator
#
# INSTRUCTION
#
#   chmod +x ./disapparator.sh
#   ./disapparator.sh $1 $2    (input.txt output.txt)
#
#   output.txt counts input.txt words built only from allow-list glyphs
#   (no fifth glyph), as `uniq -c` rows (a count, a blank, a word),
#   top count first.
#
# AUTHORS
#
#   Quinapalus
#   Vurbositor
#
# MODIFICATION LOG
#
#   0.2 chain of individual commands multiplying output .txts in profusion
#   0.4 script unification, dash handling
#   0.6 coauthor awk translation of main command
#   0.A juggling. Ignoring dash-handling and smart quotation.
#       Latin-unum, Latin-plus A and Latin plus-B support

## Starting two hash mark annotations by Vurbositor

# No if/fi blocks and no loops: a word or two in all such forms has that
# glyph. && and || do all branching.

## Pull this out to aid visibility:
# N.B. Going by UCS chart position, this list now has ×Ø, ăĄą and digraphs
# (IJ ij ʼn DŽ Dž dž LJ Lj lj NJ Nj nj DZ Dz dz) back at spots which had shown
# scrambling - TODO confirm against your original copy. A doubtful spot
# still stands as found: ǝ right past Ǘ, with ǘǙǚǛǜ missing - TODO confirm.
allow_xtra='ÀÁÂÃÄÅÇÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåçìíîïðñòóôõö÷øùúûüýþÿĀāĂăĄąĆćĈĉĊċČčĎďĐđĜĝĞğĠġĢģĤĥĦħĨĩĪīĬĭĮįİıIJijĴĵĶķĸĹĺĻļĽľĿŀŁłŃńŅņŇňʼnŊŋŌōŎŏŐőŔŕŖŗŘřŚśŜŝŞşŠšŢţŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵŶŷŸŹźŻżŽžſƀƁƂƃƄƅƆƇƈƉƊƋƌƍƏƑƒƓƔƕƖƗƘƙƚƛƜƝƞƟƠơƢƣƤƥƦƧƨƩƪƫƬƭƮƯưƱƲƳƴƵƶƷƸƹƺƻƼƽƿǀǁǂǃDŽDždžLJLjljNJNjnjǍǎǏǐǑǒǓǔǕǖǗǝǞǟǠǡǤǥǦǧǨǩǪǫǬǭǮǯǰDZDzdzǴǵǶǷǸǹǺǻǾǿȀȁȂȃȈȉȊȋȌȍȎȏȐȑȒȓȔȕȖȗȘșȚțȜȝȞȟȠȡȢȣȤȥȦȧȪȫȬȭȮȯȰȱȲȳȴȵȶȷȸȹȺȻȼȽȾȿɀɁɂɃɄɅɈɉɊɋɌɍɎɏ'

# Non-ASCII marks to scrub out of words. awk scrubs such marks, not tr:
# GNU tr works on 8-bit units, not chars, so a ³ in its kill list would
# also rip that tail unit off ó and ruin such words.
# N.B. awk must count chars, not 8-bit units, for this to work: gawk in a
# UTF-8 LANG will do (an assumption; TODO confirm for your awk).
scrub_xtra='¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿•'

# halt WORDS... : print WORDS to output 2, hand back status 1.
halt() {
  printf '%s\n' "$*" >&2
  ! :   # a plain status 1
}

# disapparator SRC DST : run that full chain.
#   $1 SRC  input.txt to scan
#   $2 DST  output.txt to fill (any old DST is lost)
disapparator() {
  local src=$1 dst=$2

  # unpunctuation, ASCII part (saving - and ')
  tr -d '!"%()*,./:;<=>?[\\]^_`{|}' < "$src" |

    # chop at blanks, tabs and such: 1 word to a row
    tr -s ' \t\r\f\v' '\n\n\n\n\n' |

    ## awk magic by Vurbositor: annihilation of fifthglyphful words
    ## -v assigns awk vars out of bash vars:
    ##   junk  - a char class of $scrub_xtra marks
    ##   allow - a rgular xprssion:
    ##     ^ for start
    ##     a char class which might match various alpha-things
    ##       (A-D, F-Z, a-d, f-z; no digits now, gsub got rid of such)
    ##     put $allow_xtra bash var in it
    ##     + says 1 or mo' of that char class
    ##     $ for nd of lin
    ## -- says no mo' args, just awk program txt
    ## gsub(junk, "")     unpunctuation, non-ASCII part
    ## gsub(/[0-9]/, "")  disdigitification; a row of only digits winds up
    ##                    blank, fails allow, and is not put out
    ## '$0 ~ allow' will print things what match
    awk -v junk="[$scrub_xtra]" \
        -v allow="^[A-DF-Za-df-z$allow_xtra]+\$" -- '
      { gsub(junk, ""); gsub(/[0-9]/, "") }
      $0 ~ allow
    ' |

    # uncapitalization, ASCII only (RIP onomastics for now)
    tr 'A-Z' 'a-z' |

    # sort and count totals, top to bottom
    sort | uniq -c | sort -bnr > "$dst"
}

# main "$@" : audit args, run disapparator. Status 1 on bad args.
main() {
  { (( $# >= 2 )) || halt "INSTRUCTION: $0 input.txt output.txt"; } &&
    { [[ -r $1 ]] || halt "$0: cannot scan input: $1"; } &&
    disapparator "$1" "$2"
}

main "$@"

# FUZZY PLANS
#
# 'shakspar' singular-quotation-hug bug
# chomping £ "pound symbol" µ = "micro-" ¶ = "pilcrow" and so on with
# nonfifthglyphful long forms?)
# uncapitalization past ASCII: tr 'A-Z' 'a-z' skips À, Ç and kin