A book about the command line for humans.

index.html 112KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183
  1. <!DOCTYPE html>
  2. <html lang=en>
  3. <head>
  4. <meta charset="utf-8">
  5. <title>userland: a book about the command line for humans</title>
  6. <link rel=stylesheet href="userland.css" />
  7. <link rel="alternate" type="application/atom+xml" title="changes" href="//p1k3.com/userland-book/feed.xml" />
  8. <script src="js/jquery.js" type="text/javascript"></script>
  9. </head>
  10. <body>
  11. <h1 class=bigtitle>userland</h1>
  12. <hr />
  13. <h1><a name=a-book-about-the-command-line-for-humans href=#a-book-about-the-command-line-for-humans>#</a> a book about the command line for humans</h1>
  14. <p>In the fall of 2013, <a href="//p1k3.com/2013/8/4">thinking about</a> text utilities got
  15. me thinking in turn about how my writing habits depend on the Linux command
  16. line. This seems like a good hook for explaining some tools I use every day,
  17. so now I&rsquo;m writing a short, haphazard book.</p>
  18. <p>This isn&rsquo;t a book about system administration, writing complex software, or
  19. becoming a wizard. I am not a wizard, and I don&rsquo;t subscribe to the idea that
  20. wizardry is required to use these tools. In fact, I barely know what I&rsquo;m doing
  21. most of the time. I still get some stuff done.</p>
  22. <p>This is a work in progress. It probably gets some stuff wrong.</p>
  23. <p>&ndash; bpb / <a href="https://p1k3.com">p1k3</a> / <a href="https://twitter.com/brennen">@brennen</a></p>
  24. <div class=details>
  25. <h2 class=clicker><a name=contents href=#contents>#</a> contents</h2>
  26. <div class=full>
  27. <div class=contents><ul>
  28. <li><a href="#a-book-about-the-command-line-for-humans">a book about the command line for humans</a>
  29. <ul>
  30. <li><a href="#contents">contents</a></li>
  31. </ul>
  32. </li>
  33. <li><a href="#get-you-a-shell">0. get you a shell</a>
  34. <ul>
  35. <li><a href="#get-an-account-on-a-social-unix-server">get an account on a social unix server</a></li>
  36. <li><a href="#use-a-raspberry-pi-or-beaglebone">use a raspberry pi or beaglebone</a></li>
  37. <li><a href="#use-a-virtual-machine">use a virtual machine</a></li>
  38. </ul>
  39. </li>
  40. <li><a href="#the-command-line-as-literary-environment">1. the command line as literary environment</a>
  41. <ul>
  42. <li><a href="#terms-and-definitions">terms and definitions</a></li>
  43. <li><a href="#twisty-little-passages">twisty little passages</a></li>
  44. <li><a href="#cat">cat</a></li>
  45. <li><a href="#wildcards">wildcards</a></li>
  46. <li><a href="#sort">sort</a></li>
  47. <li><a href="#options">options</a></li>
  48. <li><a href="#uniq">uniq</a></li>
  49. <li><a href="#standard-IO">standard IO</a></li>
  50. <li><a href="#code-help-code-and-man-pages"><code>--help</code> and man pages</a></li>
  51. <li><a href="#wc">wc</a></li>
  52. <li><a href="#head-tail-and-cut">head, tail, and cut</a></li>
  53. <li><a href="#tab-separated-values">tab separated values</a></li>
  54. <li><a href="#finding-text-grep">finding text: grep</a></li>
  55. <li><a href="#now-you-have-n-problems">now you have n problems</a></li>
  56. </ul>
  57. </li>
  58. <li><a href="#a-literary-problem">2. a literary problem</a></li>
  59. <li><a href="#programmerthink">3. programmerthink</a></li>
  60. <li><a href="#script">4. script</a>
  61. <ul>
  62. <li><a href="#learn-you-an-editor">learn you an editor</a></li>
  63. <li><a href="#d-i-y-utilities">d.i.y. utilities</a></li>
  64. <li><a href="#heavy-lifting">heavy lifting</a></li>
  65. <li><a href="#generality">generality</a></li>
  66. </ul>
  67. </li>
  68. <li><a href="#general-purpose-programmering">5. general purpose programmering</a></li>
  69. <li><a href="#one-of-these-things-is-not-like-the-others">6. one of these things is not like the others</a>
  70. <ul>
  71. <li><a href="#diff">diff</a></li>
  72. <li><a href="#wdiff">wdiff</a></li>
  73. </ul>
  74. </li>
  75. <li><a href="#the-command-line-as-as-a-shared-world">7. the command line as a shared world</a></li>
  76. <li><a href="#the-command-line-and-the-web">8. the command line and the web</a></li>
  77. <li><a href="#a-miscellany-of-tools-and-techniques">9. a miscellany of tools and techniques</a>
  78. <ul>
  79. <li><a href="#dict">dict</a></li>
  80. <li><a href="#aspell">aspell</a></li>
  81. <li><a href="#mostcommon">mostcommon</a></li>
  82. <li><a href="#cal-and-ncal">cal and ncal</a></li>
  83. <li><a href="#seq">seq</a></li>
  84. <li><a href="#shuf">shuf</a></li>
  85. <li><a href="#ptx">ptx</a></li>
  86. <li><a href="#figlet">figlet</a></li>
  87. <li><a href="#cowsay">cowsay</a></li>
  88. </ul>
  89. </li>
  90. <li><a href="#endmatter">endmatter</a>
  91. <ul>
  92. <li><a href="#further-reading">further reading</a></li>
  93. <li><a href="#code">code</a></li>
  94. <li><a href="#copying">copying</a></li>
  95. </ul>
  96. </li>
  97. </ul>
  98. </div>
  99. </div>
  100. </div>
  101. <hr />
  102. <h1><a name=get-you-a-shell href=#get-you-a-shell>#</a> 0. get you a shell</h1>
  103. <p>You don&rsquo;t have to have a shell at hand to get something out of this book.
  104. Still, as with most practical subjects, you&rsquo;ll learn more if you try things out
  105. as you go. You shouldn&rsquo;t feel guilty about skipping this section. It will
  106. always be here later if you need it.</p>
  107. <p>Not so long ago, it was common for schools and ISPs to hand out shell accounts
  108. on big shared systems. People learned the command line as a side effect of
  109. reading their e-mail.</p>
  110. <p>That doesn&rsquo;t happen as often now, but in the meanwhile computers have become
  111. relatively cheap and free software is abundant. If you&rsquo;re reading this on the
  112. web, you can probably get access to a shell. Some options follow.</p>
  113. <h2><a name=get-an-account-on-a-social-unix-server href=#get-an-account-on-a-social-unix-server>#</a> get an account on a social unix server</h2>
  114. <p>Check out <a href="https://tilde.town/">tilde.town</a>:</p>
  115. <blockquote><p>tilde.town is an intentional digital community for making art, socializing, and
  116. learning. Unlike many online spaces, users interact with tilde.town through a
  117. direct connection instead of a web site. This means using a tool called ssh and
  118. other text based tools.</p></blockquote>
  119. <h2><a name=use-a-raspberry-pi-or-beaglebone href=#use-a-raspberry-pi-or-beaglebone>#</a> use a raspberry pi or beaglebone</h2>
  120. <p>Do you have a single-board computer lying around? Perfect. If you already
  121. run the standard Raspbian, Debian on a BeagleBone, or a similar-enough Linux,
  122. you don&rsquo;t need much else. I wrote most of this text on a Raspberry Pi, and the
  123. example commands should all work there.</p>
  124. <h2><a name=use-a-virtual-machine href=#use-a-virtual-machine>#</a> use a virtual machine</h2>
  125. <p>A few options:</p>
  126. <ul>
  127. <li><a href="https://docs.vagrantup.com/v2/getting-started/index.html">Use Vagrant to spin up a machine in Virtualbox</a></li>
  128. <li><a href="https://www.digitalocean.com/community/tutorials/how-to-create-your-first-digitalocean-droplet-virtual-server">Use DigitalOcean to create a remotely-hosted VM running Linux</a></li>
  129. </ul>
  130. <hr />
  131. <h1><a name=the-command-line-as-literary-environment href=#the-command-line-as-literary-environment>#</a> 1. the command line as literary environment</h1>
  132. <p>There are a lot of ways to structure an introduction to the command line. I&rsquo;m
  133. going to start with writing as a point of departure because, aside from web
  134. development, it&rsquo;s what I use a computer for most. I want to shine a light on
  135. the humane potential of ideas that are usually understood as nerd trivia.
  136. Computers have utterly transformed the practice of writing within the space of
  137. my lifetime, but it seems to me that writers as a class miss out on many of the
  138. software tools and patterns taken as a given in more &ldquo;technical&rdquo; fields.</p>
  139. <p>Writing, particularly writing of any real scope or complexity, is very much a
  140. technical task. It makes demands, both physical and psychological, of its
  141. practitioners. As with woodworkers, graphic artists, and farmers, writers
  142. exhibit strong preferences in their tools, materials, and environment, and they
  143. do so because they&rsquo;re engaged in a physically and cognitively challenging task.</p>
  144. <p>My thesis is that the modern Linux command line is a pretty good environment
  145. for working with English prose and prosody, and that maybe this will illuminate
  146. the ways it could be useful in your own work with a computer, whatever that
  147. work happens to be.</p>
  148. <h2><a name=terms-and-definitions href=#terms-and-definitions>#</a> terms and definitions</h2>
  149. <p>What software are we actually talking about when we say &ldquo;the command line&rdquo;?</p>
  150. <p>For the purposes of this discussion, we&rsquo;re talking about an environment built
  151. on a very old paradigm called Unix.</p>
  152. <p style="text-align:center;"> <img src="images/jp_unix.jpg" height=320 width=470></p>
  153. <p>&hellip;except what classical Unix really looks like is this:</p>
  154. <p style="text-align:center;"> <img src="images/blinking.gif" width=470></p>
  155. <p>The Unix-like environment we&rsquo;re going to use isn&rsquo;t very classical, really.
  156. It&rsquo;s an operating system kernel called Linux, combined with a bunch of things
  157. written by other people (people in the GNU and Debian projects, and many
  158. others). Purists will tell you that this isn&rsquo;t properly Unix at all. In
  159. strict historical terms they&rsquo;re right, or at least a certain kind of right, but
  160. for the purposes of my cultural agenda I&rsquo;m going to ignore them right now.</p>
  161. <p style="text-align:center;"> <img src="images/debian.png"></p>
  162. <p>This is what&rsquo;s called a shell. There are many different shells, but they
  163. pretty much all operate on the same idea: You navigate a filesystem and run
  164. programs by typing commands. Commands can be combined in various ways to make
  165. programs of their own, and in fact the way you use the computer is often just
  166. to write little programs that invoke other programs, turtles-all-the-way-down
  167. style.</p>
  168. <p>The standard shell these days is something called Bash, so we&rsquo;ll use Bash.
  169. It&rsquo;s what you&rsquo;ll most often see in the wild. Like most shells, Bash is ugly
  170. and stupid in more ways than it is possible to easily summarize. It&rsquo;s also an
  171. incredibly powerful and expressive piece of software.</p>
  172. <h2><a name=twisty-little-passages href=#twisty-little-passages>#</a> twisty little passages</h2>
  173. <p>Have you ever played a text-based adventure game or MUD, of the kind that
  174. describes a setting and takes commands for movement and so on? Readers of a
  175. certain age and temperament might recognize the opening of Crowther &amp; Woods'
  176. <em>Adventure</em>, the great-granddaddy of text adventure games:</p>
  177. <pre><code>YOU ARE STANDING AT THE END OF A ROAD BEFORE A SMALL BRICK BUILDING.
  178. AROUND YOU IS A FOREST. A SMALL STREAM FLOWS OUT OF THE BUILDING AND
  179. DOWN A GULLY.
  180. &gt; GO EAST
  181. YOU ARE INSIDE A BUILDING, A WELL HOUSE FOR A LARGE SPRING.
  182. THERE ARE SOME KEYS ON THE GROUND HERE.
  183. THERE IS A SHINY BRASS LAMP NEARBY.
  184. THERE IS FOOD HERE.
  185. THERE IS A BOTTLE OF WATER HERE.
  186. </code></pre>
  187. <p>You can think of the shell as a kind of environment you inhabit, in much the
  188. way your character inhabits an adventure game. The difference is that instead
  189. of navigating around virtual rooms and hallways with commands like <code>LOOK</code> and
  190. <code>EAST</code>, you navigate between directories by typing commands like <code>ls</code> and <code>cd
  191. notes</code>:</p>
  192. <pre><code>$ ls
  193. code Downloads notes p1k3 photos scraps userland-book
  194. $ cd notes
  195. $ ls
  196. notes.txt sparkfun TODO.txt
  197. </code></pre>
  198. <p><code>ls</code> lists files. Some files are directories, which means they can contain
  199. other files, and you can step inside of them by typing <code>cd</code> (for <strong>c</strong>hange
  200. <strong>d</strong>irectory).</p>
  201. <p>In the Macintosh and Windows world, directories have been called
  202. &ldquo;folders&rdquo; for a long time now. This isn&rsquo;t the <em>worst</em> metaphor for what&rsquo;s
  203. going on, and it&rsquo;s so pervasive by now that it&rsquo;s not worth fighting about.
  204. It&rsquo;s also not exactly a <em>great</em> metaphor, since computer filesystems aren&rsquo;t
  205. built very much like the filing cabinets of yore. A directory acts a lot like
  206. a container of some sort, but it&rsquo;s an infinitely expandable one which may
  207. contain nested sub-spaces much larger than itself. Directories are frequently
  208. like the TARDIS: Bigger on the inside.</p>
  209. <h2><a name=cat href=#cat>#</a> cat</h2>
  210. <p>When you&rsquo;re in the shell, you have many tools at your disposal - programs that
  211. can be used on many different files, or chained together with other programs.
  212. They tend to have weird, cryptic names, but a lot of them do very simple
  213. things. Tasks that might be a menu item in a big program like Word, like
  214. counting the number of words in a document or finding a particular phrase, are
  215. often programs unto themselves. We&rsquo;ll start with something even more basic
  216. than that.</p>
  217. <p>Suppose you have some files, and you&rsquo;re curious what&rsquo;s in them. For example,
  218. suppose you&rsquo;ve got a list of authors you&rsquo;re planning to reference, and you just
  219. want to check its contents real quick-like. This is where our friend <code>cat</code>
  220. comes in:</p>
  221. <!-- exec -->
  222. <pre><code>$ cat authors_sff
  223. Ursula K. Le Guin
  224. Jo Walton
  225. Pat Cadigan
  226. John Ronald Reuel Tolkien
  227. Vanessa Veselka
  228. James Tiptree, Jr.
  229. John Brunner
  230. </code></pre>
  231. <!-- end -->
  232. <p>&ldquo;Why,&rdquo; you might be asking, &ldquo;is the command to dump out the contents of a file
  233. to a screen called <code>cat</code>? What do felines have to do with anything?&rdquo;</p>
  234. <p>It turns out that <code>cat</code> is actually short for &ldquo;catenate&rdquo;, which is a long
  235. word basically meaning &ldquo;stick things together&rdquo;. In programming, we usually
  236. refer to sticking two bits of text together as &ldquo;string concatenation&rdquo;, probably
  237. because programmers like to feel like they&rsquo;re being very precise about very
  238. simple actions.</p>
  239. <p>Suppose you wanted to see the contents of a <em>set</em> of author lists:</p>
  240. <!-- exec -->
  241. <pre><code>$ cat authors_sff authors_contemporary_fic authors_nat_hist
  242. Ursula K. Le Guin
  243. Jo Walton
  244. Pat Cadigan
  245. John Ronald Reuel Tolkien
  246. Vanessa Veselka
  247. James Tiptree, Jr.
  248. John Brunner
  249. Eden Robinson
  250. Vanessa Veselka
  251. Miriam Toews
  252. Gwendolyn L. Waring
  253. </code></pre>
  254. <!-- end -->
  255. <h2><a name=wildcards href=#wildcards>#</a> wildcards</h2>
  256. <p>We&rsquo;re working with three filenames: <code>authors_sff</code>, <code>authors_contemporary_fic</code>,
  257. and <code>authors_nat_hist</code>. That&rsquo;s an awful lot of typing every time we want to do
  258. something to all three files. Fortunately, our shell offers a shorthand for
  259. &ldquo;all the files that start with <code>authors_</code>&rdquo;:</p>
  260. <!-- exec -->
  261. <pre><code>$ cat authors_*
  262. Eden Robinson
  263. Vanessa Veselka
  264. Miriam Toews
  265. Gwendolyn L. Waring
  266. Ursula K. Le Guin
  267. Jo Walton
  268. Pat Cadigan
  269. John Ronald Reuel Tolkien
  270. Vanessa Veselka
  271. James Tiptree, Jr.
  272. John Brunner
  273. </code></pre>
  274. <!-- end -->
  275. <p>In Bash-land, <code>*</code> basically means &ldquo;anything&rdquo;, and is known in the vernacular,
  276. somewhat poetically, as a &ldquo;wildcard&rdquo;. You should always be careful with
  277. wildcards, especially if you&rsquo;re doing anything destructive. They can and will
  278. surprise the unwary. Still, once you&rsquo;re used to the idea, they will save you a
  279. lot of RSI.</p>
  280. <h2><a name=sort href=#sort>#</a> sort</h2>
  281. <p>There&rsquo;s a problem here. Our author list is out of order, and thus confusing to
  282. reference. Fortunately, since one of the most basic things you can do to a
  283. list is to sort it, someone else has already solved this problem for us.
  284. Here&rsquo;s a command that will give us some organization:</p>
  285. <!-- exec -->
  286. <pre><code>$ sort authors_*
  287. Eden Robinson
  288. Gwendolyn L. Waring
  289. James Tiptree, Jr.
  290. John Brunner
  291. John Ronald Reuel Tolkien
  292. Jo Walton
  293. Miriam Toews
  294. Pat Cadigan
  295. Ursula K. Le Guin
  296. Vanessa Veselka
  297. Vanessa Veselka
  298. </code></pre>
  299. <!-- end -->
  300. <p>Does it bother you that they aren&rsquo;t sorted by last name? Me too. As a partial
  301. solution, we can ask <code>sort</code> to use the second &ldquo;field&rdquo; in each line as its sort
  302. <strong>k</strong>ey (by default, sort treats whitespace as a division between fields):</p>
  303. <!-- exec -->
  304. <pre><code>$ sort -k2 authors_*
  305. John Brunner
  306. Pat Cadigan
  307. Ursula K. Le Guin
  308. Gwendolyn L. Waring
  309. Eden Robinson
  310. John Ronald Reuel Tolkien
  311. James Tiptree, Jr.
  312. Miriam Toews
  313. Vanessa Veselka
  314. Vanessa Veselka
  315. Jo Walton
  316. </code></pre>
  317. <!-- end -->
  318. <p>That&rsquo;s closer, right? It sorted on &ldquo;Cadigan&rdquo; and &ldquo;Veselka&rdquo; instead of &ldquo;Pat&rdquo;
  319. and &ldquo;Vanessa&rdquo;. (Of course, it&rsquo;s still far from perfect, because the
  320. second field in each line isn&rsquo;t necessarily the person&rsquo;s last name.)</p>
  321. <h2><a name=options href=#options>#</a> options</h2>
  322. <p>Above, when we wanted to ask <code>sort</code> to behave differently, we gave it what is
  323. known as an option. Most programs with command-line interfaces will allow
  324. their behavior to be changed by adding various options. Options usually
  325. (but not always!) look like <code>-o</code> or <code>--option</code>.</p>
  326. <p>For example, if we wanted to see just the unique lines, irrespective of case,
  327. for a file called colors:</p>
  328. <!-- exec -->
  329. <pre><code>$ cat colors
  330. RED
  331. blue
  332. red
  333. BLUE
  334. Green
  335. green
  336. GREEN
  337. </code></pre>
  338. <!-- end -->
  339. <p>We could write this:</p>
  340. <!-- exec -->
  341. <pre><code>$ sort -uf colors
  342. blue
  343. Green
  344. RED
  345. </code></pre>
  346. <!-- end -->
  347. <p>Here <code>-u</code> stands for <strong>u</strong>nique and <code>-f</code> stands for <strong>f</strong>old case, which means
  348. to treat upper- and lower-case letters as the same for comparison purposes. You&rsquo;ll
  349. often see a group of short options following the <code>-</code> like this.</p>
  350. <h2><a name=uniq href=#uniq>#</a> uniq</h2>
  351. <p>Did you notice how Vanessa Veselka shows up twice in our list of authors?
  352. That&rsquo;s useful if we want to remember that she&rsquo;s in more than one category, but
  353. it&rsquo;s redundant if we&rsquo;re just worried about membership in the overall set of
  354. authors. We can make sure our list doesn&rsquo;t contain repeating lines by using
  355. <code>sort</code>, just like with that list of colors:</p>
  356. <!-- exec -->
  357. <pre><code>$ sort -u -k2 authors_*
  358. John Brunner
  359. Pat Cadigan
  360. Ursula K. Le Guin
  361. Gwendolyn L. Waring
  362. Eden Robinson
  363. John Ronald Reuel Tolkien
  364. James Tiptree, Jr.
  365. Miriam Toews
  366. Vanessa Veselka
  367. Jo Walton
  368. </code></pre>
  369. <!-- end -->
  370. <p>But there&rsquo;s another approach to this &mdash; <code>sort</code> is good at only displaying a line
  371. once, but suppose we wanted to see a count of how many different lists an
  372. author shows up on? <code>sort</code> doesn&rsquo;t do that, but a command called <code>uniq</code> does,
  373. if you give it the option <code>-c</code> for <strong>c</strong>ount.</p>
  374. <p><code>uniq</code> moves through the lines in its input, and if it sees a line more than
  375. once in sequence, it will only print that line once. If you have a bunch of
  376. files and you just want to see the unique lines across all of those files, you
  377. probably need to run them through <code>sort</code> first. How do you do that?</p>
  378. <!-- exec -->
  379. <pre><code>$ sort authors_* | uniq -c
  380. 1 Eden Robinson
  381. 1 Gwendolyn L. Waring
  382. 1 James Tiptree, Jr.
  383. 1 John Brunner
  384. 1 John Ronald Reuel Tolkien
  385. 1 Jo Walton
  386. 1 Miriam Toews
  387. 1 Pat Cadigan
  388. 1 Ursula K. Le Guin
  389. 2 Vanessa Veselka
  390. </code></pre>
  391. <!-- end -->
  392. <h2><a name=standard-IO href=#standard-IO>#</a> standard IO</h2>
  393. <p>The <code>|</code> is called a &ldquo;pipe&rdquo;. In the command above, it tells your shell that
  394. instead of printing the output of <code>sort authors_*</code> right to your terminal, it
  395. should send it to <code>uniq -c</code>.</p>
  396. <p style="text-align:center;"> <img src="images/pipe.gif"></p>
  397. <p>Pipes are some of the most important magic in the shell. When the people who
  398. built Unix in the first place give interviews about the stuff they remember
  399. from the early days, a lot of them reminisce about the invention of pipes and
  400. all of the new stuff it immediately made possible.</p>
  401. <p>Pipes help you control a thing called &ldquo;standard IO&rdquo;. In the world of the
  402. command line, programs take <strong>i</strong>nput and produce <strong>o</strong>utput. A pipe is a way
  403. to hook the output from one program to the input of another.</p>
  404. <p>Unlike a lot of the weirdly named things you&rsquo;ll encounter in software, the
  405. metaphor here is obvious and makes pretty good sense. It even kind of looks
  406. like a physical pipe.</p>
  407. <p>What if, instead of sending the output of one program to the input of another,
  408. you&rsquo;d like to store it in a file for later use?</p>
  409. <p>Check it out:</p>
  410. <!-- exec -->
  411. <pre><code>$ sort authors_* | uniq &gt; ./all_authors
  412. </code></pre>
  413. <!-- end -->
  414. <!-- exec -->
  415. <pre><code>$ cat all_authors
  416. Eden Robinson
  417. Gwendolyn L. Waring
  418. James Tiptree, Jr.
  419. John Brunner
  420. John Ronald Reuel Tolkien
  421. Jo Walton
  422. Miriam Toews
  423. Pat Cadigan
  424. Ursula K. Le Guin
  425. Vanessa Veselka
  426. </code></pre>
  427. <!-- end -->
  428. <p>I like to think of the <code>&gt;</code> as looking like a little funnel. It can be
  429. dangerous &mdash; you should always make sure that you&rsquo;re not going to clobber
  430. an existing file you actually want to keep.</p>
  431. <p>If you want to tack more stuff on to the end of an existing file, you can use
  432. <code>&gt;&gt;</code> instead. To test that, let&rsquo;s use <code>echo</code>, which prints out whatever string
  433. you give it on a line by itself:</p>
  434. <!-- exec -->
  435. <pre><code>$ echo 'hello' &gt; hello_world
  436. </code></pre>
  437. <!-- end -->
  438. <!-- exec -->
  439. <pre><code>$ echo 'world' &gt;&gt; hello_world
  440. </code></pre>
  441. <!-- end -->
  442. <!-- exec -->
  443. <pre><code>$ cat hello_world
  444. hello
  445. world
  446. </code></pre>
  447. <!-- end -->
  448. <p>You can also take a file and pull it directly back into the input of a given
  449. program, which is a bit like a funnel going the other direction:</p>
  450. <!-- exec -->
  451. <pre><code>$ nl &lt; all_authors
  452. 1 Eden Robinson
  453. 2 Gwendolyn L. Waring
  454. 3 James Tiptree, Jr.
  455. 4 John Brunner
  456. 5 John Ronald Reuel Tolkien
  457. 6 Jo Walton
  458. 7 Miriam Toews
  459. 8 Pat Cadigan
  460. 9 Ursula K. Le Guin
  461. 10 Vanessa Veselka
  462. </code></pre>
  463. <!-- end -->
  464. <p><code>nl</code> is just a way to <strong>n</strong>umber <strong>l</strong>ines. This command accomplishes pretty much
  465. the same thing as <code>cat all_authors | nl</code>, or <code>nl all_authors</code>. You won&rsquo;t see
  466. it used as often as <code>|</code> and <code>&gt;</code>, since most utilities can read files on their
  467. own, but it can save you from typing <code>cat</code> quite so often.</p>
  468. <p>We&rsquo;ll use these features liberally from here on out.</p>
  469. <h2><a name=code-help-code-and-man-pages href=#code-help-code-and-man-pages>#</a> <code>--help</code> and man pages</h2>
  470. <p>You can change the behavior of most tools by giving them different options.
  471. This is all well and good if you already know what options are available,
  472. but what if you don&rsquo;t?</p>
  473. <p>Often, you can ask the tool itself:</p>
  474. <pre><code>$ sort --help
  475. Usage: sort [OPTION]... [FILE]...
  476. or: sort [OPTION]... --files0-from=F
  477. Write sorted concatenation of all FILE(s) to standard output.
  478. Mandatory arguments to long options are mandatory for short options too.
  479. Ordering options:
  480. -b, --ignore-leading-blanks ignore leading blanks
  481. -d, --dictionary-order consider only blanks and alphanumeric characters
  482. -f, --ignore-case fold lower case to upper case characters
  483. -g, --general-numeric-sort compare according to general numerical value
  484. -i, --ignore-nonprinting consider only printable characters
  485. -M, --month-sort compare (unknown) &lt; 'JAN' &lt; ... &lt; 'DEC'
  486. -h, --human-numeric-sort compare human readable numbers (e.g., 2K 1G)
  487. -n, --numeric-sort compare according to string numerical value
  488. -R, --random-sort sort by random hash of keys
  489. --random-source=FILE get random bytes from FILE
  490. -r, --reverse reverse the result of comparisons
  491. </code></pre>
  492. <p>&hellip;and so on. (It goes on for a while in this vein.)</p>
  493. <p>If that doesn&rsquo;t work, or doesn&rsquo;t provide enough info, the next thing to try is
  494. called a man page. (&ldquo;man&rdquo; is short for &ldquo;manual&rdquo;. It&rsquo;s sort of an unfortunate
  495. abbreviation.)</p>
  496. <pre><code>$ man sort
  497. SORT(1) User Commands SORT(1)
  498. NAME
  499. sort - sort lines of text files
  500. SYNOPSIS
  501. sort [OPTION]... [FILE]...
  502. sort [OPTION]... --files0-from=F
  503. DESCRIPTION
  504. Write sorted concatenation of all FILE(s) to standard output.
  505. </code></pre>
  506. <p>&hellip;and so on. Manual pages vary in quality, and it can take a while to get
  507. used to reading them, but they&rsquo;re very often the best place to look for help.</p>
  508. <p>If you&rsquo;re not sure what <em>program</em> you want to use to solve a given problem, you
  509. might try searching all the man pages on the system for a keyword. <code>man</code>
  510. itself has an option to let you do this - <code>man -k keyword</code> - but most systems
  511. also have a shortcut called <code>apropos</code>, which I like to use because it&rsquo;s easy to
  512. remember if you imagine yourself saying &ldquo;apropos of [some problem I have]&hellip;&rdquo;</p>
  513. <!-- exec -->
  514. <pre><code>$ apropos -s1 sort
  515. apt-sortpkgs (1) - Utility to sort package index files
  516. bunzip2 (1) - a block-sorting file compressor, v1.0.6
  517. bzip2 (1) - a block-sorting file compressor, v1.0.6
  518. comm (1) - compare two sorted files line by line
  519. sort (1) - sort lines of text files
  520. tsort (1) - perform topological sort
  521. </code></pre>
  522. <!-- end -->
  523. <p>It&rsquo;s useful to know that the manual represented by <code>man</code> has numbered sections
  524. for different kinds of manual pages. Most of what the average user needs to
  525. know about lives in section 1, &ldquo;User Commands&rdquo;, so you&rsquo;ll often see the names
  526. of different tools written like <code>sort(1)</code> or <code>cat(1)</code>. This can be a good way
  527. to make it clear in writing that you&rsquo;re talking about a specific piece of
  528. software rather than a verb or a small carnivorous mammal. (I specified <code>-s1</code>
  529. for section 1 above just to cut down on clutter, though in practice I usually
  530. don&rsquo;t bother.)</p>
  531. <p>Like other literary traditions, Unix is littered with this sort of convention.
  532. This one just happens to date from a time when the manual was still a physical
  533. book.</p>
  534. <h2><a name=wc href=#wc>#</a> wc</h2>
  535. <p><code>wc</code> stands for <strong>w</strong>ord <strong>c</strong>ount. It does about what you&rsquo;d expect - it
  536. counts the number of words in its input.</p>
  537. <pre><code>$ wc index.md
  538. 736 4117 24944 index.md
  539. </code></pre>
  540. <p>736 is the number of lines, 4117 the number of words, and 24944 the number of
  541. characters in the file I&rsquo;m writing right now. I use this constantly. Most
  542. obviously, it&rsquo;s a good way to get an idea of how much you&rsquo;ve written. <code>wc</code> is
  543. the tool I used to track my progress the last time I tried National Novel
  544. Writing Month:</p>
  545. <pre><code>$ find ~/p1k3/archives/2010/11 -regextype egrep -regex '.*([0-9]+|index)' -type f | xargs wc -w | tail -1
  546. 6585 total
  547. </code></pre>
  548. <!-- exec -->
  549. <pre><code>$ cowsay 'embarrassing.'
  550. _______________
  551. &lt; embarrassing. &gt;
  552. ---------------
  553. \ ^__^
  554. \ (oo)\_______
  555. (__)\ )\/\
  556. ||----w |
  557. || ||
  558. </code></pre>
  559. <!-- end -->
  560. <p>Anyway. The less obvious thing about <code>wc</code> is that you can use it to count the
  561. output of other commands. Want to know <em>how many</em> unique authors we have?</p>
  562. <!-- exec -->
  563. <pre><code>$ sort authors_* | uniq | wc -l
  564. 10
  565. </code></pre>
  566. <!-- end -->
  567. <p>This kind of thing is trivial, but it comes in handy more often than you might
  568. think.</p>
  569. <h2><a name=head-tail-and-cut href=#head-tail-and-cut>#</a> head, tail, and cut</h2>
  570. <p>Remember our old pal <code>cat</code>, which just splats everything it&rsquo;s given back to
  571. standard output?</p>
  572. <p>Sometimes you&rsquo;ve got a piece of output that&rsquo;s more than you actually want to
  573. deal with at once. Maybe you just want to glance at the first few lines in a
  574. file:</p>
  575. <!-- exec -->
  576. <pre><code>$ head -3 colors
  577. RED
  578. blue
  579. red
  580. </code></pre>
  581. <!-- end -->
  582. <p>&hellip;or maybe you want to see the last thing in a list:</p>
  583. <!-- exec -->
  584. <pre><code>$ sort colors | uniq -i | tail -1
  585. red
  586. </code></pre>
  587. <!-- end -->
  588. <p>&hellip;or maybe you&rsquo;re only interested in the first &ldquo;field&rdquo; in some list. You might
  589. use <code>cut</code> here, asking it to treat spaces as delimiters between fields and
  590. return only the first field for each line of its input:</p>
  591. <!-- exec -->
  592. <pre><code>$ cut -d' ' -f1 ./authors_*
  593. Eden
  594. Vanessa
  595. Miriam
  596. Gwendolyn
  597. Ursula
  598. Jo
  599. Pat
  600. John
  601. Vanessa
  602. James
  603. John
  604. </code></pre>
  605. <!-- end -->
  606. <p>Suppose we&rsquo;re curious what the few most commonly occurring first names on our
  607. author list are? Here&rsquo;s an approach, silly but effective, that combines a lot
  608. of what we&rsquo;ve discussed so far and looks like plenty of one-liners I wind up
  609. writing in real life:</p>
  610. <!-- exec -->
  611. <pre><code>$ cut -d' ' -f1 ./authors_* | sort | uniq -ci | sort -n | tail -3
  612. 1 Ursula
  613. 2 John
  614. 2 Vanessa
  615. </code></pre>
  616. <!-- end -->
  617. <p>Let&rsquo;s walk through this one step by step:</p>
  618. <p>First, we have <code>cut</code> extract the first field of each line in our author lists.</p>
  619. <pre><code>cut -d' ' -f1 ./authors_*
  620. </code></pre>
  621. <p>Then we sort these results</p>
  622. <pre><code>| sort
  623. </code></pre>
  624. <p>and pass them to <code>uniq</code>, asking it for a case-insensitive count of each
  625. repeated line</p>
  626. <pre><code>| uniq -ci
  627. </code></pre>
  628. <p>then sort again, numerically,</p>
  629. <pre><code>| sort -n
  630. </code></pre>
  631. <p>and finally, we chop off everything but the last three lines:</p>
  632. <pre><code>| tail -3
  633. </code></pre>
  634. <p>If you wanted to make sure to count an individual author&rsquo;s first name
  635. only once, even if that author appears more than once in the files,
  636. you could instead do:</p>
  637. <!-- exec -->
  638. <pre><code>$ sort -u ./authors_* | cut -d' ' -f1 | uniq -ci | sort -n | tail -3
  639. 1 Ursula
  640. 1 Vanessa
  641. 2 John
  642. </code></pre>
  643. <!-- end -->
  644. <h2><a name=tab-separated-values href=#tab-separated-values>#</a> tab separated values</h2>
  645. <p>Notice above how we had to tell <code>cut</code> that &ldquo;fields&rdquo; in <code>authors_*</code> are
  646. delimited by spaces? It turns out that if you don&rsquo;t use <code>-d</code>, <code>cut</code> defaults
  647. to using tab characters for a delimiter.</p>
  648. <p>Tab characters are sort of weird little animals. You can&rsquo;t usually <em>see</em> them
  649. directly &mdash; they&rsquo;re like a space character that takes up more than one space
  650. when displayed. By convention, one tab is usually rendered as 8 spaces, but
  651. it&rsquo;s up to the software that&rsquo;s displaying the character what it wants to do.</p>
  652. <p>(In fact, it&rsquo;s more complicated than that: Tabs are often rendered as marking
  653. <em>tab stops</em>, which is a concept I remember from 7th grade typing classes, but
  654. haven&rsquo;t actually thought about in my day-to-day life for nearly 20 years.)</p>
  655. <p>Here&rsquo;s a version of our <code>all_authors</code> that&rsquo;s been rearranged so that the first
  656. field is the author&rsquo;s last name, the second is their first name, the third is
  657. their middle name or initial (if we know it) and the fourth is any suffix.
  658. Fields are separated by a single tab character:</p>
  659. <!-- exec -->
  660. <pre><code>$ cat all_authors.tsv
  661. Robinson Eden
  662. Waring Gwendolyn L.
  663. Tiptree James Jr.
  664. Brunner John
  665. Tolkien John Ronald Reuel
  666. Walton Jo
  667. Toews Miriam
  668. Cadigan Pat
  669. Le Guin Ursula K.
  670. Veselka Vanessa
  671. </code></pre>
  672. <!-- end -->
  673. <p>That looks kind of garbled, right? In order to make it a little more obvious
  674. what&rsquo;s happening, let&rsquo;s use <code>cat -T</code>, which displays tab characters as <code>^I</code>:</p>
  675. <!-- exec -->
  676. <pre><code>$ cat -T all_authors.tsv
  677. Robinson^IEden
  678. Waring^IGwendolyn^IL.
  679. Tiptree^IJames^I^IJr.
  680. Brunner^IJohn
  681. Tolkien^IJohn^IRonald Reuel
  682. Walton^IJo
  683. Toews^IMiriam
  684. Cadigan^IPat
  685. Le Guin^IUrsula^IK.
  686. Veselka^IVanessa
  687. </code></pre>
  688. <!-- end -->
  689. <p>It looks odd when displayed because some names are at or nearly at 8 characters long.
  690. &ldquo;Robinson&rdquo;, at 8 characters, overshoots the first tab stop, so &ldquo;Eden&rdquo; gets indented
  691. further than other first names, and so on.</p>
  692. <p>Fortunately, in order to make this more human-readable, we can pass it through
  693. <code>expand</code>, which turns tabs into a given number of spaces (8 by default):</p>
  694. <!-- exec -->
  695. <pre><code>$ expand -t14 all_authors.tsv
  696. Robinson Eden
  697. Waring Gwendolyn L.
  698. Tiptree James Jr.
  699. Brunner John
  700. Tolkien John Ronald Reuel
  701. Walton Jo
  702. Toews Miriam
  703. Cadigan Pat
  704. Le Guin Ursula K.
  705. Veselka Vanessa
  706. </code></pre>
  707. <!-- end -->
  708. <p>Now it&rsquo;s easy to sort by last name:</p>
  709. <!-- exec -->
  710. <pre><code>$ sort -k1 all_authors.tsv | expand -t14
  711. Brunner John
  712. Cadigan Pat
  713. Le Guin Ursula K.
  714. Robinson Eden
  715. Tiptree James Jr.
  716. Toews Miriam
  717. Tolkien John Ronald Reuel
  718. Veselka Vanessa
  719. Walton Jo
  720. Waring Gwendolyn L.
  721. </code></pre>
  722. <!-- end -->
  723. <p>Or just extract middle names and initials:</p>
  724. <!-- exec -->
  725. <pre><code>$ cut -f3 all_authors.tsv
  726. L.
  727. Ronald Reuel
  728. K.
  729. </code></pre>
  730. <!-- end -->
  731. <p>It probably won&rsquo;t surprise you to learn that there&rsquo;s a corresponding <code>paste</code>
  732. command, which takes two or more files and stitches them together with tab
  733. characters. Let&rsquo;s extract a couple of things from our author list and put them
  734. back together in a different order:</p>
  735. <!-- exec -->
  736. <pre><code>$ cut -f1 all_authors.tsv &gt; lastnames
  737. </code></pre>
  738. <!-- end -->
  739. <!-- exec -->
  740. <pre><code>$ cut -f2 all_authors.tsv &gt; firstnames
  741. </code></pre>
  742. <!-- end -->
  743. <!-- exec -->
  744. <pre><code>$ paste firstnames lastnames | sort -k2 | expand -t12
  745. John Brunner
  746. Pat Cadigan
  747. Ursula Le Guin
  748. Eden Robinson
  749. James Tiptree
  750. Miriam Toews
  751. John Tolkien
  752. Vanessa Veselka
  753. Jo Walton
  754. Gwendolyn Waring
  755. </code></pre>
  756. <!-- end -->
  757. <p>As these examples show, TSV is something very like a primitive spreadsheet: A
  758. way to represent information in columns and rows. In fact, it&rsquo;s a close cousin
  759. of CSV, which is often used as a lowest-common-denominator format for
  760. transferring spreadsheets, and which represents data something like this:</p>
  761. <pre><code>last,first,middle,suffix
  762. Tolkien,John,Ronald Reuel,
  763. Tiptree,James,,Jr.
  764. </code></pre>
  765. <p>The advantage of tabs is that they&rsquo;re supported by a bunch of the standard
  766. tools. A disadvantage is that they&rsquo;re kind of ugly and can be weird to deal
  767. with, but they&rsquo;re useful anyway, and character-delimited rows are often a
  768. good-enough way to hack your way through problems that call for basic
  769. structure.</p>
  770. <h2><a name=finding-text-grep href=#finding-text-grep>#</a> finding text: grep</h2>
  771. <p>After all those contortions, what if you actually just want to see <em>which lists</em>
  772. an individual author appears on?</p>
  773. <!-- exec -->
  774. <pre><code>$ grep 'Vanessa' ./authors_*
  775. ./authors_contemporary_fic:Vanessa Veselka
  776. ./authors_sff:Vanessa Veselka
  777. </code></pre>
  778. <!-- end -->
  779. <p><code>grep</code> takes a string to search for and, optionally, a list of files to search
  780. in. If you don&rsquo;t specify files, it&rsquo;ll look through standard input instead:</p>
  781. <!-- exec -->
  782. <pre><code>$ cat ./authors_* | grep 'Vanessa'
  783. Vanessa Veselka
  784. Vanessa Veselka
  785. </code></pre>
  786. <!-- end -->
  787. <p>Most of the time, piping the output of <code>cat</code> to <code>grep</code> is considered silly,
  788. because <code>grep</code> knows how to find things in files on its own. Many thousands of
  789. words have been written on this topic by leading lights of the nerd community.</p>
  790. <p>You&rsquo;ve probably noticed that this result doesn&rsquo;t contain filenames (and thus
  791. isn&rsquo;t very useful to us). That&rsquo;s because all <code>grep</code> saw was the lines in the
  792. files, not the names of the files themselves.</p>
  793. <h2><a name=now-you-have-n-problems href=#now-you-have-n-problems>#</a> now you have n problems</h2>
  794. <p>To close out this introductory chapter, let&rsquo;s spend a little time on a topic
  795. that will likely vex, confound, and (occasionally) delight you for as long as
  796. you are acquainted with the command line.</p>
  797. <p>When I was talking about <code>grep</code> a moment ago, I fudged the details more than a
  798. little by saying that it expects a string to search for. What <code>grep</code>
  799. <em>actually</em> expects is a <em>pattern</em>. Moreover, it expects a specific kind of
  800. pattern, what&rsquo;s known as a <em>regular expression</em>, a cumbersome phrase frequently
  801. shortened to regex.</p>
  802. <p>There&rsquo;s a lot of theory about what makes up a regular expression. Fortunately,
  803. very little of it matters to the short version that will let you get useful
  804. stuff done. The short version is that a regex is like using wildcards in the
  805. shell to match groups of files, but for text in general and with more magic.</p>
  806. <!-- exec -->
  807. <pre><code>$ grep 'Jo.*' ./authors_*
  808. ./authors_sff:Jo Walton
  809. ./authors_sff:John Ronald Reuel Tolkien
  810. ./authors_sff:John Brunner
  811. </code></pre>
  812. <!-- end -->
  813. <p>The pattern <code>Jo.*</code> says that we&rsquo;re looking for lines which contain a literal
  814. <code>Jo</code>, followed by any quantity (including none) of any character. In a regex,
  815. <code>.</code> means &ldquo;anything&rdquo; and <code>*</code> means &ldquo;any amount of the preceding thing&rdquo;.</p>
  816. <p><code>.</code> and <code>*</code> are magical. In the particular dialect of regexen understood
  817. by <code>grep</code>, other magical things include:</p>
  818. <table>
  819. <tr><td><code>^</code> </td> <td>start of a line </td></tr>
  820. <tr><td><code>$</code> </td> <td>end of a line </td></tr>
  821. <tr><td><code>[abc]</code></td> <td>one of a, b, or c </td></tr>
  822. <tr><td><code>[a-z]</code></td> <td>a character in the range a through z</td></tr>
  823. <tr><td><code>[0-9]</code></td> <td>a character in the range 0 through 9</td></tr>
  824. <tr><td><code>+</code> </td> <td>one or more of the preceding thing </td></tr>
  825. <tr><td><code>?</code> </td> <td>0 or 1 of the preceding thing </td></tr>
  826. <tr><td><code>*</code> </td> <td>any number of the preceding thing </td></tr>
  827. <tr><td><code>(foo|bar)</code></td> <td>"foo" or "bar"</td></tr>
  828. <tr><td><code>(foo)?</code></td> <td>optional "foo"</td></tr>
  829. </table>
  830. <p>It&rsquo;s actually a little more complicated than that: By default, if you want to
  831. use a lot of the magical characters, you have to prefix them with <code>\</code>. This is
  832. both ugly and confusing, so unless you&rsquo;re writing a very simple pattern, it&rsquo;s
  833. often easiest to call <code>grep -E</code>, for <strong>E</strong>xtended regular expressions, which
  834. means that lots of characters will have special meanings.</p>
  835. <p>Authors with 4-letter first names:</p>
  836. <!-- exec -->
  837. <pre><code>$ grep -iE '^[a-z]{4} ' ./authors_*
  838. ./authors_contemporary_fic:Eden Robinson
  839. ./authors_sff:John Ronald Reuel Tolkien
  840. ./authors_sff:John Brunner
  841. </code></pre>
  842. <!-- end -->
  843. <p>A count of authors named John:</p>
  844. <!-- exec -->
  845. <pre><code>$ grep -c '^John ' ./all_authors
  846. 2
  847. </code></pre>
  848. <!-- end -->
  849. <p>Lines in this file matching the words &ldquo;magic&rdquo; or &ldquo;magical&rdquo;:</p>
  850. <pre><code>$ grep -iE 'magic(al)?' ./index.md
  851. Pipes are some of the most important magic in the shell. When the people who
  852. shell to match groups of files, but with more magic.
  853. `.` and `*` are magical. In the particular dialect of regexen understood
  854. by `grep`, other magical things include:
  855. use a lot of the magical characters, you have to prefix them with `\`. This is
  856. Lines in this file matching the words "magic" or "magical":
  857. $ grep -iE 'magic(al)?' ./index.md
  858. </code></pre>
  859. <p>Find some &ldquo;-agic&rdquo; words in a big list of words:</p>
  860. <!-- exec -->
  861. <pre><code>$ grep -iE '(m|tr|pel)agic' /usr/share/dict/words
  862. magic
  863. magic's
  864. magical
  865. magically
  866. magician
  867. magician's
  868. magicians
  869. pelagic
  870. tragic
  871. tragically
  872. tragicomedies
  873. tragicomedy
  874. tragicomedy's
  875. </code></pre>
  876. <!-- end -->
  877. <p><code>grep</code> isn&rsquo;t the only - or even the most important - tool that makes use of
  878. regular expressions, but it&rsquo;s a good place to start because it&rsquo;s one of the
  879. fundamental building blocks for so many other operations. Filtering lists of
  880. things, matching patterns within collections, and writing concise descriptions
  881. of how text should be transformed are at the heart of a practical approach to
  882. Unix-like systems. Regexen turn out to be a seductively powerful way to do
  883. these things - so much so that they&rsquo;ve crept their way into text editors,
  884. databases, and full-featured programming languages.</p>
  885. <p>There&rsquo;s a dark side to all of this, for the truth about regular expressions is
  886. that they are ugly, inconsistent, brittle, and <em>incredibly</em> difficult to think
  887. clearly about. They take years to master and reward the wielder with great
  888. power, but they are also a trap: a temptation towards the path of cleverness
  889. masquerading as wisdom.</p>
  890. <p style="text-align:center;"> ✑</p>
  891. <p>I&rsquo;ll be returning to this theme, but for the time being let&rsquo;s move on. Now
  892. that we&rsquo;ve established, however haphazardly, some of the basics, let&rsquo;s consider
  893. their application to a real-world task.</p>
  894. <hr />
  895. <h1><a name=a-literary-problem href=#a-literary-problem>#</a> 2. a literary problem</h1>
  896. <p>The <a href="../literary_environment">previous chapter</a> introduced a bunch of tools
  897. using contrived examples. Now we&rsquo;ll look at a real problem, and work through a
  898. solution by building on tools we&rsquo;ve already covered.</p>
  899. <p>So on to the problem: I write poetry.</p>
  900. <p>{rimshot dot wav}</p>
  901. <p>Most of the poems I have written are not very good, but lately I&rsquo;ve been
  902. thinking that I&rsquo;d like to comb through the last ten years&rsquo; worth and pull
  903. the least-embarrassing stuff into a single collection.</p>
  904. <p>I&rsquo;ve hinted at how the contents of my blog are stored as files, but let&rsquo;s take
  905. a look at the whole thing:</p>
  906. <pre><code>$ ls -F ~/p1k3/archives/
  907. 1997/ 2003/ 2009/ bones/ meta/
  908. 1998/ 2004/ 2010/ chapbook/ winfield/
  909. 1999/ 2005/ 2011/ cli/ wip/
  910. 2000/ 2006/ 2012/ colophon/
  911. 2001/ 2007/ 2013/ europe/
  912. 2002/ 2008/ 2014/ hack/
  913. </code></pre>
  914. <p>(<code>ls</code>, again, just lists files. <code>-F</code> tells it to append a character that shows
  915. us what type of file we&rsquo;re looking at, such as a trailing / for directories.
  916. <code>~</code> is a shorthand that means &ldquo;my home directory&rdquo;, which in this case is
  917. <code>/home/brennen</code>.)</p>
  918. <p>Each of the directories here holds other directories. The ones for each year
  919. have sub-directories for the months of the year, which in turn contain files
  920. for the days. The files are just little pieces of HTML and Markdown and some
  921. other stuff. Many years ago, before I had much of an idea how to program, I
  922. wrote a script to glue them all together into a web page and serve them up to
  923. visitors. This all sounds complicated, but all it really means is that if I
  924. want to write a blog entry, I just open a file and type some stuff. Here&rsquo;s an
  925. example for March 1st:</p>
  926. <!-- exec -->
  927. <pre><code>$ cat ~/p1k3/archives/2014/3/1
  928. &lt;h1&gt;Saturday, March 1&lt;/h1&gt;
  929. &lt;markdown&gt;
  930. Sometimes I'm going along on a Saturday morning, still a little dazed from the
  931. night before, and I think something like "I should just go write a detailed
  932. analysis of hooded sweatshirts". Mostly these thoughts don't survive contact
  933. with an actual keyboard. It's almost certainly for the best.
  934. &lt;/markdown&gt;
  935. </code></pre>
  936. <!-- end -->
  937. <p>And here&rsquo;s an older one that contains a short poem:</p>
  938. <!-- took this one out of exec block 'cause later i
  939. made a dir out of it... -->
  940. <pre><code>$ cat ~/p1k3/archives/2012/10/9
  941. &lt;h1&gt;tuesday, october 9&lt;/h1&gt;
  942. &lt;freeverse&gt;i am a stateful machine
  943. i exist in a manifold of consequence
  944. a clattering miscellany of impure functions
  945. and side effects&lt;/freeverse&gt;
  946. </code></pre>
  947. <p>Notice that <code>&lt;freeverse&gt;</code> bit? It kind of looks like an HTML tag, but it&rsquo;s
  948. not. What it actually does is tell my blog script that it should format the
  949. text it contains like a poem. The specifics don&rsquo;t matter for our purposes
  950. (yet), but this convention is going to come in handy, because the first thing I
  951. want to do is get a list of all the entries that contain poems.</p>
  952. <p>Remember <code>grep</code>?</p>
  953. <pre><code>$ grep -ri '&lt;freeverse&gt;' ~/p1k3/archives &gt; ~/possible_poems
  954. </code></pre>
  955. <p>Let&rsquo;s step through this bit by bit:</p>
  956. <p>First, I&rsquo;m asking <code>grep</code> to search <strong>r</strong>ecursively, <strong>i</strong>gnoring case.
  957. &ldquo;Recursively&rdquo; just means that every time the program finds a directory, it
  958. should descend into that directory and search in any files there as well.</p>
  959. <pre><code>grep -ri
  960. </code></pre>
  961. <p>Next comes a pattern to search for. It&rsquo;s in single quotes because the
  962. characters <code>&lt;</code> and <code>&gt;</code> have a special meaning to the shell, and here we need
  963. the shell to understand that it should treat them as literal angle brackets
  964. instead.</p>
  965. <pre><code>'&lt;freeverse&gt;'
  966. </code></pre>
  967. <p>This is the path I want to search:</p>
  968. <pre><code>~/p1k3/archives
  969. </code></pre>
  970. <p>Finally, because there are so many entries to search, I know the process will
  971. be slow and produce a large list, so I tell the shell to redirect the output to a file
  972. called <code>possible_poems</code> in my home directory:</p>
  973. <pre><code>&gt; ~/possible_poems
  974. </code></pre>
  975. <p>This is quite a few instances&hellip;</p>
  976. <pre><code>$ wc -l ~/possible_poems
  977. 679 /home/brennen/possible_poems
  978. </code></pre>
  979. <p>&hellip;and it&rsquo;s also not super-pretty to look at:</p>
  980. <pre><code>$ head -5 ~/possible_poems
  981. /home/brennen/p1k3/archives/2011/10/14:&lt;freeverse&gt;i've got this friend has a real knack
  982. /home/brennen/p1k3/archives/2011/4/25:&lt;freeverse&gt;i can't claim to strive for it
  983. /home/brennen/p1k3/archives/2011/8/10:&lt;freeverse&gt;one diminishes or becomes greater
  984. /home/brennen/p1k3/archives/2011/8/12:&lt;freeverse&gt;
  985. /home/brennen/p1k3/archives/2011/1/1:&lt;freeverse&gt;six years on
  986. </code></pre>
  987. <p>Still, it&rsquo;s a decent start. I can see paths to the files I have to check, and
  988. usually a first line. Since I use a fancy text editor, I can just go down the
  989. list opening each file in a new window and copying the stuff I&rsquo;m interested in
  990. to a new file.</p>
  991. <p>This is good enough for government work, but what if instead of jumping around
  992. between hundreds of files, I&rsquo;d rather read everything in one file and just weed
  993. out the bad ones as I go?</p>
  994. <pre><code>$ cat `grep -ril '&lt;freeverse&gt;' ~/p1k3/archives` &gt; ~/possible_poems_full
  995. </code></pre>
  996. <p>This probably bears some explaining. <code>grep</code> is still doing all the real work
  997. here. The main difference from before is that <code>-l</code> tells grep to just list any
  998. files it finds which contain a match.</p>
  999. <pre><code>`grep -ril '&lt;freeverse&gt;' ~/p1k3/archives`
  1000. </code></pre>
  1001. <p>Notice those backticks around the grep command? This part is a little
  1002. trippier. It turns out that if you put backticks around something in a
  1003. command, it&rsquo;ll get executed and replaced with its result, which in turn gets
  1004. executed as part of the larger command. So what we&rsquo;re really saying is
  1005. something like:</p>
  1006. <pre><code>$ cat [all of the files in the blog directory with &lt;freeverse&gt; in them]
  1007. </code></pre>
  1008. <p>Did you catch that? I just wrote a command that rewrote itself as a
  1009. <em>different</em>, more specific command. And it appears to have worked on the
  1010. first try:</p>
  1011. <pre><code>$ wc ~/possible_poems_full
  1012. 17628 80980 528699 /home/brennen/possible_poems_full
  1013. </code></pre>
  1014. <p>Welcome to wizard school.</p>
  1015. <hr />
  1016. <h1><a name=programmerthink href=#programmerthink>#</a> 3. programmerthink</h1>
  1017. <p>In the <a href="#a-literary-problem">preceding chapter</a>, I worked through accumulating
  1018. a big piece of text from some other, smaller texts. I started with a bunch of
  1019. files and wound up with one big file called <code>possible_poems_full</code>.</p>
  1020. <p>Let&rsquo;s talk for a minute about how programmers approach problems like this one.
  1021. What I&rsquo;ve just done is sort of an old-school humanities take on things:
  1022. Metaphorically speaking, I took a book off the shelf and hauled it down to the
  1023. copy machine to xerox a bunch of pages, and now I&rsquo;m going to start in on them
  1024. with a highlighter and some Post-Its or something. A process like this will
  1025. often trigger a cascade of questions in the programmer-mind:</p>
  1026. <ul>
  1027. <li>What if, halfway through the project, I realize my selection criteria were all
  1028. wrong and have to backtrack?</li>
  1029. <li>What if I discover corrections that also need to be made in the source documents?</li>
  1030. <li>What if I want to access metadata, like the original location of a file?</li>
  1031. <li>What if I want to quickly re-order the poems according to some new criteria?</li>
  1032. <li>Why am I storing the same text in two different places?</li>
  1033. </ul>
  1034. <p>A unifying theme of these questions is that they could all be answered by
  1035. involving a little more abstraction.</p>
  1036. <p style="text-align:center;"> ★</p>
  1037. <p>Some kinds of abstraction are so common in the physical world that we can
  1038. forget they&rsquo;re part of a sophisticated technology. For example, a good deal of
  1039. bicycle maintenance can be accomplished with a cheap multi-tool containing a
  1040. few different sizes of hex wrench and a couple of screwdrivers.</p>
  1041. <p>A hex wrench or screwdriver doesn&rsquo;t really know anything about bicycles. All
  1042. it <em>really</em> knows about is fitting into a space and allowing torque to be
  1043. applied. Standardized fasteners and adjustment mechanisms on a bicycle ensure
  1044. that the work can be done anywhere, by anyone with a certain set of tools.
  1045. Standard tools mean that if you can work on a particular bike, you can work on
  1046. <em>most</em> bikes, and even on things that aren&rsquo;t bikes at all, but were designed by
  1047. people with the same abstractions in mind.</p>
  1048. <p>The relationship between a wrench, a bolt, and the purpose of a bolt is a lot
  1049. like something we call <em>indirection</em> in software. Programs like <code>grep</code> or
  1050. <code>cat</code> don&rsquo;t really know anything about poetry. All they <em>really</em> know about is
  1051. finding lines of text in input, or sticking inputs together. Files, lines, and
  1052. text are like standardized fasteners that allow a user who can work on one kind
  1053. of data (be it poetry, a list of authors, the source code of a program) to use
  1054. the same tools for other problems and other data.</p>
  1055. <p style="text-align:center;"> ★</p>
  1056. <p>When I first started writing stuff on the web, I edited a page &mdash; a single HTML
  1057. file &mdash; by hand. When the entries on my nascent blog got old, I manually
  1058. cut-and-pasted them to archive files with names like <code>old_main97.html</code>, which
  1059. held all of the stuff I&rsquo;d written in 1997.</p>
  1060. <p>I&rsquo;m not holding this up as an example of youthful folly. In fact, it worked
  1061. fine, and just having a single, static file that you can open in any text
  1062. editor has turned out to be a <em>lot</em> more future-proof than the sophisticated
  1063. blogging software people were starting to write at the time.</p>
  1064. <p>And yet. Something about this habit nagged at my developing programmer mind
  1065. after a few years. It was just a little bit too manual and repetitive, a
  1066. little bit silly to have to write things like a table of contents by hand, or
  1067. move entries around by copy-and-pasting them to different files. Since I knew
  1068. the date for each entry, and wanted to make them navigable on that basis, why
  1069. not define a directory structure for the years and months, and then write a
  1070. file to hold each day? That way, all I&rsquo;d have to do is concatenate the files
  1071. in one directory to display any given month:</p>
  1072. <pre><code>$ cat ~/p1k3/archives/2014/1/* | head -10
  1073. &lt;h1&gt;Sunday, January 12&lt;/h1&gt;
  1074. &lt;h2&gt;the one casey is waiting for&lt;/h2&gt;
  1075. &lt;freeverse&gt;
  1076. after a while
  1077. the thing about drinking
  1078. is that it just feeds
  1079. what you drink to kill
  1080. and kills
  1081. </code></pre>
  1082. <p>I ultimately wound up writing a few thousand lines of Perl to do the actual
  1083. work, but the essential idea of the thing is still little more than invoking
  1084. <code>cat</code> on some stuff.</p>
  1085. <p>I didn&rsquo;t know the word for it at the time, but what I was reaching for was a
  1086. kind of indirection. By putting blog posts in a specific directory layout, I
  1087. was creating a simple model of the temporal structure that I considered their
  1088. most important property. Now, if I want to write commands that ask questions
  1089. about my blog posts or re-combine them in certain ways, I can address my
  1090. concerns to this model. Maybe, for example, I want a rough idea how many words
  1091. I&rsquo;ve written in blog posts so far in 2014:</p>
  1092. <pre><code>$ find ~/p1k3/archives/2014/ -type f | xargs cat | wc -w
  1093. 6677
  1094. </code></pre>
  1095. <p><code>xargs</code> is not the most intuitive command, but it&rsquo;s useful and common enough to
  1096. explain here. At the end of last chapter, when I said:</p>
  1097. <pre><code>$ cat `grep -ril '&lt;freeverse&gt;' ~/p1k3/archives` &gt; ~/possible_poems_full
  1098. </code></pre>
  1099. <p>I could also have written this as:</p>
  1100. <pre><code>$ grep -ril '&lt;freeverse&gt;' ~/p1k3/archives | xargs cat &gt; ~/possible_poems_full
  1101. </code></pre>
  1102. <p>What <code>xargs</code> does here is take its input, which starts like:</p>
  1103. <pre><code>/home/brennen/p1k3/archives/2002/10/16
  1104. /home/brennen/p1k3/archives/2002/10/27
  1105. /home/brennen/p1k3/archives/2002/10/10
  1106. </code></pre>
  1107. <p>&hellip;and run <code>cat</code> on all the things in it:</p>
  1108. <pre><code>cat /home/brennen/p1k3/archives/2002/10/16 /home/brennen/p1k3/archives/2002/10/27 /home/brennen/p1k3/archives/2002/10/10 ...
  1109. </code></pre>
  1110. <p>It can be a better idea to use <code>xargs</code>, because while backticks are
  1111. incredibly useful, they have some limitations. If you&rsquo;re dealing with a very
  1112. large list of files, for example, you might exceed the maximum allowed length
  1113. for arguments to a command on your system. <code>xargs</code> is smart enough to know
  1114. that limit and run <code>cat</code> more than once if needed.</p>
  1115. <p><code>xargs</code> is actually sort of a pain to think about, and will make you jump
  1116. through some irritating hoops if you have spaces or other weirdness in your
  1117. filenames, but I wind up using it quite a bit.</p>
  1118. <p>Maybe I want to see a table of contents:</p>
  1119. <!-- exec -->
  1120. <pre><code>$ find ~/p1k3/archives/2014/ -type d | xargs ls -v | head -10
  1121. /home/brennen/p1k3/archives/2014/:
  1122. 1
  1123. 2
  1124. 3
  1125. 4
  1126. /home/brennen/p1k3/archives/2014/1:
  1127. 5
  1128. 12
  1129. 14
  1130. </code></pre>
  1131. <!-- end -->
  1132. <p>Or find the subtitles I used in 2012:</p>
  1133. <!-- exec -->
  1134. <pre><code>$ find ~/p1k3/archives/2012/ -type f | xargs perl -ne 'print "$1\n" if m{&lt;h2&gt;(.*?)&lt;/h2&gt;}'
  1135. pursuit
  1136. fragment
  1137. this poem again
  1138. i'll do better next time
  1139. timebinding animals
  1140. more observations on gear nerdery &amp;amp; utility fetishism
  1141. thrift
  1142. A miracle, in fact, means work
  1143. &lt;em&gt;technical notes for late october&lt;/em&gt;, or &lt;em&gt;it gets dork out earlier these days&lt;/em&gt;
  1144. radio
  1145. light enough to travel
  1146. 12:06am
  1147. "figures like Heinlein and Gingrich"
  1148. </code></pre>
  1149. <!-- end -->
  1150. <p>The crucial thing about this is that the filesystem <em>itself</em> is just like <code>cat</code>
  1151. and <code>grep</code>: It doesn&rsquo;t know anything about blogs (or poetry), and it&rsquo;s
  1152. basically indifferent to the actual <em>structure</em> of a file like
  1153. <code>~/p1k3/archives/2014/1/12</code>. What the filesystem knows is that there are files
  1154. with certain names in certain places. It need not know anything about the
  1155. <em>meaning</em> of those names in order to be useful; in fact, it&rsquo;s best if it stays
  1156. agnostic about the question, for this enables us to assign our own meaning to a
  1157. structure and manipulate that structure with standard tools.</p>
  1158. <p style="text-align:center;"> ★</p>
  1159. <p>Back to the problem at hand: I have this collection of files, and I know how
  1160. to extract the ones that contain poems. My goal is to see all the poems and
  1161. collect the subset of them that I still find worthwhile. Just knowing how to
  1162. grep and then edit a big file solves my problem, in a basic sort of way. And
  1163. yet: Something about this nags at my mind. I find that, just as I can already
  1164. use standard tools and the filesystem to ask questions about all of my blog
  1165. posts in a given year or month, I would like to be able to ask questions about
  1166. the set of interesting poems.</p>
  1167. <p>If I want the freedom to execute many different sorts of commands against this
  1168. set of poems, it begins to seem that I need a model.</p>
  1169. <p>When programmers talk about models, they often mean something that people in
  1170. the sciences would recognize: We find ways to represent the arrangement of
  1171. facts so that we can think about them. A structured representation of things
  1172. often means that we can <em>change</em> those things, or at least derive new
  1173. understanding of them.</p>
  1174. <p style="text-align:center;"> ★</p>
  1175. <p>At this point in the narrative, I could pretend that my next step is
  1176. immediately obvious, but in fact it&rsquo;s not. I spend a couple of days thinking
  1177. off and on about how to proceed, scribbling notes during bus rides and while
  1178. drinking beers at the pizza joint down the street. I assess and discard ideas
  1179. which fall into a handful of broad approaches:</p>
  1180. <ul>
  1181. <li>Store blog entries in a relational database system which would allow me to
  1182. associate them with data like &ldquo;this entry is in a collection called &lsquo;ok
  1183. poems&rsquo;&rdquo;.</li>
  1184. <li>Selectively build up a file containing the list of files with ok poems, and use
  1185. it to do other tasks.</li>
  1186. <li>Define a format for metadata that lives within entry files.</li>
  1187. <li>Turn each interesting file into a directory of its own which contains a file
  1188. with the original text and another file with metadata.</li>
  1189. </ul>
  1190. <p>I discard the relational database idea immediately: I like working with files,
  1191. and I don&rsquo;t feel like abandoning a model that&rsquo;s served me well for my entire
  1192. adult life.</p>
  1193. <p>Building up an index file to point at the other files I&rsquo;m working with has a
  1194. certain appeal. I&rsquo;m already most of the way there with the <code>grep</code> output in
  1195. <code>possible_poems</code>. It would be easy to write shell commands to add, remove,
  1196. sort, and search entries. Still, it doesn&rsquo;t feel like a very satisfying
  1197. solution unto itself. I&rsquo;d like to know that an entry is part of the collection
  1198. just by looking at the entry, without having to cross-reference it to a list
  1199. somewhere else.</p>
  1200. <p>What about putting some meaningful text in the file itself? I thought about
  1201. a bunch of different ways to do this, some of them really complicated, and
  1202. eventually arrived at this:</p>
  1203. <pre><code>&lt;!-- collection: ok-poems --&gt;
  1204. </code></pre>
  1205. <p>The <code>&lt;!-- --&gt;</code> bits are how you define a comment in HTML, which means that
  1206. neither my blog code nor web browsers nor my text editor have to know anything
  1207. about the format, but I can easily find files with certain values. Check it:</p>
  1208. <pre><code>$ find ~/p1k3/archives -type f | xargs perl -ne 'print "$ARGV[0]: $1 -&gt; $2\n" if m{&lt;!-- ([a-z]+): (.*?) --&gt;};'
  1209. /home/brennen/p1k3/archives/2014/2/9: collection -&gt; ok-poems
  1210. </code></pre>
  1211. <p>That&rsquo;s an ugly one-liner, and I haven&rsquo;t explained half of what it does, but the
  1212. comment format actually seems pretty workable for this. It&rsquo;s a little tacky to
  1213. look at, but it&rsquo;s simple and searchable.</p>
  1214. <p>Before we settle, though, let&rsquo;s turn to the notion of making each entry into a
  1215. directory that can contain some structured metadata in a separate file.
  1216. Imagine something like:</p>
  1217. <pre><code>$ ls ~/p1k3/archives/2013/2/9
  1218. index Meta
  1219. </code></pre>
  1220. <p>Here I use the name &ldquo;index&rdquo; for the main part of the entry because it&rsquo;s a
  1221. convention of web sites for the top-level page in a directory to be called
  1222. something like <code>index.html</code>. As it happens, my blog software already supports
  1223. this kind of file layout for entries which contain multiple parts, image files,
  1224. and so forth.</p>
  1225. <pre><code>$ head ~/p1k3/archives/2013/2/9/index
  1226. &lt;h1&gt;saturday, february 9&lt;/h1&gt;
  1227. &lt;freeverse&gt;
  1228. midwinter midafternoon; depressed as hell
  1229. sitting in a huge cabin in the rich-people mountains
  1230. writing a sprawl, pages, of melancholic midlife bullshit
  1231. outside the snow gives way to broken clouds and the
  1232. clear unyielding light of the high country sun fills
  1233. $ cat ~/p1k3/archives/2013/2/9/Meta
  1234. collection: ok-poems
  1235. </code></pre>
  1236. <p>It would then be easy to <code>find</code> files called <code>Meta</code> and grep them for
  1237. <code>collection: ok-poems</code>.</p>
  1238. <p>What if I put metadata right in the filename itself, and dispense with the grep
  1239. altogether?</p>
  1240. <pre><code>$ ls ~/p1k3/archives/2013/2/9
  1241. index meta-ok-poem
  1242. $ find ~/p1k3/archives -name 'meta-ok-poem'
  1243. /home/brennen/p1k3/archives/2013/2/9/meta-ok-poem
  1244. </code></pre>
  1245. <p>There&rsquo;s a lot to like about this. For one thing, it&rsquo;s immediately visible in a
  1246. directory listing. For another, it doesn&rsquo;t require searching through thousands
  1247. of lines of text to extract a specific string. If a directory has a
  1248. <code>meta-ok-poem</code> in it, I can be pretty sure that it will contain an interesting
  1249. <code>index</code>.</p>
  1250. <p>What are the downsides? Well, it requires transforming lots of text files into
  1251. directories-containing-files. I might automate that process, but it&rsquo;s still a
  1252. little tedious and it makes the layout of the entry archive more complicated
  1253. overall. There&rsquo;s a cost to doing things this way. It lets me extend my
  1254. existing model of a blog entry to include arbitrary metadata, but it also adds
  1255. steps to writing or finding blog entries.</p>
  1256. <p>Abstractions usually cost you something. Is this one worth the hassle?
  1257. Sometimes the best way to answer that question is to start writing code that
  1258. handles a given abstraction.</p>
  1259. <hr />
  1260. <h1><a name=script href=#script>#</a> 4. script</h1>
  1261. <p>Back in chapter 1, I said that &ldquo;the way you use the computer is often just to write
  1262. little programs that invoke other programs&rdquo;. In fact, we&rsquo;ve already gone over a
  1263. bunch of these. Grepping through the text of a previous chapter should pull
  1264. up some good examples:</p>
  1265. <!-- exec -->
  1266. <pre><code>$ grep -E '\$ [a-z]+.*\| ' ../literary_environment/index.md
  1267. $ sort authors_* | uniq -c
  1268. $ sort authors_* | uniq &gt; ./all_authors
  1269. $ find ~/p1k3/archives/2010/11 -regextype egrep -regex '.*([0-9]+|index)' -type f | xargs wc -w | tail -1
  1270. $ sort authors_* | uniq | wc -l
  1271. $ sort colors | uniq -i | tail -1
  1272. $ cut -d' ' -f1 ./authors_* | sort | uniq -ci | sort -n | tail -3
  1273. $ sort -u ./authors_* | cut -d' ' -f1 | uniq -ci | sort -n | tail -3
  1274. $ sort -k1 all_authors.tsv | expand -t14
  1275. $ paste firstnames lastnames | sort -k2 | expand -t12
  1276. $ cat ./authors_* | grep 'Vanessa'
  1277. </code></pre>
  1278. <!-- end -->
  1279. <p>None of these one-liners do all that much, but they all take input of one sort
  1280. or another and apply one or more transformations to it. They&rsquo;re little formal
  1281. sentences describing how to make one thing into another, which is as good a
  1282. definition of programming as most. Or at least this is a good way to describe
  1283. programming-in-the-small. (A lot of the programs we use day-to-day are more
  1284. like essays, novels, or interminable Fantasy series where every character you
  1285. like dies horribly than they are like individual sentences.)</p>
  1286. <p>One-liners like these are all well and good when you&rsquo;re staring at a terminal,
  1287. trying to figure something out - but what about when you&rsquo;ve already figured it out and
  1288. you want to repeat it in the future?</p>
  1289. <p>It turns out that Bash has you covered. Since shell commands are just text,
  1290. they can live in a text file as easily as they can be typed.</p>
  1291. <h2><a name=learn-you-an-editor href=#learn-you-an-editor>#</a> learn you an editor</h2>
  1292. <p>We&rsquo;ve skirted the topic so far, but now that we&rsquo;re talking about writing out
  1293. text files in earnest, you&rsquo;re going to want a text editor.</p>
  1294. <p>My editor is where I spend most of my time that isn&rsquo;t in a web browser, because
  1295. it&rsquo;s where I write both code and prose. It turns out that the features which
  1296. make a good code editor overlap a lot with the ones that make a good editor of
  1297. English sentences.</p>
  1298. <p>So what should you use? Well, there have been other contenders in recent
  1299. years, but in truth nothing comes close to dethroning the Great Old Ones of
  1300. text editing. Emacs is a creature both primal and sophisticated, like an
  1301. avatar of some interstellar civilization that evolved long before multicellular
  1302. life existed on earth and seeded the galaxy with incomprehensible artefacts and
  1303. colossal engineering projects. Vim is like a lovable chainsaw-studded robot
  1304. with the most elegant keyboard interface in history secretly emblazoned on its
  1305. shining diamond heart.</p>
  1306. <p>It&rsquo;s worth the time it takes to learn one of the serious editors, but there are
  1307. easier places to start. Nano, for example, is easy to pick up, and should be
  1308. available on most systems. To start it, just say:</p>
  1309. <pre><code>$ nano file
  1310. </code></pre>
  1311. <p>You should see something like this:</p>
  1312. <p style="text-align:center;"> <img src="images/nano.png" alt="nano" /></p>
  1313. <p>Arrow keys will move your cursor around, and typing stuff will make it appear
  1314. in the file. This is pretty much like every other editor you&rsquo;ve ever used. If
  1315. you haven&rsquo;t used Nano before, that stuff along the bottom of the terminal is a
  1316. reference to the most commonly used commands. <code>^</code> is a convention for &ldquo;Ctrl&rdquo;,
  1317. so <code>^O</code> means Ctrl-o (the case of the letter doesn&rsquo;t actually matter), which
  1318. will save the file you&rsquo;re working on. Ctrl-x will quit, which is probably the
  1319. first important thing to know about any given editor.</p>
  1320. <h2><a name=d-i-y-utilities href=#d-i-y-utilities>#</a> d.i.y. utilities</h2>
  1321. <p>So back to putting commands in text files. Here&rsquo;s a file I just created in
  1322. my editor:</p>
  1323. <!-- exec -->
  1324. <pre><code>$ cat okpoems
  1325. #!/bin/bash
  1326. # find all the marker files and get the name of
  1327. # the directory containing each
  1328. find ~/p1k3/archives -name 'meta-ok-poem' | xargs -n1 dirname
  1329. exit 0
  1330. </code></pre>
  1331. <!-- end -->
  1332. <p>This is known as a script. There are a handful of things to notice here.
  1333. First, there&rsquo;s this fragment:</p>
  1334. <pre><code>#!/bin/bash
  1335. </code></pre>
  1336. <p>The <code>#!</code> right at the beginning, followed by the path to a program, is a
  1337. special sequence that lets the kernel know what program should be used to
  1338. interpret the contents of the file. <code>/bin/bash</code> is the path on the filesystem
  1339. where Bash itself lives. You might see this referred to as a shebang or a hash
  1340. bang.</p>
  1341. <p>Lines that start with a <code>#</code> are comments, used to describe the code to a human
  1342. reader. The <code>exit 0</code> tells Bash that the currently running script should exit
  1343. with a status of 0, which basically means &ldquo;nothing went wrong&rdquo;.</p>
  1344. <p>If you examine the directory listing for <code>okpoems</code>, you&rsquo;ll see something
  1345. important:</p>
  1346. <!-- exec -->
  1347. <pre><code>$ ls -l okpoems
  1348. -rwxrwxr-x 1 brennen brennen 163 Apr 19 00:08 okpoems
  1349. </code></pre>
  1350. <!-- end -->
  1351. <p>That looks pretty cryptic. For the moment, just remember that those little
  1352. <code>x</code>s in the first bit mean that the file has been marked e<strong>x</strong>ecutable. We
  1353. accomplish this by saying something like:</p>
  1354. <pre><code>$ chmod +x ./okpoems
  1355. </code></pre>
  1356. <p>Once that&rsquo;s done, the executable bit and the shebang line in combination mean that typing
  1357. <code>./okpoems</code> will have the same effect as typing <code>bash okpoems</code>:</p>
  1358. <!-- exec -->
  1359. <pre><code>$ ./okpoems
  1360. /home/brennen/p1k3/archives/2013/2/9
  1361. /home/brennen/p1k3/archives/2012/3/17
  1362. /home/brennen/p1k3/archives/2012/3/26
  1363. </code></pre>
  1364. <!-- end -->
  1365. <h2><a name=heavy-lifting href=#heavy-lifting>#</a> heavy lifting</h2>
  1366. <p><code>okpoems</code> demonstrates the basics, but it doesn&rsquo;t do very much. Here&rsquo;s
  1367. a script with a little more substance to it:</p>
  1368. <!-- exec -->
  1369. <pre><code>$ cat markpoem
  1370. #!/bin/bash
  1371. # $1 is the first parameter to our script
  1372. POEM=$1
  1373. # Complain and exit if we weren't given a path:
  1374. if [ ! $POEM ]; then
  1375. echo 'usage: markpoem &lt;path&gt;'
  1376. # Confusingly, an exit status of 0 means to the shell that everything went
  1377. # fine, while any other number means that something went wrong.
  1378. exit 64
  1379. fi
  1380. if [ ! -e $POEM ]; then
  1381. echo "$POEM not found"
  1382. exit 66
  1383. fi
  1384. echo "marking $POEM an ok poem"
  1385. POEM_BASENAME=$(basename $POEM)
  1386. # If the target is a plain file instead of a directory, make it into
  1387. # a directory and move the content into $POEM/index:
  1388. if [ -f $POEM ]; then
  1389. echo "making $POEM into a directory, moving content to"
  1390. echo " $POEM/index"
  1391. TEMPFILE="/tmp/$POEM_BASENAME.$(date +%s.%N)"
  1392. mv $POEM $TEMPFILE
  1393. mkdir $POEM
  1394. mv $TEMPFILE $POEM/index
  1395. fi
  1396. if [ -d $POEM ]; then
  1397. # touch(1) will either create the file or update its timestamp:
  1398. touch $POEM/meta-ok-poem
  1399. else
  1400. echo "something broke - why isn't $POEM a directory?"
  1401. file $POEM
  1402. fi
  1403. # Signal that all is copacetic:
  1404. echo kthxbai
  1405. exit 0
  1406. </code></pre>
  1407. <!-- end -->
  1408. <p>Both of these scripts are imperfect, but they were quick to write, they&rsquo;re made
  1409. out of standard commands, and I don&rsquo;t yet hate myself for them: All signs that
  1410. I&rsquo;m not totally on the wrong track with the <code>meta-ok-poem</code> abstraction, and
  1411. could live with it as part of an ongoing writing project. <code>okpoems</code> and
  1412. <code>markpoem</code> would also be easy to use with custom keybindings in my editor. In
  1413. a few more lines of code, I can build a system to wade through the list of
  1414. candidate files and quickly mark the interesting ones.</p>
  1415. <h2><a name=generality href=#generality>#</a> generality</h2>
  1416. <p>So what&rsquo;s lacking here? Well, probably a bunch of things, feature-wise. I can
  1417. imagine writing a script to unmark a poem, for example. That said, there&rsquo;s one
  1418. really glaring problem. &ldquo;Ok poem&rdquo; is only one kind of property a blog entry
  1419. might possess. Suppose I wanted a way to express that a poem is terrible?</p>
  1420. <p>It turns out I already know how to add properties to an entry. If I generalize
  1421. just a little, the tools become much more flexible.</p>
  1422. <!-- exec -->
  1423. <pre><code>$ ./addprop /home/brennen/p1k3/archives/2012/3/26 meta-terrible-poem
  1424. marking /home/brennen/p1k3/archives/2012/3/26 with meta-terrible-poem
  1425. kthxbai
  1426. </code></pre>
  1427. <!-- end -->
  1428. <!-- exec -->
  1429. <pre><code>$ ./findprop meta-terrible-poem
  1430. /home/brennen/p1k3/archives/2012/3/26
  1431. </code></pre>
  1432. <!-- end -->
  1433. <p><code>addprop</code> is only a little different from <code>markpoem</code>. It takes two parameters
  1434. instead of one - the target entry and a property to add.</p>
  1435. <!-- exec -->
  1436. <pre><code>$ cat addprop
  1437. #!/bin/bash
  1438. ENTRY=$1
  1439. PROPERTY=$2
  1440. # Complain and exit if we weren't given a path and a property:
  1441. if [[ ! $ENTRY || ! $PROPERTY ]]; then
  1442. echo "usage: addprop &lt;path&gt; &lt;property&gt;"
  1443. exit 64
  1444. fi
  1445. if [ ! -e $ENTRY ]; then
  1446. echo "$ENTRY not found"
  1447. exit 66
  1448. fi
  1449. echo "marking $ENTRY with $PROPERTY"
  1450. # If the target is a plain file instead of a directory, make it into
  1451. # a directory and move the content into $ENTRY/index:
  1452. if [ -f $ENTRY ]; then
  1453. echo "making $ENTRY into a directory, moving content to"
  1454. echo " $ENTRY/index"
  1455. # Get a safe temporary file:
  1456. TEMPFILE=`mktemp`
  1457. mv $ENTRY $TEMPFILE
  1458. mkdir $ENTRY
  1459. mv $TEMPFILE $ENTRY/index
  1460. fi
  1461. if [ -d $ENTRY ]; then
  1462. touch $ENTRY/$PROPERTY
  1463. else
  1464. echo "something broke - why isn't $ENTRY a directory?"
  1465. file $ENTRY
  1466. fi
  1467. echo kthxbai
  1468. exit 0
  1469. </code></pre>
  1470. <!-- end -->
  1471. <p>Meanwhile, <code>findprop</code> is more or less <code>okpoems</code>, but with a parameter for the
  1472. property to find:</p>
  1473. <!-- exec -->
  1474. <pre><code>$ cat findprop
  1475. #!/bin/bash
  1476. if [ ! $1 ]
  1477. then
  1478. echo "usage: findprop &lt;property&gt;"
  1479. exit
  1480. fi
  1481. # find all the marker files and get the name of
  1482. # the directory containing each
  1483. find ~/p1k3/archives -name $1 | xargs -n1 dirname
  1484. exit 0
  1485. </code></pre>
  1486. <!-- end -->
  1487. <p>These scripts aren&rsquo;t much more complicated than their poem-specific
  1488. counterparts, but now they can be used to solve problems I haven&rsquo;t even thought
  1489. of yet, and included in other scripts that need their functionality.</p>
  1490. <hr />
  1491. <h1><a name=general-purpose-programmering href=#general-purpose-programmering>#</a> 5. general purpose programmering</h1>
  1492. <p>I didn&rsquo;t set out to write a book about programming, <em>as such</em>, but because
  1493. programming and the command line are so inextricably linked, this text
  1494. draws near the subject almost of its own accord.</p>
  1495. <p>If you&rsquo;re not terribly interested in programming, this chapter can easily
  1496. enough be skipped. It&rsquo;s more in the way of philosophical rambling than
  1497. concrete instruction, and will be of most use to those with an existing
  1498. background in writing code.</p>
  1499. <p style="text-align:center;"> ✢</p>
  1500. <p>If you&rsquo;ve used computers for more than a few years, you&rsquo;re probably viscerally
  1501. aware that most software is fragile and most systems decay. In the time since
  1502. I took my first tentative steps into the little world of a computer (a friend&rsquo;s
  1503. dad&rsquo;s unidentifiable gaming machine, my own father&rsquo;s blue monochrome Zenith
  1504. laptop, the Apple II) the churn has been overwhelming. By now I&rsquo;ve learned my
  1505. way around vastly more software &mdash; operating systems, programming languages and
  1506. development environments, games, editors, chat clients, mail systems &mdash; than I
  1507. presently could use if I wanted to. Most of it has gone the way of some
  1508. ancient civilization, surviving (if at all) only in faint, half-understood
  1509. cultural echoes and occasional museum-piece displays. Every user of technology
  1510. becomes, in time, a refugee from an irretrievably recent past.</p>
  1511. <p>And yet, despite all this, the shell endures. Most of the ideas in this book
  1512. are older than I am. Most of them could have been applied in 1994 or
  1513. thereabouts, when I first logged on to multiuser systems running AT&amp;T Unix.
  1514. Since the early 1990s, systems built on a fundamental substrate of Unix-like
  1515. behavior and abstractions have proliferated wildly, becoming foundational at
  1516. once to the modern web, the ecosystem of free and open software, and the
  1517. technological dominance ca. 2014 of companies like Apple, Google, and Facebook.</p>
  1518. <p>Why is this, exactly?</p>
  1519. <p style="text-align:center;"> ✣</p>
  1520. <p>As I&rsquo;ve said (and hopefully shown), the commands you write in your shell
  1521. are essentially little programs. Like other programs, they can be stored
  1522. for later use and recombined with other commands, creating new uses for
  1523. your ideas.</p>
  1524. <p>It would be hard to say that there&rsquo;s any <em>one</em> reason command line environments
  1525. remain so vital after decades of evolution and hard-won refinement in computer
  1526. interfaces, but it seems like this combinatory nature is somewhere near the
  1527. heart of it. The command line often lacks the polish of other interfaces we
  1528. depend on, but in exchange it offers a richness and freedom of expression
  1529. rarely seen elsewhere, and invites its users to build upon its basic
  1530. facilities.</p>
  1531. <p>What is it that makes last chapter&rsquo;s <code>addprop</code> preferable to the more specific
  1532. <code>markpoem</code>? Let&rsquo;s look at an alternative implementation of <code>markpoem</code>:</p>
  1533. <!-- exec -->
  1534. <pre><code>$ cat simple_markpoem
  1535. #!/bin/bash
  1536. addprop $1 meta-ok-poem
  1537. </code></pre>
  1538. <!-- end -->
  1539. <p>Is this script trivial? Absolutely. It&rsquo;s so trivial that it barely seems to
  1540. exist, because I already wrote <code>addprop</code> to do all the heavy lifting and play
  1541. well with others, freeing us to imagine new uses for its central idea without
  1542. worrying about the implementation details.</p>
  1543. <p>Unlike <code>markpoem</code>, <code>addprop</code> doesn&rsquo;t know anything about poetry. All it knows
  1544. about, in fact, is putting a file (or three) in a particular place. And this
  1545. is in keeping with a basic insight of Unix: Pieces of software that do one
  1546. very simple thing generalize well. Good command line tools are like a hex
  1547. wrench, a hammer, a utility knife: They embody knowledge of turning, of
  1548. striking, of cutting &mdash; and with this kind of knowledge at hand, the user can
  1549. change the world even though no individual tool is made with complete knowledge
  1550. of the world as a whole. There&rsquo;s a lot of power in the accumulation of small
  1551. competencies.</p>
  1552. <p>Of course, if your code is only good at one thing, to be of any use, it has to
  1553. talk to code that&rsquo;s good at other things. There&rsquo;s another basic insight in the
  1554. Unix tradition: Tools should be composable. All those little programs have to
  1555. share some assumptions, have to speak some kind of trade language, in order to
  1556. combine usefully. Which is how we&rsquo;ve arrived at standard IO, pipelines,
  1557. filesystems, and text as a lowest-common-denominator medium of exchange. If
  1558. you think about most of these things, they have some very rough edges, but they
  1559. give otherwise simple tools ways to communicate without becoming
  1560. super-complicated along the way.</p>
  1561. <p style="text-align:center;"> ✤</p>
  1562. <p>What is the command line?</p>
  1563. <p>The command line is an environment of tool use.</p>
  1564. <p>So are kitchens, workshops, libraries, and programming languages.</p>
  1565. <p style="text-align:center;"> ✥</p>
  1566. <p>Here&rsquo;s a confession: I don&rsquo;t like writing shell scripts very much, and I
  1567. can&rsquo;t blame anyone else for feeling the same way.</p>
  1568. <p>That doesn&rsquo;t mean you shouldn&rsquo;t <em>know</em> about them, or that you shouldn&rsquo;t
  1569. <em>write</em> them. I write little ones all the time, and the ability to puzzle
  1570. through other people&rsquo;s scripts comes in handy. Oftentimes, the best, most
  1571. tasteful way to automate something is to build a script out of the commonly
  1572. available commands. The standard tools are already there on millions of
  1573. machines. Many of them have been pretty well understood for a generation, and
  1574. most will probably be around for a generation or three to come. They do neat
  1575. stuff. Scripts let you build on ideas you&rsquo;ve already worked out, and give
  1576. repeatable operations a memorable, user-friendly name. They encourage reuse of
  1577. existing programs, and help express your ideas to people who&rsquo;ll come after you.</p>
  1578. <p>One of the reliable markers of powerful software is that it can be scripted: It
  1579. extends to its users some of the same power that its authors used in creating
  1580. it. Scriptable software is to some extent <em>living</em> software. It&rsquo;s a book that
  1581. you, the reader, get to help write.</p>
  1582. <p>In all these ways, shell scripts are wonderful, a little bit magical, and
  1583. quietly indispensable to the machinery of modern civilization.</p>
  1584. <p>Unfortunately, in all the ways that a shell like Bash is weird, finicky, and
  1585. covered in 40 years of incidental cruft, long-form Bash scripts are even worse.
  1586. Bash is a useful glue language, particularly if you&rsquo;re already comfortable
  1587. wiring commands together. Syntactic and conceptual innovations like pipes are
  1588. beautiful and necessary. What Bash is <em>not</em>, despite its power, is a very good
  1589. general purpose programming language. It&rsquo;s just not especially good at things
  1590. like math, or complex data structures, or not looking like a punctuation-heavy
  1591. variety of alphabet soup.</p>
  1592. <p>It turns out that there&rsquo;s a threshold of complexity beyond which life becomes
  1593. easier if you switch from shell scripting to a more robust language. Just
  1594. where this threshold is located varies a lot between users and problems, but I
  1595. often think about switching languages before a script gets bigger than I can
  1596. view on my screen all at once. <code>addprop</code> is a good example:</p>
  1597. <!-- exec -->
  1598. <pre><code>$ wc -l ../script/addprop
  1599. 41 ../script/addprop
  1600. </code></pre>
  1601. <!-- end -->
  1602. <p>41 lines is a touch over what fits on one screen in the editor I usually use.
  1603. If I were going to add much in the way of features, I&rsquo;d think pretty hard about
  1604. porting it to another language first.</p>
  1605. <p>What&rsquo;s cool is that if you know a language like C, Python, Perl, Ruby, PHP, or
  1606. JavaScript, your code can participate in the shell environment as a first-class
  1607. citizen simply by respecting the conventions of standard IO, files, and command
  1608. line arguments. Often, in order to create a useful utility, it&rsquo;s only
  1609. necessary to deal with <code>STDIN</code>, or operate on a particular sort of file, and
  1610. most languages offer simple conventions for doing these things.</p>
  1611. <p style="text-align:center;"> *</p>
  1612. <p>I think the shell can be taught and understood as a humane environment, despite
  1613. all of its ugliness and complication, because it offers the materials of its
  1614. own construction to its users, whatever their concerns. The writer, the
  1615. philosopher, the scientist, the programmer: Files and text and pipes know
  1616. little enough about these things, but in their very indifference to the
  1617. specifics of any one complex purpose, they&rsquo;re adaptable to the basic needs of
  1618. many. Simple utilities which enact simple kinds of knowledge survive and
  1619. recombine because there is a wisdom to be found in small things.</p>
  1620. <p>Files and text know nothing about poetry, nothing in particular of the human
  1621. soul. Neither do pen and ink, printing presses or codex books, but somehow we
  1622. got Shakespeare and Montaigne.</p>
  1623. <hr />
  1624. <h1><a name=one-of-these-things-is-not-like-the-others href=#one-of-these-things-is-not-like-the-others>#</a> 6. one of these things is not like the others</h1>
  1625. <p>If you&rsquo;re the sort of person who took a few detours into the history of
  1626. religion in college, you might be familiar with some of the ways people used to
  1627. do textual comparison. When pen, paper, and typesetting were what scholars had
  1628. to work with, they did some fairly sophisticated things in order to expose the
  1629. relationships between multiple pieces of text.</p>
  1630. <p style="text-align:center;"> <img src="images/throckmorton_small.jpg" height=320 width=470></p>
  1631. <p>Here&rsquo;s a book I got in college: <em>Gospel Parallels: A Comparison of the
  1632. Synoptic Gospels</em>, Burton H. Throckmorton, Jr., Ed. It breaks up three books
  1633. from the New Testament by the stories and themes that they contain, and shows
  1634. the overlapping sections of each book that contain parallel texts. You can
  1635. work your way through and see what parts only show up in one book, or in two
  1636. but not the other, or in all three. Pages are arranged like so:</p>
  1637. <pre>
  1638. § JESUS DOES SOME STUFF
  1639.  ________________________________________________
  1640. | MAT             | MAR                | LUK     |
  1641. |-----------------+--------------------+---------|
  1642. | Stuff           |                    |         |
  1643. |                 | Stuff              |         |
  1644. |                 | Stuff              | Stuff   |
  1645. |                 | Stuff              |         |
  1646. |                 | Stuff              |         |
  1647. |                 |                    |         |
  1648. </pre>
  1649. <p>The way I understand it, a book like this one only scratches the surface of the
  1650. field. Tools like this support a lot of theory about which books copied each
  1651. other and how, and what other sources they might have copied that we&rsquo;ve since
  1652. lost.</p>
  1653. <p>This is some <em>incredibly</em> dry material, even if you kind of dig thinking about
  1654. the questions it addresses. It takes a special temperament to actually sit
  1655. poring over fragmentary texts in ancient languages and do these painstaking
  1656. comparisons. Even if you&rsquo;re a writer or editor and work with a lot of
  1657. revisions of a text, there&rsquo;s a good chance you rarely do this kind of
  1658. comparison on your own work, because that shit is <em>tedious</em>.</p>
  1659. <h2><a name=diff href=#diff>#</a> diff</h2>
  1660. <p>It turns out that academics aren&rsquo;t the only people who need tools for comparing
  1661. different versions of a text. Working programmers, in fact, need to do this
  1662. <em>constantly</em>. Programmers are also happiest when putting off the <em>actual</em> task
  1663. at hand to solve some incidental problem that cropped up along the way, so by
  1664. now there are a lot of ways to say &ldquo;here&rsquo;s how this file is different from this
  1665. file&rdquo;, or &ldquo;here&rsquo;s how this file is different from itself a year ago&rdquo;.</p>
  1666. <p>Let&rsquo;s look at a couple of shell scripts from an earlier chapter:</p>
  1667. <!-- exec -->
  1668. <pre><code>$ cat ../script/okpoems
  1669. #!/bin/bash
  1670. # find all the marker files and get the name of
  1671. # the directory containing each
  1672. find ~/p1k3/archives -name 'meta-ok-poem' | xargs -n1 dirname
  1673. exit 0
  1674. </code></pre>
  1675. <!-- end -->
  1676. <!-- exec -->
  1677. <pre><code>$ cat ../script/findprop
  1678. #!/bin/bash
  1679. if [ ! $1 ]
  1680. then
  1681. echo "usage: findprop &lt;property&gt;"
  1682. exit
  1683. fi
  1684. # find all the marker files and get the name of
  1685. # the directory containing each
  1686. find ~/p1k3/archives -name $1 | xargs -n1 dirname
  1687. exit 0
  1688. </code></pre>
  1689. <!-- end -->
  1690. <p>It&rsquo;s pretty obvious these are similar files, but do we know what <em>exactly</em>
  1691. changed between them at a glance? It wouldn&rsquo;t be hard to figure out, once. If
  1692. you wanted to be really certain about it, you could print them out, set them
  1693. side by side, and go over them with a highlighter.</p>
  1694. <p>Now imagine doing that for a bunch of files, some of them hundreds or thousands
  1695. of lines long. I&rsquo;ve actually done that before, colored markers and all, but I
  1696. didn&rsquo;t feel smart while I was doing it. This is a job for software.</p>
  1697. <!-- exec -->
  1698. <pre><code>$ diff ../script/okpoems ../script/findprop
  1699. 2a3,8
  1700. &gt; if [ ! $1 ]
  1701. &gt; then
  1702. &gt; echo "usage: findprop &lt;property&gt;"
  1703. &gt; exit
  1704. &gt; fi
  1705. &gt;
  1706. 5c11
  1707. &lt; find ~/p1k3/archives -name 'meta-ok-poem' | xargs -n1 dirname
  1708. ---
  1709. &gt; find ~/p1k3/archives -name $1 | xargs -n1 dirname
  1710. </code></pre>
  1711. <!-- end -->
  1712. <p>That&rsquo;s not the most human-friendly output, but it&rsquo;s a little simpler than it
  1713. seems at first glance. It&rsquo;s basically just a way of describing the changes
  1714. needed to turn <code>okpoems</code> into <code>findprop</code>. The string <code>2a3,8</code> can be read as
  1715. &ldquo;after line 2, add lines 3 through 8&rdquo;. Lines with a <code>&gt;</code> in front of them are
  1716. added. <code>5c11</code> can be read as &ldquo;line 5 in the original file becomes line 11 in
  1717. the new file&rdquo;, and the <code>&lt;</code> line is replaced with the <code>&gt;</code> line. If you wanted,
  1718. you could take a copy of the original file and apply these instructions by hand
  1719. in your text editor, and you&rsquo;d wind up with the new file.</p>
  1720. <p>A lot of people (me included) prefer what&rsquo;s known as a &ldquo;unified&rdquo; diff, because
  1721. it&rsquo;s easier to read and offers context for the changed lines. We can ask for
  1722. one of these with <code>diff -u</code>:</p>
  1723. <!-- exec -->
  1724. <pre><code>$ diff -u ../script/okpoems ../script/findprop
  1725. --- ../script/okpoems 2014-04-19 00:08:03.321230818 -0600
  1726. +++ ../script/findprop 2014-04-21 21:51:29.360846449 -0600
  1727. @@ -1,7 +1,13 @@
  1728. #!/bin/bash
  1729. +if [ ! $1 ]
  1730. +then
  1731. + echo "usage: findprop &lt;property&gt;"
  1732. + exit
  1733. +fi
  1734. +
  1735. # find all the marker files and get the name of
  1736. # the directory containing each
  1737. -find ~/p1k3/archives -name 'meta-ok-poem' | xargs -n1 dirname
  1738. +find ~/p1k3/archives -name $1 | xargs -n1 dirname
  1739. exit 0
  1740. </code></pre>
  1741. <!-- end -->
  1742. <p>That&rsquo;s a little longer, and has some metadata we might not always care about,
  1743. but if you look for lines starting with <code>+</code> and <code>-</code>, it&rsquo;s easy to read as
  1744. &ldquo;added these, took away these&rdquo;. This diff tells us at a glance that we added
  1745. some lines to complain if we didn&rsquo;t get a command line argument, and replaced
  1746. <code>'meta-ok-poem'</code> in the <code>find</code> command with that argument. Since it shows us
  1747. some context, we have a pretty good idea where those lines are in the file
  1748. and what they&rsquo;re for.</p>
  1749. <p>What if we don&rsquo;t care exactly <em>how</em> the files differ, but only whether they
  1750. do?</p>
  1751. <!-- exec -->
  1752. <pre><code>$ diff -q ../script/okpoems ../script/findprop
  1753. Files ../script/okpoems and ../script/findprop differ
  1754. </code></pre>
  1755. <!-- end -->
  1756. <p>I use <code>diff</code> a lot in the course of my day job, because I spend a lot of time
  1757. needing to know just how two programs differ. Just as importantly, I often
  1758. need to know how (or whether!) the <em>output</em> of programs differs. As a concrete
  1759. example, I want to make sure that <code>findprop meta-ok-poem</code> is really a suitable
  1760. replacement for <code>okpoems</code>. Since I expect their output to be identical, I can
  1761. do this:</p>
  1762. <!-- exec -->
  1763. <pre><code>$ ../script/okpoems &gt; okpoem_output
  1764. </code></pre>
  1765. <!-- end -->
  1766. <!-- exec -->
  1767. <pre><code>$ ../script/findprop meta-ok-poem &gt; findprop_output
  1768. </code></pre>
  1769. <!-- end -->
  1770. <!-- exec -->
  1771. <pre><code>$ diff -s okpoem_output findprop_output
  1772. Files okpoem_output and findprop_output are identical
  1773. </code></pre>
  1774. <!-- end -->
  1775. <p>The <code>-s</code> just means that <code>diff</code> should explicitly tell us if files are the
  1776. <strong>s</strong>ame. Otherwise, it&rsquo;d output nothing at all, because there aren&rsquo;t any
  1777. differences.</p>
  1778. <p>As with many other tools, <code>diff</code> doesn&rsquo;t very much care whether it&rsquo;s looking at
  1779. shell scripts or a list of filenames or what-have-you. If you read the man
  1780. page, you&rsquo;ll find some features geared towards people writing C-like
  1781. programming languages, but its real specialty is just text files with lines
  1782. made out of characters, which works well for lots of code, but certainly could
  1783. be applied to English prose.</p>
  1784. <p>Since I have a couple of versions ready to hand, let&rsquo;s apply this to a text
  1785. with some well-known variations and a bit of a literary legacy. Here&rsquo;s the
  1786. first day of the Genesis creation narrative in a couple of English
  1787. translations:</p>
  1788. <!-- exec -->
  1789. <pre><code>$ cat genesis_nkj
  1790. In the beginning God created the heavens and the earth. The earth was without
  1791. form, and void; and darkness was on the face of the deep. And the Spirit of
  1792. God was hovering over the face of the waters. Then God said, "Let there be
  1793. light"; and there was light. And God saw the light, that it was good; and God
  1794. divided the light from the darkness. God called the light Day, and the darkness
  1795. He called Night. So the evening and the morning were the first day.
  1796. </code></pre>
  1797. <!-- end -->
  1798. <!-- exec -->
  1799. <pre><code>$ cat genesis_nrsv
  1800. In the beginning when God created the heavens and the earth, the earth was a
  1801. formless void and darkness covered the face of the deep, while a wind from
  1802. God swept over the face of the waters. Then God said, "Let there be light";
  1803. and there was light. And God saw that the light was good; and God separated
  1804. the light from the darkness. God called the light Day, and the darkness he
  1805. called Night. And there was evening and there was morning, the first day.
  1806. </code></pre>
  1807. <!-- end -->
  1808. <p>What happens if we diff them?</p>
  1809. <!-- exec -->
  1810. <pre><code>$ diff -u genesis_nkj genesis_nrsv
  1811. --- genesis_nkj 2014-05-11 16:28:29.692508461 -0600
  1812. +++ genesis_nrsv 2014-05-11 16:28:29.744508459 -0600
  1813. @@ -1,6 +1,6 @@
  1814. -In the beginning God created the heavens and the earth. The earth was without
  1815. -form, and void; and darkness was on the face of the deep. And the Spirit of
  1816. -God was hovering over the face of the waters. Then God said, "Let there be
  1817. -light"; and there was light. And God saw the light, that it was good; and God
  1818. -divided the light from the darkness. God called the light Day, and the darkness
  1819. -He called Night. So the evening and the morning were the first day.
  1820. +In the beginning when God created the heavens and the earth, the earth was a
  1821. +formless void and darkness covered the face of the deep, while a wind from
  1822. +God swept over the face of the waters. Then God said, "Let there be light";
  1823. +and there was light. And God saw that the light was good; and God separated
  1824. +the light from the darkness. God called the light Day, and the darkness he
  1825. +called Night. And there was evening and there was morning, the first day.
  1826. </code></pre>
  1827. <!-- end -->
  1828. <p>Kind of useless, right? If a given line differs by so much as a character,
  1829. it&rsquo;s not the same line. This highlights the limitations of <code>diff</code> for comparing
  1830. things that</p>
  1831. <ul>
  1832. <li>aren&rsquo;t logically grouped by line</li>
  1833. <li>aren&rsquo;t easily thought of as versions of the same text with some lines changed</li>
  1834. </ul>
  1835. <p>We could edit the files into a more logically defined structure, like
  1836. one-line-per-verse, and try again:</p>
  1837. <!-- exec -->
  1838. <pre><code>$ diff -u genesis_nkj_by_verse genesis_nrsv_by_verse
  1839. --- genesis_nkj_by_verse 2014-05-11 16:51:14.312457198 -0600
  1840. +++ genesis_nrsv_by_verse 2014-05-11 16:53:02.484453134 -0600
  1841. @@ -1,5 +1,5 @@
  1842. -In the beginning God created the heavens and the earth.
  1843. -The earth was without form, and void; and darkness was on the face of the deep. And the Spirit of God was hovering over the face of the waters.
  1844. +In the beginning when God created the heavens and the earth,
  1845. +the earth was a formless void and darkness covered the face of the deep, while a wind from God swept over the face of the waters.
  1846. Then God said, "Let there be light"; and there was light.
  1847. -And God saw the light, that it was good; and God divided the light from the darkness.
  1848. -God called the light Day, and the darkness He called Night. So the evening and the morning were the first day.
  1849. +And God saw that the light was good; and God separated the light from the darkness.
  1850. +God called the light Day, and the darkness he called Night. And there was evening and there was morning, the first day.
  1851. </code></pre>
  1852. <!-- end -->
  1853. <p>It might be a little more descriptive, but editing all that text just for a
  1854. quick comparison felt suspiciously like work, and anyway the output still
  1855. doesn&rsquo;t seem very useful.</p>
  1856. <h2><a name=wdiff href=#wdiff>#</a> wdiff</h2>
  1857. <p>For cases like this, I&rsquo;m fond of a tool called <code>wdiff</code>:</p>
  1858. <!-- exec -->
  1859. <pre><code>$ wdiff genesis_nkj genesis_nrsv
  1860. In the beginning {+when+} God created the heavens and the [-earth. The-] {+earth, the+} earth was [-without
  1861. form, and void;-] {+a
  1862. formless void+} and darkness [-was on-] {+covered+} the face of the [-deep. And the Spirit of-] {+deep, while a wind from+}
  1863. God [-was hovering-] {+swept+} over the face of the waters. Then God said, "Let there be light";
  1864. and there was light. And God saw [-the light,-] that [-it-] {+the light+} was good; and God
  1865. [-divided-] {+separated+}
  1866. the light from the darkness. God called the light Day, and the darkness
  1867. [-He-] {+he+}
  1868. called Night. [-So the-] {+And there was+} evening and [-the morning were-] {+there was morning,+} the first day.
  1869. </code></pre>
  1870. <!-- end -->
  1871. <p>Deleted words are surrounded by <code>[- -]</code> and inserted ones by <code>{+ +}</code>. You can
  1872. even ask it to spit out HTML tags for insertion and deletion&hellip;</p>
  1873. <pre><code>$ wdiff -w '&lt;del&gt;' -x '&lt;/del&gt;' -y '&lt;ins&gt;' -z '&lt;/ins&gt;' genesis_nkj genesis_nrsv
  1874. </code></pre>
  1875. <p>&hellip;and come up with something your browser will render like this:</p>
  1876. <blockquote>
  1877. <p>In the beginning <ins>when</ins> God created the heavens and the <del>earth. The</del> <ins>earth, the</ins> earth was <del>without
  1878. form, and void;</del> <ins>a
  1879. formless void</ins> and darkness <del>was on</del> <ins>covered</ins> the face of the <del>deep. And the Spirit of</del> <ins>deep, while a wind from</ins>
  1880. God <del>was hovering</del> <ins>swept</ins> over the face of the waters. Then God said, "Let there be light";
  1881. and there was light. And God saw <del>the light,</del> that <del>it</del> <ins>the light</ins> was good; and God
  1882. <del>divided</del> <ins>separated</ins>
  1883. the light from the darkness. God called the light Day, and the darkness
  1884. <del>He</del> <ins>he</ins>
  1885. called Night. <del>So the</del> <ins>And there was</ins> evening and <del>the morning were</del> <ins>there was morning,</ins> the first day.</p>
  1886. </blockquote>
  1887. <p>Burton H. Throckmorton, Jr. this ain&rsquo;t. Still, it has its uses.</p>
  1888. <hr />
  1889. <h1><a name=the-command-line-as-as-a-shared-world href=#the-command-line-as-as-a-shared-world>#</a> 7. the command line as a shared world</h1>
  1890. <p>In an earlier chapter, I wrote:</p>
  1891. <blockquote><p>You can think of the shell as a kind of environment you inhabit, in much
  1892. the way your character inhabits an adventure game.</p></blockquote>
  1893. <p>It turns out that sometimes there are other human inhabitants of this
  1894. environment.</p>
  1895. <p>Unix was built on a model known as &ldquo;time-sharing&rdquo;. This is an idea with a lot
  1896. of history, but the very short version is that when computers were rare and
  1897. expensive, it made sense for lots of people to be able to use them at once.
  1898. This is part of the story of how ideas like e-mail and chat were originally
  1899. born, well before networks took over the world: As ways for the many users of
  1900. one computer to communicate on the same machine.</p>
  1901. <p>Says Dennis Ritchie:</p>
  1902. <blockquote><p>What we wanted to preserve was not just a good environment in which to do
  1903. programming, but a system around which a fellowship could form. We knew from
  1904. experience that the essence of communal computing, as supplied by
  1905. remote-access, time-shared machines, is not just to type programs into a
  1906. terminal instead of a keypunch, but to encourage close communication.</p></blockquote>
  1907. <p>Times have changed, and while it&rsquo;s mundane to use software that&rsquo;s shared
  1908. between many users, it&rsquo;s not nearly as common as it once was for a bunch of us
  1909. to be logged into the same computer all at once.</p>
  1910. <p style="text-align:center;"> ★</p>
  1911. <p>In the mid 1990s, when I was first exposed to Unix, it was by opening up a
  1912. program called NCSA Telnet on one of the Macs at school and connecting to a
  1913. server called mother.esu1.k12.ne.us.</p>
  1914. <p>NCSA Telnet was a terminal, not unlike the kind that you use to open a shell on
  1915. your own Linux computer, a piece of software that itself emulated actual,
  1916. physical hardware from an earlier era. Hardware terminals were basically very
  1917. simple computers with keyboards, screens, and just enough networking brains to
  1918. talk to a <em>real</em> computer somewhere else. You&rsquo;ll still come across these
  1919. scattered around big institutional environments. The last time I looked over
  1920. the shoulder of an airline check-in desk clerk, for example, I saw green
  1921. monochrome text that was probably coming from an IBM mainframe somewhere
  1922. far away.</p>
  1923. <p>Part of what was exciting about being logged into a computer somewhere else
  1924. was that you could <em>talk to people</em>.</p>
  1925. <p style="text-align:center;"> ★</p>
  1926. <p><em>{This chapter is a work in progress.}</em></p>
  1927. <hr />
  1928. <h1><a name=the-command-line-and-the-web href=#the-command-line-and-the-web>#</a> 8. the command line and the web</h1>
  1929. <p>Web browsers are really complicated these days. They&rsquo;re full of rendering
  1930. engines, audio and video players, programming languages, development tools,
  1931. databases &mdash; you name it, and there&rsquo;s a fair chance it&rsquo;s in there somewhere.
  1932. The modern web browser is kitchen sink software, and to make matters worse, it
  1933. is <em>totally surrounded</em> by technobabble. It can take <em>years</em> to come to terms
  1934. with the ocean of words about web stuff and sort out the meaningful ones from
  1935. the snake oil and bureaucratic mysticism.</p>
  1936. <p>All of which can make the web itself seem like a really complicated landscape,
  1937. and obscure the simplicity of its basic design, which is this:</p>
  1938. <p>Some programs pass text around to one another.</p>
  1939. <p>Which might sound familiar.</p>
  1940. <p>The gist of it is that the web is made out of URLs, &ldquo;Uniform Resource
  1941. Locators&rdquo;, which are paths to things. If you squint, these look kind of like
  1942. paths to files on your filesystem. When you visit a URL in your browser, it
  1943. asks a server for a certain path, and the server gives it back some text. When
  1944. you click a button to submit a form, your browser sends some text to the server
  1945. and waits to see what it says back. The text that gets passed around is
  1946. (usually) written in a language with particular significance to web browsers,
  1947. but if you look at it directly, it&rsquo;s a format that humans can understand.</p>
  1948. <p>Let&rsquo;s illustrate this. I&rsquo;ve written a really simple web page that lives at
  1949. <a href="http://p1k3.com/hello_world.html"><code>http://p1k3.com/hello_world.html</code></a>.</p>
  1950. <pre><code>$ curl 'https://p1k3.com/hello_world.html'
  1951. &lt;html&gt;
  1952. &lt;head&gt;
  1953. &lt;title&gt;hello, world&lt;/title&gt;
  1954. &lt;/head&gt;
  1955. &lt;body&gt;
  1956. &lt;h1&gt;hi everybody&lt;/h1&gt;
  1957. &lt;p&gt;How are things?&lt;/p&gt;
  1958. &lt;/body&gt;
  1959. &lt;/html&gt;
  1960. </code></pre>
  1961. <p><code>curl</code> is a program with lots and lots of features &mdash; it too is a little bit
  1962. of a kitchen sink &mdash; but it has one core purpose, which is to grab things from
  1963. URLs and spit them back out. It&rsquo;s a little bit like <code>cat</code> for things that live
  1964. on the web. Try the above command with just about any URL you can think of,
  1965. and you&rsquo;ll probably get <em>something</em> back. Let&rsquo;s try this book:</p>
  1966. <pre><code>$ curl 'https://p1k3.com/userland-book/' | head
  1967. &lt;!DOCTYPE html&gt;
  1968. &lt;html lang=en&gt;
  1969. &lt;head&gt;
  1970. &lt;meta charset="utf-8"&gt;
  1971. &lt;title&gt;userland: a book about the command line for humans&lt;/title&gt;
  1972. &lt;link rel=stylesheet href="userland.css" /&gt;
  1973. &lt;script src="js/jquery.js" type="text/javascript"&gt;&lt;/script&gt;
  1974. &lt;/head&gt;
  1975. &lt;body&gt;
  1976. </code></pre>
  1977. <p><code>hello_world.html</code> and <code>userland-book</code> are both written in HyperText Markup
  1978. Language. HTML is just text with a specific kind of structure. It&rsquo;s been
  1979. around for quite a while now, and has grown up a lot in 20 years, but at heart
  1980. it still looks a lot <a href="http://info.cern.ch/hypertext/WWW/TheProject.html">like it did in 1991</a>.</p>
  1981. <p>The basic idea is that the contents of a web page are marked up with tags.
  1982. A tag looks like this:</p>
  1983. <pre><code>&lt;title&gt;hi!&lt;/title&gt; -,
  1984.    |    |           |
  1985.    |    `- content  |
  1986.    |                `- closing tag
  1987.    `-opening tag
  1988. </code></pre>
  1989. <p>Sometimes you&rsquo;ll see tags with what are known as &ldquo;attributes&rdquo;:</p>
  1990. <pre><code>&lt;a href="https://p1k3.com/userland-book"&gt;userland&lt;/a&gt;
  1991. </code></pre>
  1992. <p>This is how links are written in HTML. <code>href="..."</code> tells the browser where to
  1993. go when the user clicks on &ldquo;<a href="http://p1k3.com/userland-book">userland</a>&rdquo;.</p>
  1994. <p>Tags are a way to describe not so much what something <em>looks like</em> as what
  1995. something <em>means</em>. Browsers are, in large part, big collections of knowledge
  1996. about the meanings of tags and ways to represent those meanings.</p>
  1997. <p>While the browser you use day-to-day has (probably) a graphical interface and
  1998. does all sorts of things impossible to render in a terminal, some of the
  1999. earliest web browsers were entirely text-based, and text-mode browsers still
  2000. exist. Lynx, which originated at the University of Kansas in the early 1990s,
  2001. is still actively maintained:</p>
  2002. <pre><code>$ lynx -dump 'http://p1k3.com/userland-book/' | head
  2003. userland
  2004. __________________________________________________________________
  2005. [1]# a book about the command line for humans
  2006. Late last year, [2]a side trip into text utilities got me thinking
  2007. about how much my writing habits depend on the Linux command line. This
  2008. struck me as a good hook for talking about the tools I use every day
  2009. with an audience of mixed technical background.
  2010. </code></pre>
  2011. <p>If you invoke Lynx without any options, it&rsquo;ll start up in interactive mode, and
  2012. you can navigate between links with the arrow keys. <code>lynx -dump</code> spits a
  2013. rendered version of a page to standard output, with links annotated in square
  2014. brackets and printed as footnotes. Another useful option here is <code>-listonly</code>,
  2015. which will print just the list of links contained within a page:</p>
  2016. <pre><code>$ lynx -dump -listonly 'http://p1k3.com/userland-book/' | head
  2017. References
  2018. 2. http://p1k3.com/2013/8/4
  2019. 3. http://p1k3.com/userland-book.git
  2020. 4. https://github.com/brennen/userland-book
  2021. 5. http://p1k3.com/userland-book/
  2022. 6. https://twitter.com/brennen
  2023. 9. http://p1k3.com/userland-book/#a-book-about-the-command-line-for-humans
  2024. 10. http://p1k3.com/userland-book/#copying
  2025. </code></pre>
  2026. <p>An alternative to Lynx is w3m, which copes a little more gracefully with the
  2027. complexities of modern web layout.</p>
  2028. <pre><code>$ w3m -dump 'http://p1k3.com/userland-book/' | head
  2029. userland
  2030. ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
  2031. # a book about the command line for humans
  2032. Late last year, a side trip into text utilities got me thinking about how much
  2033. my writing habits depend on the Linux command line. This struck me as a good
  2034. hook for talking about the tools I use every day with an audience of mixed
  2035. technical background.
  2036. </code></pre>
  2037. <p>Neither of these tools can easily replace enormously capable applications like
  2038. Chrome or Firefox, but they have their place in the toolbox, and help to
  2039. demonstrate how the web is built (in part) on principles we&rsquo;ve already seen at
  2040. work.</p>
  2041. <hr />
  2042. <h1><a name=a-miscellany-of-tools-and-techniques href=#a-miscellany-of-tools-and-techniques>#</a> 9. a miscellany of tools and techniques</h1>
  2043. <h2><a name=dict href=#dict>#</a> dict</h2>
  2044. <p>Want to know the definition of a word, or find useful synonyms?</p>
  2045. <pre><code>$ dict concatenate | head -10
  2046. 4 definitions found
  2047. From The Collaborative International Dictionary of English v.0.48 [gcide]:
  2048. Concatenate \Con*cat"e*nate\ (k[o^]n*k[a^]t"[-e]*n[=a]t), v. t.
  2049. [imp. &amp; p. p. {Concatenated}; p. pr. &amp; vb. n.
  2050. {Concatenating}.] [L. concatenatus, p. p. of concatenare to
  2051. concatenate. See {Catenate}.]
  2052. To link together; to unite in a series or chain, as things
  2053. depending on one another.
  2054. </code></pre>
  2055. <h2><a name=aspell href=#aspell>#</a> aspell</h2>
  2056. <p>Need to interactively spell-check your presentation notes?</p>
  2057. <pre><code>$ aspell check presentation
  2058. </code></pre>
  2059. <p>Just want a list of potentially-misspelled words in a given file?</p>
  2060. <!-- exec -->
  2061. <pre><code>$ aspell list &lt; ../literary_environment/index.md | sort | uniq -ci | sort -nr | head -5
  2062. 40 td
  2063. 24 Veselka
  2064. 17 Reuel
  2065. 16 Brunner
  2066. 15 Tiptree
  2067. </code></pre>
  2068. <!-- end -->
  2069. <h2><a name=mostcommon href=#mostcommon>#</a> mostcommon</h2>
  2070. <p>Something like that last sequence sure does seem to show up a lot in my work:
  2071. Spit out the <em>n</em> most common lines in the input, one way or another. Here&rsquo;s
  2072. a little script to be less repetitive about it.</p>
  2073. <!-- exec -->
  2074. <pre><code>$ aspell list &lt; ../literary_environment/index.md | ./mostcommon -i -n5
  2075. 40 td
  2076. 24 Veselka
  2077. 17 Reuel
  2078. 16 Brunner
  2079. 15 Tiptree
  2080. </code></pre>
  2081. <!-- end -->
  2082. <p>This turns out to be pretty simple:</p>
  2083. <!-- exec -->
  2084. <pre><code>$ cat ./mostcommon
  2085. #!/usr/bin/env bash
  2086. # Optionally specify number of lines to show, defaulting to 10:
  2087. TOSHOW=10
  2088. CASEOPT=""
  2089. while getopts ":in:" opt; do
  2090. case $opt in
  2091. i)
  2092. CASEOPT="-i"
  2093. ;;
  2094. n)
  2095. TOSHOW=$OPTARG
  2096. ;;
  2097. \?)
  2098. echo "Invalid option: -$OPTARG" &gt;&amp;2
  2099. exit 1
  2100. ;;
  2101. :)
  2102. echo "Option -$OPTARG requires an argument." &gt;&amp;2
  2103. exit 1
  2104. ;;
  2105. esac
  2106. done
  2107. # sort and then uniqify STDIN,
  2108. # sort numerically on the first field,
  2109. # chop off everything but $TOSHOW lines of input
  2110. sort &lt; /dev/stdin | uniq -c $CASEOPT | sort -k1 -nr | head -$TOSHOW
  2111. </code></pre>
  2112. <!-- end -->
  2113. <p>Notice, though, that it doesn&rsquo;t handle opening files directly. If you wanted
  2114. to find the most common lines in a file with it, you&rsquo;d have to say something
  2115. like <code>mostcommon &lt; filename</code> in order to redirect the file to <code>mostcommon</code>&rsquo;s
  2116. input.</p>
  2117. <p>Also notice that most of the script is boilerplate for handling a couple of
  2118. options. The work is all done in a one-liner. Worth it? Maybe not, but an
  2119. interesting exercise.</p>
  2120. <h2><a name=cal-and-ncal href=#cal-and-ncal>#</a> cal and ncal</h2>
  2121. <p>Want to know what the calendar looks like for this month?</p>
  2122. <pre><code>$ cal
  2123.      April 2014
  2124. Su Mo Tu We Th Fr Sa
  2125.        1  2  3  4  5
  2126.  6  7  8  9 10 11 12
  2127. 13 14 15 16 17 18 19
  2128. 20 21 22 23 24 25 26
  2129. 27 28 29 30
  2130. </code></pre>
  2131. <p>How about for September, 1950, in a more compact format?</p>
  2132. <!-- exec -->
  2133. <pre><code>$ ncal -m9 1950
  2134.     September 1950
  2135. Su     3 10 17 24
  2136. Mo     4 11 18 25
  2137. Tu     5 12 19 26
  2138. We     6 13 20 27
  2139. Th     7 14 21 28
  2140. Fr  1  8 15 22 29
  2141. Sa  2  9 16 23 30
  2142. </code></pre>
  2143. <!-- end -->
  2144. <p>Need to know the date of Easter this year?</p>
  2145. <!-- exec -->
  2146. <pre><code>$ ncal -e
  2147. April 20 2014
  2148. </code></pre>
  2149. <!-- end -->
  2150. <h2><a name=seq href=#seq>#</a> seq</h2>
  2151. <p>Need the numbers 1-5?</p>
  2152. <!-- exec -->
  2153. <pre><code>$ seq 1 5
  2154. 1
  2155. 2
  2156. 3
  2157. 4
  2158. 5
  2159. </code></pre>
  2160. <!-- end -->
  2161. <h2><a name=shuf href=#shuf>#</a> shuf</h2>
  2162. <p>Want to shuffle some lines?</p>
  2163. <!-- exec -->
  2164. <pre><code>$ seq 1 5 | shuf
  2165. 2
  2166. 1
  2167. 4
  2168. 3
  2169. 5
  2170. </code></pre>
  2171. <!-- end -->
  2172. <h2><a name=ptx href=#ptx>#</a> ptx</h2>
  2173. <p>Want to make a <a href="http://en.wikipedia.org/wiki/Key_Word_in_Context">permuted index</a> of some phrase?</p>
  2174. <!-- exec -->
  2175. <pre><code>$ echo 'i like american music' | ptx
  2176.                         i like   american music
  2177.                                  i like american music
  2178.                              i   like american music
  2179.                i like american   music
  2180. </code></pre>
  2181. <!-- end -->
  2182. <h2><a name=figlet href=#figlet>#</a> figlet</h2>
  2183. <p>Need to make ASCII art of some giant letters?</p>
  2184. <!-- exec -->
  2185. <pre><code>$ figlet "R T F M"
  2186. ____ _____ _____ __ __
  2187. | _ \ |_ _| | ___| | \/ |
  2188. | |_) | | | | |_ | |\/| |
  2189. | _ &lt; | | | _| | | | |
  2190. |_| \_\ |_| |_| |_| |_|
  2191. </code></pre>
  2192. <!-- end -->
  2193. <h2><a name=cowsay href=#cowsay>#</a> cowsay</h2>
  2194. <p>How about ASCII art of a <del>cow</del> dragon saying something?</p>
  2195. <!-- exec -->
  2196. <pre><code>$ cowsay -f dragon "RTFM, man"
  2197. ___________
  2198. &lt; RTFM, man &gt;
  2199. -----------
  2200. \ / \ //\
  2201. \ |\___/| / \// \\
  2202. /0 0 \__ / // | \ \
  2203. / / \/_/ // | \ \
  2204. @_^_@'/ \/_ // | \ \
  2205. //_^_/ \/_ // | \ \
  2206. ( //) | \/// | \ \
  2207. ( / /) _|_ / ) // | \ _\
  2208. ( // /) '/,_ _ _/ ( ; -. | _ _\.-~ .-~~~^-.
  2209. (( / / )) ,-{ _ `-.|.-~-. .~ `.
  2210. (( // / )) '/\ / ~-. _ .-~ .-~^-. \
  2211. (( /// )) `. { } / \ \
  2212. (( / )) .----~-.\ \-' .~ \ `. \^-.
  2213. ///.----..&gt; \ _ -~ `. ^-` ^-_
  2214. ///-._ _ _ _ _ _ _}^ - - - - ~ ~-- ,.-~
  2215. /.-~
  2216. </code></pre>
  2217. <!-- end -->
  2218. <hr />
  2219. <h1><a name=endmatter href=#endmatter>#</a> endmatter</h1>
  2220. <h2><a name=further-reading href=#further-reading>#</a> further reading</h2>
  2221. <ul>
  2222. <li><em>The Unix Programming Environment</em> - Brian W. Kernighan, Rob Pike</li>
  2223. <li><a href="http://cm.bell-labs.com/cm/cs/who/dmr/hist.html">The Evolution of the Unix Time-sharing System</a> - Dennis M. Ritchie</li>
  2224. <li><a href="https://www.youtube.com/watch?v=tc4ROCJYbm0">AT&amp;T Archives: The UNIX Operating System</a> (YouTube)</li>
  2225. <li><a href="https://medium.com/message/tilde-club-i-had-a-couple-drinks-and-woke-up-with-1-000-nerds-a8904f0a2ebf">I had a couple drinks and woke up with 1,000 nerds</a> - Paul Ford</li>
  2226. </ul>
  2227. <h2><a name=code href=#code>#</a> code</h2>
  2228. <p>As of July 2018, source for this work can be found <a
  2229. href="https://code.p1k3.com/gitea/brennen/userland-book">on code.p1k3.com</a>.
  2230. I welcome feedback there, <a href="https://mastodon.social/brennen">on
  2231. Mastodon</a>, or by mail to userland@p1k3.com.</p>
  2232. <h2><a name=copying href=#copying>#</a> copying</h2>
  2233. <p>This work is licensed under a
  2234. <a rel="license" href="https://creativecommons.org/licenses/by-sa/4.0/">Creative
  2235. Commons Attribution-ShareAlike 4.0 International License</a>.</p>
  2236. <p><a rel="license" href="https://creativecommons.org/licenses/by-sa/4.0/">
  2237. <img alt="Creative Commons License" src="images/by-sa-4.png" />
  2238. </a></p>
  2239. <hr />
  2240. <script>
  // Collapsible ".details" sections: once the DOM is ready, each section gets
  // a button (appended to its ".clicker" element) that shows or hides the
  // section's ".full" content with an animated toggle.
  $(document).ready(function () {
    // ☜ ☝ ☞ ☟ ☆ ✠ ✡ ✢ ✣ ✤ ✥ ✦ ✧ ✩ ✪
    var LABEL_COLLAPSED = 'show';
    var LABEL_EXPANDED = 'hide';

    // Swap a button's caption between the collapsed and expanded labels,
    // based on whatever caption it currently displays.
    function flipLabel(button) {
      var $btn = $(button);
      var next = ($btn.html() === LABEL_COLLAPSED) ? LABEL_EXPANDED : LABEL_COLLAPSED;
      $btn.html(next);
    }

    $(".details").each(function () {
      var $section = $(this);
      var $toggle = $('<button class=clicker-button>' + LABEL_COLLAPSED + '</button>');
      var $body = $section.find('.full');

      $toggle.click(function (event) {
        event.preventDefault();
        $body.toggle({ duration: 550 });
        // `this` is the clicked button element.
        flipLabel(this);
      });

      $section.find('.clicker').append($toggle);
      $toggle.show();
    });

    // Every section starts out collapsed, matching the initial 'show' label.
    $('.details .full').hide();
  });
  2269. </script>
  2270. </body>
  2271. </html>