A book about the command line for humans.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

index.html 112KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
7277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053305430553056305730583059306030613062306330643065306630673068306930703071307230733074307530763077307830793080308130823083308430853086308730883089309030913092309330943095309630973098309931003101310231033104310531063107310831093110311131123113311431153116311731183119312031213122312331243125312631273128312931303131313231333134313531363137313831393140314131423143314431453146314731483149315031513152315331543155315631573158315931603161316231633164316531663167316831693170317131723173317431753176317731783179318031813182318331843185318631873188
  1. <!DOCTYPE html>
  2. <html lang=en>
  3. <head>
  4. <meta charset="utf-8">
  5. <title>userland: a book about the command line for humans</title>
  6. <link rel=stylesheet href="userland.css" />
  7. <link rel="alternate" type="application/atom+xml" title="changes" href="//p1k3.com/userland-book/feed.xml" />
  8. <script src="js/jquery.js" type="text/javascript"></script>
  9. </head>
  10. <body>
  11. <h1 class=bigtitle>userland</h1>
  12. <hr />
  13. <h1><a name=a-book-about-the-command-line-for-humans href=#a-book-about-the-command-line-for-humans>#</a> a book about the command line for humans</h1>
  14. <p>In the fall of 2013, <a href="//p1k3.com/2013/8/4">thinking about</a> text utilities got
  15. me thinking in turn about how my writing habits depend on the Linux command
  16. line. This seems like a good hook for explaining some tools I use every day,
  17. so now I&rsquo;m writing a short, haphazard book.</p>
  18. <p>This isn&rsquo;t a book about system administration, writing complex software, or
  19. becoming a wizard. I am not a wizard, and I don&rsquo;t subscribe to the idea that
  20. wizardry is required to use these tools. In fact, I barely know what I&rsquo;m doing
  21. most of the time. I still get some stuff done.</p>
  22. <p>This is a work in progress. It probably gets some stuff wrong.</p>
  23. <p>&ndash; bpb / <a href="https://p1k3.com">p1k3</a> / <a href="https://twitter.com/brennen">@brennen</a></p>
  24. <div class=details>
  25. <h2 class=clicker><a name=contents href=#contents>#</a> contents</h2>
  26. <div class=full>
  27. <div class=contents><ul>
  28. <li><a href="#a-book-about-the-command-line-for-humans">a book about the command line for humans</a>
  29. <ul>
  30. <li><a href="#contents">contents</a></li>
  31. </ul>
  32. </li>
  33. <li><a href="#get-you-a-shell">0. get you a shell</a>
  34. <ul>
  35. <li><a href="#ask-for-an-account-on-squiggle-city">ask for an account on squiggle.city</a></li>
  36. <li><a href="#use-a-raspberry-pi-or-beaglebone">use a raspberry pi or beaglebone</a></li>
  37. <li><a href="#use-a-virtual-machine">use a virtual machine</a></li>
  38. </ul>
  39. </li>
  40. <li><a href="#the-command-line-as-literary-environment">1. the command line as literary environment</a>
  41. <ul>
  42. <li><a href="#terms-and-definitions">terms and definitions</a></li>
  43. <li><a href="#twisty-little-passages">twisty little passages</a></li>
  44. <li><a href="#cat">cat</a></li>
  45. <li><a href="#wildcards">wildcards</a></li>
  46. <li><a href="#sort">sort</a></li>
  47. <li><a href="#options">options</a></li>
  48. <li><a href="#uniq">uniq</a></li>
  49. <li><a href="#standard-IO">standard IO</a></li>
  50. <li><a href="#code-help-code-and-man-pages"><code>--help</code> and man pages</a></li>
  51. <li><a href="#wc">wc</a></li>
  52. <li><a href="#head-tail-and-cut">head, tail, and cut</a></li>
  53. <li><a href="#tab-separated-values">tab separated values</a></li>
  54. <li><a href="#finding-text-grep">finding text: grep</a></li>
  55. <li><a href="#now-you-have-n-problems">now you have n problems</a></li>
  56. </ul>
  57. </li>
  58. <li><a href="#a-literary-problem">2. a literary problem</a></li>
  59. <li><a href="#programmerthink">3. programmerthink</a></li>
  60. <li><a href="#script">4. script</a>
  61. <ul>
  62. <li><a href="#learn-you-an-editor">learn you an editor</a></li>
  63. <li><a href="#d-i-y-utilities">d.i.y. utilities</a></li>
  64. <li><a href="#heavy-lifting">heavy lifting</a></li>
  65. <li><a href="#generality">generality</a></li>
  66. </ul>
  67. </li>
  68. <li><a href="#general-purpose-programmering">5. general purpose programmering</a></li>
  69. <li><a href="#one-of-these-things-is-not-like-the-others">6. one of these things is not like the others</a>
  70. <ul>
  71. <li><a href="#diff">diff</a></li>
  72. <li><a href="#wdiff">wdiff</a></li>
  73. </ul>
  74. </li>
  75. <li><a href="#the-command-line-as-as-a-shared-world">7. the command line as a shared world</a></li>
  76. <li><a href="#the-command-line-and-the-web">8. the command line and the web</a></li>
  77. <li><a href="#a-miscellany-of-tools-and-techniques">9. a miscellany of tools and techniques</a>
  78. <ul>
  79. <li><a href="#dict">dict</a></li>
  80. <li><a href="#aspell">aspell</a></li>
  81. <li><a href="#mostcommon">mostcommon</a></li>
  82. <li><a href="#cal-and-ncal">cal and ncal</a></li>
  83. <li><a href="#seq">seq</a></li>
  84. <li><a href="#shuf">shuf</a></li>
  85. <li><a href="#ptx">ptx</a></li>
  86. <li><a href="#figlet">figlet</a></li>
  87. <li><a href="#cowsay">cowsay</a></li>
  88. </ul>
  89. </li>
  90. <li><a href="#endmatter">endmatter</a>
  91. <ul>
  92. <li><a href="#further-reading">further reading</a></li>
  93. <li><a href="#code">code</a></li>
  94. <li><a href="#copying">copying</a></li>
  95. </ul>
  96. </li>
  97. </ul>
  98. </div>
  99. </div>
  100. </div>
  101. <hr />
  102. <h1><a name=get-you-a-shell href=#get-you-a-shell>#</a> 0. get you a shell</h1>
  103. <p>You don&rsquo;t have to have a shell at hand to get something out of this book.
  104. Still, as with most practical subjects, you&rsquo;ll learn more if you try things out
  105. as you go. You shouldn&rsquo;t feel guilty about skipping this section. It will
  106. always be here later if you need it.</p>
  107. <p>Not so long ago, it was common for schools and ISPs to hand out shell accounts
  108. on big shared systems. People learned the command line as a side effect of
  109. reading e-mail.</p>
  110. <p>That doesn&rsquo;t happen as often now, but in the meanwhile computers have become
  111. relatively cheap and free software is abundant. If you&rsquo;re reading this on the
  112. web, you can probably get access to a shell. Some options follow.</p>
  113. <h2><a name=ask-for-an-account-on-squiggle-city href=#ask-for-an-account-on-squiggle-city>#</a> ask for an account on squiggle.city</h2>
  114. <p><a href="http://squiggle.city/">squiggle.city</a> is a server I&rsquo;m running explicitly for teaching
  115. purposes, modeled on <a href="http://tilde.club/">tilde.club</a>.</p>
  116. <p>You can get ahold of me by mailing bbearnes at Google&rsquo;s giant e-mail service,
  117. or on twitter <a href="https://twitter.com/brennen">as @brennen</a>. Just let me know you&rsquo;d like an account
  118. and I&rsquo;ll walk you through the basics.</p>
  119. <p>Limited time offer, contains no implied warranty of fitness or merchantability,
  120. accounts free while supplies last!</p>
  121. <h2><a name=use-a-raspberry-pi-or-beaglebone href=#use-a-raspberry-pi-or-beaglebone>#</a> use a raspberry pi or beaglebone</h2>
  122. <p>Do you have a single-board computer lying around? Perfect. If you already
  123. run the standard Raspbian, Debian on a BeagleBone, or a similar-enough Linux,
  124. you don&rsquo;t need much else. I wrote most of this text on a Raspberry Pi, and the
  125. example commands should all work there.</p>
  126. <h2><a name=use-a-virtual-machine href=#use-a-virtual-machine>#</a> use a virtual machine</h2>
  127. <p><em>To come: Instructions for running Linux in a virtual machine on your own
  128. computer or using a cloud provider like Linode, DigitalOcean, or Amazon.</em></p>
  129. <hr />
  130. <h1><a name=the-command-line-as-literary-environment href=#the-command-line-as-literary-environment>#</a> 1. the command line as literary environment</h1>
  131. <p>There are a lot of ways to structure an introduction to the command line. I&rsquo;m
  132. going to start with writing as a point of departure because, aside from web
  133. development, it&rsquo;s what I use a computer for most. I want to shine a light on
  134. the humane potential of ideas that are usually understood as nerd trivia.
  135. Computers have utterly transformed the practice of writing within the space of
  136. my lifetime, but it seems to me that writers as a class miss out on many of the
  137. software tools and patterns taken as a given in more &ldquo;technical&rdquo; fields.</p>
  138. <p>Writing, particularly writing of any real scope or complexity, is very much a
  139. technical task. It makes demands, both physical and psychological, of its
  140. practitioners. As with woodworkers, graphic artists, and farmers, writers
  141. exhibit strong preferences in their tools, materials, and environment, and they
  142. do so because they&rsquo;re engaged in a physically and cognitively challenging task.</p>
  143. <p>My thesis is that the modern Linux command line is a pretty good environment
  144. for working with English prose and prosody, and that maybe this will illuminate
  145. the ways it could be useful in your own work with a computer, whatever that
  146. work happens to be.</p>
  147. <h2><a name=terms-and-definitions href=#terms-and-definitions>#</a> terms and definitions</h2>
  148. <p>What software are we actually talking about when we say &ldquo;the command line&rdquo;?</p>
  149. <p>For the purposes of this discussion, we&rsquo;re talking about an environment built
  150. on a very old paradigm called Unix.</p>
  151. <p style="text-align:center;"> <img src="images/jp_unix.jpg" height=320 width=470></p>
  152. <p>&hellip;except what classical Unix really looks like is this:</p>
  153. <p style="text-align:center;"> <img src="images/blinking.gif" width=470></p>
  154. <p>The Unix-like environment we&rsquo;re going to use isn&rsquo;t very classical, really.
  155. It&rsquo;s an operating system kernel called Linux, combined with a bunch of things
  156. written by other people (people in the GNU and Debian projects, and many
  157. others). Purists will tell you that this isn&rsquo;t properly Unix at all. In
  158. strict historical terms they&rsquo;re right, or at least a certain kind of right, but
  159. for the purposes of my cultural agenda I&rsquo;m going to ignore them right now.</p>
  160. <p style="text-align:center;"> <img src="images/debian.png"></p>
  161. <p>This is what&rsquo;s called a shell. There are many different shells, but they
  162. pretty much all operate on the same idea: You navigate a filesystem and run
  163. programs by typing commands. Commands can be combined in various ways to make
  164. programs of their own, and in fact the way you use the computer is often just
  165. to write little programs that invoke other programs, turtles-all-the-way-down
  166. style.</p>
  167. <p>The standard shell these days is something called Bash, so we&rsquo;ll use Bash.
  168. It&rsquo;s what you&rsquo;ll most often see in the wild. Like most shells, Bash is ugly
  169. and stupid in more ways than it is possible to easily summarize. It&rsquo;s also an
  170. incredibly powerful and expressive piece of software.</p>
  171. <h2><a name=twisty-little-passages href=#twisty-little-passages>#</a> twisty little passages</h2>
  172. <p>Have you ever played a text-based adventure game or MUD, of the kind that
  173. describes a setting and takes commands for movement and so on? Readers of a
  174. certain age and temperament might recognize the opening of Crowther &amp; Woods'
  175. <em>Adventure</em>, the great-granddaddy of text adventure games:</p>
  176. <pre><code>YOU ARE STANDING AT THE END OF A ROAD BEFORE A SMALL BRICK BUILDING.
  177. AROUND YOU IS A FOREST. A SMALL STREAM FLOWS OUT OF THE BUILDING AND
  178. DOWN A GULLY.
  179. &gt; GO EAST
  180. YOU ARE INSIDE A BUILDING, A WELL HOUSE FOR A LARGE SPRING.
  181. THERE ARE SOME KEYS ON THE GROUND HERE.
  182. THERE IS A SHINY BRASS LAMP NEARBY.
  183. THERE IS FOOD HERE.
  184. THERE IS A BOTTLE OF WATER HERE.
  185. </code></pre>
  186. <p>You can think of the shell as a kind of environment you inhabit, in much the
  187. way your character inhabits an adventure game. The difference is that instead
  188. of navigating around virtual rooms and hallways with commands like <code>LOOK</code> and
  189. <code>EAST</code>, you navigate between directories by typing commands like <code>ls</code> and <code>cd
  190. notes</code>:</p>
  191. <pre><code>$ ls
  192. code Downloads notes p1k3 photos scraps userland-book
  193. $ cd notes
  194. $ ls
  195. notes.txt sparkfun TODO.txt
  196. </code></pre>
  197. <p><code>ls</code> lists files. Some files are directories, which means they can contain
  198. other files, and you can step inside of them by typing <code>cd</code> (for <strong>c</strong>hange
  199. <strong>d</strong>irectory).</p>
  200. <p>In the Macintosh and Windows world, directories have been called
  201. &ldquo;folders&rdquo; for a long time now. This isn&rsquo;t the <em>worst</em> metaphor for what&rsquo;s
  202. going on, and it&rsquo;s so pervasive by now that it&rsquo;s not worth fighting about.
  203. It&rsquo;s also not exactly a <em>great</em> metaphor, since computer filesystems aren&rsquo;t
  204. built very much like the filing cabinets of yore. A directory acts a lot like
  205. a container of some sort, but it&rsquo;s an infinitely expandable one which may
  206. contain nested sub-spaces much larger than itself. Directories are frequently
  207. like the TARDIS: Bigger on the inside.</p>
  208. <h2><a name=cat href=#cat>#</a> cat</h2>
  209. <p>When you&rsquo;re in the shell, you have many tools at your disposal &mdash; programs that
  210. can be used on many different files, or chained together with other programs.
  211. They tend to have weird, cryptic names, but a lot of them do very simple
  212. things. Tasks that might be a menu item in a big program like Word, like
  213. counting the number of words in a document or finding a particular phrase, are
  214. often programs unto themselves. We&rsquo;ll start with something even more basic
  215. than that.</p>
  216. <p>Suppose you have some files, and you&rsquo;re curious what&rsquo;s in them. For example,
  217. suppose you&rsquo;ve got a list of authors you&rsquo;re planning to reference, and you just
  218. want to check its contents real quick-like. This is where our friend <code>cat</code>
  219. comes in:</p>
  220. <!-- exec -->
  221. <pre><code>$ cat authors_sff
  222. Ursula K. Le Guin
  223. Jo Walton
  224. Pat Cadigan
  225. John Ronald Reuel Tolkien
  226. Vanessa Veselka
  227. James Tiptree, Jr.
  228. John Brunner
  229. </code></pre>
  230. <!-- end -->
  231. <p>&ldquo;Why,&rdquo; you might be asking, &ldquo;is the command to dump out the contents of a file
  232. to a screen called <code>cat</code>? What do felines have to do with anything?&rdquo;</p>
  233. <p>It turns out that <code>cat</code> is actually short for &ldquo;concatenate&rdquo;, which is a long
  234. word basically meaning &ldquo;stick things together&rdquo;. In programming, we usually
  235. refer to sticking two bits of text together as &ldquo;string concatenation&rdquo;, probably
  236. because programmers like to feel like they&rsquo;re being very precise about very
  237. simple actions.</p>
  238. <p>Suppose you wanted to see the contents of a <em>set</em> of author lists:</p>
  239. <!-- exec -->
  240. <pre><code>$ cat authors_sff authors_contemporary_fic authors_nat_hist
  241. Ursula K. Le Guin
  242. Jo Walton
  243. Pat Cadigan
  244. John Ronald Reuel Tolkien
  245. Vanessa Veselka
  246. James Tiptree, Jr.
  247. John Brunner
  248. Eden Robinson
  249. Vanessa Veselka
  250. Miriam Toews
  251. Gwendolyn L. Waring
  252. </code></pre>
  253. <!-- end -->
  254. <h2><a name=wildcards href=#wildcards>#</a> wildcards</h2>
  255. <p>We&rsquo;re working with three filenames: <code>authors_sff</code>, <code>authors_contemporary_fic</code>,
  256. and <code>authors_nat_hist</code>. That&rsquo;s an awful lot of typing every time we want to do
  257. something to all three files. Fortunately, our shell offers a shorthand for
  258. &ldquo;all the files that start with <code>authors_</code>&rdquo;:</p>
  259. <!-- exec -->
  260. <pre><code>$ cat authors_*
  261. Eden Robinson
  262. Vanessa Veselka
  263. Miriam Toews
  264. Gwendolyn L. Waring
  265. Ursula K. Le Guin
  266. Jo Walton
  267. Pat Cadigan
  268. John Ronald Reuel Tolkien
  269. Vanessa Veselka
  270. James Tiptree, Jr.
  271. John Brunner
  272. </code></pre>
  273. <!-- end -->
  274. <p>In Bash-land, <code>*</code> basically means &ldquo;anything&rdquo;, and is known in the vernacular,
  275. somewhat poetically, as a &ldquo;wildcard&rdquo;. You should always be careful with
  276. wildcards, especially if you&rsquo;re doing anything destructive. They can and will
  277. surprise the unwary. Still, once you&rsquo;re used to the idea, they will save you a
  278. lot of RSI.</p>
  279. <h2><a name=sort href=#sort>#</a> sort</h2>
  280. <p>There&rsquo;s a problem here. Our author list is out of order, and thus confusing to
  281. reference. Fortunately, since one of the most basic things you can do to a
  282. list is to sort it, someone else has already solved this problem for us.
  283. Here&rsquo;s a command that will give us some organization:</p>
  284. <!-- exec -->
  285. <pre><code>$ sort authors_*
  286. Eden Robinson
  287. Gwendolyn L. Waring
  288. James Tiptree, Jr.
  289. John Brunner
  290. John Ronald Reuel Tolkien
  291. Jo Walton
  292. Miriam Toews
  293. Pat Cadigan
  294. Ursula K. Le Guin
  295. Vanessa Veselka
  296. Vanessa Veselka
  297. </code></pre>
  298. <!-- end -->
  299. <p>Does it bother you that they aren&rsquo;t sorted by last name? Me too. As a partial
  300. solution, we can ask <code>sort</code> to use the second &ldquo;field&rdquo; in each line as its sort
  301. <strong>k</strong>ey (by default, sort treats whitespace as a division between fields):</p>
  302. <!-- exec -->
  303. <pre><code>$ sort -k2 authors_*
  304. John Brunner
  305. Pat Cadigan
  306. Ursula K. Le Guin
  307. Gwendolyn L. Waring
  308. Eden Robinson
  309. John Ronald Reuel Tolkien
  310. James Tiptree, Jr.
  311. Miriam Toews
  312. Vanessa Veselka
  313. Vanessa Veselka
  314. Jo Walton
  315. </code></pre>
  316. <!-- end -->
  317. <p>That&rsquo;s closer, right? It sorted on &ldquo;Cadigan&rdquo; and &ldquo;Veselka&rdquo; instead of &ldquo;Pat&rdquo;
  318. and &ldquo;Vanessa&rdquo;. (Of course, it&rsquo;s still far from perfect, because the
  319. second field in each line isn&rsquo;t necessarily the person&rsquo;s last name.)</p>
  320. <h2><a name=options href=#options>#</a> options</h2>
  321. <p>Above, when we wanted to ask <code>sort</code> to behave differently, we gave it what is
  322. known as an option. Most programs with command-line interfaces will allow
  323. their behavior to be changed by adding various options. Options usually
  324. (but not always!) look like <code>-o</code> or <code>--option</code>.</p>
  325. <p>For example, if we wanted to see just the unique lines, irrespective of case,
  326. for a file called colors:</p>
  327. <!-- exec -->
  328. <pre><code>$ cat colors
  329. RED
  330. blue
  331. red
  332. BLUE
  333. Green
  334. green
  335. GREEN
  336. </code></pre>
  337. <!-- end -->
  338. <p>We could write this:</p>
  339. <!-- exec -->
  340. <pre><code>$ sort -uf colors
  341. blue
  342. Green
  343. RED
  344. </code></pre>
  345. <!-- end -->
  346. <p>Here <code>-u</code> stands for <strong>u</strong>nique and <code>-f</code> stands for <strong>f</strong>old case, which means
  347. to treat upper- and lower-case letters as the same for comparison purposes. You&rsquo;ll
  348. often see a group of short options following the <code>-</code> like this.</p>
  349. <h2><a name=uniq href=#uniq>#</a> uniq</h2>
  350. <p>Did you notice how Vanessa Veselka shows up twice in our list of authors?
  351. That&rsquo;s useful if we want to remember that she&rsquo;s in more than one category, but
  352. it&rsquo;s redundant if we&rsquo;re just worried about membership in the overall set of
  353. authors. We can make sure our list doesn&rsquo;t contain repeating lines by using
  354. <code>sort</code>, just like with that list of colors:</p>
  355. <!-- exec -->
  356. <pre><code>$ sort -u -k2 authors_*
  357. John Brunner
  358. Pat Cadigan
  359. Ursula K. Le Guin
  360. Gwendolyn L. Waring
  361. Eden Robinson
  362. John Ronald Reuel Tolkien
  363. James Tiptree, Jr.
  364. Miriam Toews
  365. Vanessa Veselka
  366. Jo Walton
  367. </code></pre>
  368. <!-- end -->
  369. <p>But there&rsquo;s another approach to this &mdash; <code>sort</code> is good at only displaying a line
  370. once, but suppose we wanted to see a count of how many different lists an
  371. author shows up on? <code>sort</code> doesn&rsquo;t do that, but a command called <code>uniq</code> does,
  372. if you give it the option <code>-c</code> for <strong>c</strong>ount.</p>
  373. <p><code>uniq</code> moves through the lines in its input, and if it sees a line more than
  374. once in sequence, it will only print that line once. If you have a bunch of
  375. files and you just want to see the unique lines across all of those files, you
  376. probably need to run them through <code>sort</code> first. How do you do that?</p>
  377. <!-- exec -->
  378. <pre><code>$ sort authors_* | uniq -c
  379. 1 Eden Robinson
  380. 1 Gwendolyn L. Waring
  381. 1 James Tiptree, Jr.
  382. 1 John Brunner
  383. 1 John Ronald Reuel Tolkien
  384. 1 Jo Walton
  385. 1 Miriam Toews
  386. 1 Pat Cadigan
  387. 1 Ursula K. Le Guin
  388. 2 Vanessa Veselka
  389. </code></pre>
  390. <!-- end -->
  391. <h2><a name=standard-IO href=#standard-IO>#</a> standard IO</h2>
  392. <p>The <code>|</code> is called a &ldquo;pipe&rdquo;. In the command above, it tells your shell that
  393. instead of printing the output of <code>sort authors_*</code> right to your terminal, it
  394. should send it to <code>uniq -c</code>.</p>
  395. <p style="text-align:center;"> <img src="images/pipe.gif"></p>
  396. <p>Pipes are some of the most important magic in the shell. When the people who
  397. built Unix in the first place give interviews about the stuff they remember
  398. from the early days, a lot of them reminisce about the invention of pipes and
  399. all of the new stuff it immediately made possible.</p>
  400. <p>Pipes help you control a thing called &ldquo;standard IO&rdquo;. In the world of the
  401. command line, programs take <strong>i</strong>nput and produce <strong>o</strong>utput. A pipe is a way
  402. to hook the output from one program to the input of another.</p>
  403. <p>Unlike a lot of the weirdly named things you&rsquo;ll encounter in software, the
  404. metaphor here is obvious and makes pretty good sense. It even kind of looks
  405. like a physical pipe.</p>
  406. <p>What if, instead of sending the output of one program to the input of another,
  407. you&rsquo;d like to store it in a file for later use?</p>
  408. <p>Check it out:</p>
  409. <!-- exec -->
  410. <pre><code>$ sort authors_* | uniq &gt; ./all_authors
  411. </code></pre>
  412. <!-- end -->
  413. <!-- exec -->
  414. <pre><code>$ cat all_authors
  415. Eden Robinson
  416. Gwendolyn L. Waring
  417. James Tiptree, Jr.
  418. John Brunner
  419. John Ronald Reuel Tolkien
  420. Jo Walton
  421. Miriam Toews
  422. Pat Cadigan
  423. Ursula K. Le Guin
  424. Vanessa Veselka
  425. </code></pre>
  426. <!-- end -->
  427. <p>I like to think of the <code>&gt;</code> as looking like a little funnel. It can be
  428. dangerous &mdash; you should always make sure that you&rsquo;re not going to clobber
  429. an existing file you actually want to keep.</p>
  430. <p>If you want to tack more stuff on to the end of an existing file, you can use
  431. <code>&gt;&gt;</code> instead. To test that, let&rsquo;s use <code>echo</code>, which prints out whatever string
  432. you give it on a line by itself:</p>
  433. <!-- exec -->
  434. <pre><code>$ echo 'hello' &gt; hello_world
  435. </code></pre>
  436. <!-- end -->
  437. <!-- exec -->
  438. <pre><code>$ echo 'world' &gt;&gt; hello_world
  439. </code></pre>
  440. <!-- end -->
  441. <!-- exec -->
  442. <pre><code>$ cat hello_world
  443. hello
  444. world
  445. </code></pre>
  446. <!-- end -->
  447. <p>You can also take a file and pull it directly back into the input of a given
  448. program, which is a bit like a funnel going the other direction:</p>
  449. <!-- exec -->
  450. <pre><code>$ nl &lt; all_authors
  451. 1 Eden Robinson
  452. 2 Gwendolyn L. Waring
  453. 3 James Tiptree, Jr.
  454. 4 John Brunner
  455. 5 John Ronald Reuel Tolkien
  456. 6 Jo Walton
  457. 7 Miriam Toews
  458. 8 Pat Cadigan
  459. 9 Ursula K. Le Guin
  460. 10 Vanessa Veselka
  461. </code></pre>
  462. <!-- end -->
  463. <p><code>nl</code> is just a way to <strong>n</strong>umber <strong>l</strong>ines. This command accomplishes pretty much
  464. the same thing as <code>cat all_authors | nl</code>, or <code>nl all_authors</code>. You won&rsquo;t see
  465. it used as often as <code>|</code> and <code>&gt;</code>, since most utilities can read files on their
  466. own, but it can save you from typing <code>cat</code> quite so often.</p>
  467. <p>We&rsquo;ll use these features liberally from here on out.</p>
  468. <h2><a name=code-help-code-and-man-pages href=#code-help-code-and-man-pages>#</a> <code>--help</code> and man pages</h2>
  469. <p>You can change the behavior of most tools by giving them different options.
  470. This is all well and good if you already know what options are available,
  471. but what if you don&rsquo;t?</p>
  472. <p>Often, you can ask the tool itself:</p>
  473. <pre><code>$ sort --help
  474. Usage: sort [OPTION]... [FILE]...
  475. or: sort [OPTION]... --files0-from=F
  476. Write sorted concatenation of all FILE(s) to standard output.
  477. Mandatory arguments to long options are mandatory for short options too.
  478. Ordering options:
  479. -b, --ignore-leading-blanks ignore leading blanks
  480. -d, --dictionary-order consider only blanks and alphanumeric characters
  481. -f, --ignore-case fold lower case to upper case characters
  482. -g, --general-numeric-sort compare according to general numerical value
  483. -i, --ignore-nonprinting consider only printable characters
  484. -M, --month-sort compare (unknown) &lt; 'JAN' &lt; ... &lt; 'DEC'
  485. -h, --human-numeric-sort compare human readable numbers (e.g., 2K 1G)
  486. -n, --numeric-sort compare according to string numerical value
  487. -R, --random-sort sort by random hash of keys
  488. --random-source=FILE get random bytes from FILE
  489. -r, --reverse reverse the result of comparisons
  490. </code></pre>
  491. <p>&hellip;and so on. (It goes on for a while in this vein.)</p>
  492. <p>If that doesn&rsquo;t work, or doesn&rsquo;t provide enough info, the next thing to try is
  493. called a man page. (&ldquo;man&rdquo; is short for &ldquo;manual&rdquo;. It&rsquo;s sort of an unfortunate
  494. abbreviation.)</p>
  495. <pre><code>$ man sort
  496. SORT(1) User Commands SORT(1)
  497. NAME
  498. sort - sort lines of text files
  499. SYNOPSIS
  500. sort [OPTION]... [FILE]...
  501. sort [OPTION]... --files0-from=F
  502. DESCRIPTION
  503. Write sorted concatenation of all FILE(s) to standard output.
  504. </code></pre>
  505. <p>&hellip;and so on. Manual pages vary in quality, and it can take a while to get
  506. used to reading them, but they&rsquo;re very often the best place to look for help.</p>
  507. <p>If you&rsquo;re not sure what <em>program</em> you want to use to solve a given problem, you
  508. might try searching all the man pages on the system for a keyword. <code>man</code>
  509. itself has an option to let you do this - <code>man -k keyword</code> - but most systems
  510. also have a shortcut called <code>apropos</code>, which I like to use because it&rsquo;s easy to
  511. remember if you imagine yourself saying &ldquo;apropos of [some problem I have]&hellip;&rdquo;</p>
  512. <!-- exec -->
  513. <pre><code>$ apropos -s1 sort
  514. apt-sortpkgs (1) - Utility to sort package index files
  515. bunzip2 (1) - a block-sorting file compressor, v1.0.6
  516. bzip2 (1) - a block-sorting file compressor, v1.0.6
  517. comm (1) - compare two sorted files line by line
  518. sort (1) - sort lines of text files
  519. tsort (1) - perform topological sort
  520. </code></pre>
  521. <!-- end -->
  522. <p>It&rsquo;s useful to know that the manual represented by <code>man</code> has numbered sections
  523. for different kinds of manual pages. Most of what the average user needs to
  524. know about lives in section 1, &ldquo;User Commands&rdquo;, so you&rsquo;ll often see the names
  525. of different tools written like <code>sort(1)</code> or <code>cat(1)</code>. This can be a good way
  526. to make it clear in writing that you&rsquo;re talking about a specific piece of
  527. software rather than a verb or a small carnivorous mammal. (I specified <code>-s1</code>
  528. for section 1 above just to cut down on clutter, though in practice I usually
  529. don&rsquo;t bother.)</p>
  530. <p>Like other literary traditions, Unix is littered with this sort of convention.
  531. This one just happens to date from a time when the manual was still a physical
  532. book.</p>
  533. <h2><a name=wc href=#wc>#</a> wc</h2>
  534. <p><code>wc</code> stands for <strong>w</strong>ord <strong>c</strong>ount. It does about what you&rsquo;d expect - it
  535. counts the number of words in its input.</p>
  536. <pre><code>$ wc index.md
  537. 736 4117 24944 index.md
  538. </code></pre>
  539. <p>736 is the number of lines, 4117 the number of words, and 24944 the number of
  540. bytes (roughly, characters) in the file I&rsquo;m writing right now. I use this constantly. Most
  541. obviously, it&rsquo;s a good way to get an idea of how much you&rsquo;ve written. <code>wc</code> is
  542. the tool I used to track my progress the last time I tried National Novel
  543. Writing Month:</p>
  544. <pre><code>$ find ~/p1k3/archives/2010/11 -regextype egrep -regex '.*([0-9]+|index)' -type f | xargs wc -w | tail -1
  545. 6585 total
  546. </code></pre>
  547. <!-- exec -->
  548. <pre><code>$ cowsay 'embarrassing.'
  549. _______________
  550. &lt; embarrassing. &gt;
  551. ---------------
  552. \ ^__^
  553. \ (oo)\_______
  554. (__)\ )\/\
  555. ||----w |
  556. || ||
  557. </code></pre>
  558. <!-- end -->
  559. <p>Anyway. The less obvious thing about <code>wc</code> is that you can use it to count the
  560. output of other commands. Want to know <em>how many</em> unique authors we have?</p>
  561. <!-- exec -->
  562. <pre><code>$ sort authors_* | uniq | wc -l
  563. 10
  564. </code></pre>
  565. <!-- end -->
  566. <p>This kind of thing is trivial, but it comes in handy more often than you might
  567. think.</p>
  568. <h2><a name=head-tail-and-cut href=#head-tail-and-cut>#</a> head, tail, and cut</h2>
  569. <p>Remember our old pal <code>cat</code>, which just splats everything it&rsquo;s given back to
  570. standard output?</p>
  571. <p>Sometimes you&rsquo;ve got a piece of output that&rsquo;s more than you actually want to
  572. deal with at once. Maybe you just want to glance at the first few lines in a
  573. file:</p>
  574. <!-- exec -->
  575. <pre><code>$ head -3 colors
  576. RED
  577. blue
  578. red
  579. </code></pre>
  580. <!-- end -->
  581. <p>&hellip;or maybe you want to see the last thing in a list:</p>
  582. <!-- exec -->
  583. <pre><code>$ sort colors | uniq -i | tail -1
  584. red
  585. </code></pre>
  586. <!-- end -->
  587. <p>&hellip;or maybe you&rsquo;re only interested in the first &ldquo;field&rdquo; in some list. You might
  588. use <code>cut</code> here, asking it to treat spaces as delimiters between fields and
  589. return only the first field for each line of its input:</p>
  590. <!-- exec -->
  591. <pre><code>$ cut -d' ' -f1 ./authors_*
  592. Eden
  593. Vanessa
  594. Miriam
  595. Gwendolyn
  596. Ursula
  597. Jo
  598. Pat
  599. John
  600. Vanessa
  601. James
  602. John
  603. </code></pre>
  604. <!-- end -->
  605. <p>Suppose we&rsquo;re curious what the few most commonly occurring first names on our
  606. author list are? Here&rsquo;s an approach, silly but effective, that combines a lot
  607. of what we&rsquo;ve discussed so far and looks like plenty of one-liners I wind up
  608. writing in real life:</p>
  609. <!-- exec -->
  610. <pre><code>$ cut -d' ' -f1 ./authors_* | sort | uniq -ci | sort -n | tail -3
  611. 1 Ursula
  612. 2 John
  613. 2 Vanessa
  614. </code></pre>
  615. <!-- end -->
  616. <p>Let&rsquo;s walk through this one step by step:</p>
  617. <p>First, we have <code>cut</code> extract the first field of each line in our author lists.</p>
  618. <pre><code>cut -d' ' -f1 ./authors_*
  619. </code></pre>
  620. <p>Then we sort these results</p>
  621. <pre><code>| sort
  622. </code></pre>
  623. <p>and pass them to <code>uniq</code>, asking it for a case-insensitive count of each
  624. repeated line</p>
  625. <pre><code>| uniq -ci
  626. </code></pre>
  627. <p>then sort again, numerically,</p>
  628. <pre><code>| sort -n
  629. </code></pre>
  630. <p>and finally, we chop off everything but the last three lines:</p>
  631. <pre><code>| tail -3
  632. </code></pre>
  633. <p>If you wanted to make sure to count an individual author&rsquo;s first name
  634. only once, even if that author appears more than once in the files,
  635. you could instead do:</p>
  636. <!-- exec -->
  637. <pre><code>$ sort -u ./authors_* | cut -d' ' -f1 | uniq -ci | sort -n | tail -3
  638. 1 Ursula
  639. 1 Vanessa
  640. 2 John
  641. </code></pre>
  642. <!-- end -->
  643. <h2><a name=tab-separated-values href=#tab-separated-values>#</a> tab separated values</h2>
  644. <p>Notice above how we had to tell <code>cut</code> that &ldquo;fields&rdquo; in <code>authors_*</code> are
  645. delimited by spaces? It turns out that if you don&rsquo;t use <code>-d</code>, <code>cut</code> defaults
  646. to using tab characters for a delimiter.</p>
  647. <p>Tab characters are sort of weird little animals. You can&rsquo;t usually <em>see</em> them
  648. directly &mdash; they&rsquo;re like a space character that takes up more than one space
  649. when displayed. By convention, one tab is usually rendered as 8 spaces, but
  650. it&rsquo;s up to the software that&rsquo;s displaying the character what it wants to do.</p>
  651. <p>(In fact, it&rsquo;s more complicated than that: Tabs are often rendered as marking
  652. <em>tab stops</em>, which is a concept I remember from 7th grade typing classes, but
  653. haven&rsquo;t actually thought about in my day-to-day life for nearly 20 years.)</p>
  654. <p>Here&rsquo;s a version of our <code>all_authors</code> that&rsquo;s been rearranged so that the first
  655. field is the author&rsquo;s last name, the second is their first name, the third is
  656. their middle name or initial (if we know it) and the fourth is any suffix.
  657. Fields are separated by a single tab character:</p>
  658. <!-- exec -->
  659. <pre><code>$ cat all_authors.tsv
  660. Robinson Eden
  661. Waring Gwendolyn L.
  662. Tiptree James Jr.
  663. Brunner John
  664. Tolkien John Ronald Reuel
  665. Walton Jo
  666. Toews Miriam
  667. Cadigan Pat
  668. Le Guin Ursula K.
  669. Veselka Vanessa
  670. </code></pre>
  671. <!-- end -->
  672. <p>That looks kind of garbled, right? In order to make it a little more obvious
  673. what&rsquo;s happening, let&rsquo;s use <code>cat -T</code>, which displays tab characters as <code>^I</code>:</p>
  674. <!-- exec -->
  675. <pre><code>$ cat -T all_authors.tsv
  676. Robinson^IEden
  677. Waring^IGwendolyn^IL.
  678. Tiptree^IJames^I^IJr.
  679. Brunner^IJohn
  680. Tolkien^IJohn^IRonald Reuel
  681. Walton^IJo
  682. Toews^IMiriam
  683. Cadigan^IPat
  684. Le Guin^IUrsula^IK.
  685. Veselka^IVanessa
  686. </code></pre>
  687. <!-- end -->
  688. <p>It looks odd when displayed because some names are at or nearly at 8 characters long.
  689. &ldquo;Robinson&rdquo;, at 8 characters, overshoots the first tab stop, so &ldquo;Eden&rdquo; gets indented
  690. further than other first names, and so on.</p>
  691. <p>Fortunately, in order to make this more human-readable, we can pass it through
  692. <code>expand</code>, which turns tabs into a given number of spaces (8 by default):</p>
  693. <!-- exec -->
  694. <pre><code>$ expand -t14 all_authors.tsv
  695. Robinson Eden
  696. Waring Gwendolyn L.
  697. Tiptree James Jr.
  698. Brunner John
  699. Tolkien John Ronald Reuel
  700. Walton Jo
  701. Toews Miriam
  702. Cadigan Pat
  703. Le Guin Ursula K.
  704. Veselka Vanessa
  705. </code></pre>
  706. <!-- end -->
  707. <p>Now it&rsquo;s easy to sort by last name:</p>
  708. <!-- exec -->
  709. <pre><code>$ sort -k1 all_authors.tsv | expand -t14
  710. Brunner John
  711. Cadigan Pat
  712. Le Guin Ursula K.
  713. Robinson Eden
  714. Tiptree James Jr.
  715. Toews Miriam
  716. Tolkien John Ronald Reuel
  717. Veselka Vanessa
  718. Walton Jo
  719. Waring Gwendolyn L.
  720. </code></pre>
  721. <!-- end -->
  722. <p>Or just extract middle names and initials:</p>
  723. <!-- exec -->
  724. <pre><code>$ cut -f3 all_authors.tsv
  725. L.
  726. Ronald Reuel
  727. K.
  728. </code></pre>
  729. <!-- end -->
  730. <p>It probably won&rsquo;t surprise you to learn that there&rsquo;s a corresponding <code>paste</code>
  731. command, which takes two or more files and stitches them together with tab
  732. characters. Let&rsquo;s extract a couple of things from our author list and put them
  733. back together in a different order:</p>
  734. <!-- exec -->
  735. <pre><code>$ cut -f1 all_authors.tsv &gt; lastnames
  736. </code></pre>
  737. <!-- end -->
  738. <!-- exec -->
  739. <pre><code>$ cut -f2 all_authors.tsv &gt; firstnames
  740. </code></pre>
  741. <!-- end -->
  742. <!-- exec -->
  743. <pre><code>$ paste firstnames lastnames | sort -k2 | expand -t12
  744. John Brunner
  745. Pat Cadigan
  746. Ursula Le Guin
  747. Eden Robinson
  748. James Tiptree
  749. Miriam Toews
  750. John Tolkien
  751. Vanessa Veselka
  752. Jo Walton
  753. Gwendolyn Waring
  754. </code></pre>
  755. <!-- end -->
  756. <p>As these examples show, TSV is something very like a primitive spreadsheet: A
  757. way to represent information in columns and rows. In fact, it&rsquo;s a close cousin
  758. of CSV, which is often used as a lowest-common-denominator format for
  759. transferring spreadsheets, and which represents data something like this:</p>
  760. <pre><code>last,first,middle,suffix
  761. Tolkien,John,Ronald Reuel,
  762. Tiptree,James,,Jr.
  763. </code></pre>
  764. <p>The advantage of tabs is that they&rsquo;re supported by a bunch of the standard
  765. tools. A disadvantage is that they&rsquo;re kind of ugly and can be weird to deal
  766. with, but they&rsquo;re useful anyway, and character-delimited rows are often a
  767. good-enough way to hack your way through problems that call for basic
  768. structure.</p>
  769. <h2><a name=finding-text-grep href=#finding-text-grep>#</a> finding text: grep</h2>
  770. <p>After all those contortions, what if you actually just want to see <em>which lists</em>
  771. an individual author appears on?</p>
  772. <!-- exec -->
  773. <pre><code>$ grep 'Vanessa' ./authors_*
  774. ./authors_contemporary_fic:Vanessa Veselka
  775. ./authors_sff:Vanessa Veselka
  776. </code></pre>
  777. <!-- end -->
  778. <p><code>grep</code> takes a string to search for and, optionally, a list of files to search
  779. in. If you don&rsquo;t specify files, it&rsquo;ll look through standard input instead:</p>
  780. <!-- exec -->
  781. <pre><code>$ cat ./authors_* | grep 'Vanessa'
  782. Vanessa Veselka
  783. Vanessa Veselka
  784. </code></pre>
  785. <!-- end -->
  786. <p>Most of the time, piping the output of <code>cat</code> to <code>grep</code> is considered silly,
  787. because <code>grep</code> knows how to find things in files on its own. Many thousands of
  788. words have been written on this topic by leading lights of the nerd community.</p>
  789. <p>You&rsquo;ve probably noticed that this result doesn&rsquo;t contain filenames (and thus
  790. isn&rsquo;t very useful to us). That&rsquo;s because all <code>grep</code> saw was the lines in the
  791. files, not the names of the files themselves.</p>
  792. <h2><a name=now-you-have-n-problems href=#now-you-have-n-problems>#</a> now you have n problems</h2>
  793. <p>To close out this introductory chapter, let&rsquo;s spend a little time on a topic
  794. that will likely vex, confound, and (occasionally) delight you for as long as
  795. you are acquainted with the command line.</p>
  796. <p>When I was talking about <code>grep</code> a moment ago, I fudged the details more than a
  797. little by saying that it expects a string to search for. What <code>grep</code>
  798. <em>actually</em> expects is a <em>pattern</em>. Moreover, it expects a specific kind of
  799. pattern, what&rsquo;s known as a <em>regular expression</em>, a cumbersome phrase frequently
  800. shortened to regex.</p>
  801. <p>There&rsquo;s a lot of theory about what makes up a regular expression. Fortunately,
  802. very little of it matters to the short version that will let you get useful
  803. stuff done. The short version is that a regex is like using wildcards in the
  804. shell to match groups of files, but for text in general and with more magic.</p>
  805. <!-- exec -->
  806. <pre><code>$ grep 'Jo.*' ./authors_*
  807. ./authors_sff:Jo Walton
  808. ./authors_sff:John Ronald Reuel Tolkien
  809. ./authors_sff:John Brunner
  810. </code></pre>
  811. <!-- end -->
  812. <p>The pattern <code>Jo.*</code> says that we&rsquo;re looking for lines which contain a literal
  813. <code>Jo</code>, followed by any quantity (including none) of any character. In a regex,
  814. <code>.</code> means &ldquo;anything&rdquo; and <code>*</code> means &ldquo;any amount of the preceding thing&rdquo;.</p>
  815. <p><code>.</code> and <code>*</code> are magical. In the particular dialect of regexen understood
  816. by <code>grep</code>, other magical things include:</p>
  817. <table>
  818. <tr><td><code>^</code> </td> <td>start of a line </td></tr>
  819. <tr><td><code>$</code> </td> <td>end of a line </td></tr>
  820. <tr><td><code>[abc]</code></td> <td>one of a, b, or c </td></tr>
  821. <tr><td><code>[a-z]</code></td> <td>a character in the range a through z</td></tr>
  822. <tr><td><code>[0-9]</code></td> <td>a character in the range 0 through 9</td></tr>
  823. <tr><td><code>+</code> </td> <td>one or more of the preceding thing </td></tr>
  824. <tr><td><code>?</code> </td> <td>0 or 1 of the preceding thing </td></tr>
  825. <tr><td><code>*</code> </td> <td>any number of the preceding thing </td></tr>
  826. <tr><td><code>(foo|bar)</code></td> <td>"foo" or "bar"</td></tr>
  827. <tr><td><code>(foo)?</code></td> <td>optional "foo"</td></tr>
  828. </table>
  829. <p>It&rsquo;s actually a little more complicated than that: By default, if you want to
  830. use a lot of the magical characters, you have to prefix them with <code>\</code>. This is
  831. both ugly and confusing, so unless you&rsquo;re writing a very simple pattern, it&rsquo;s
  832. often easiest to call <code>grep -E</code>, for <strong>E</strong>xtended regular expressions, which
  833. means that lots of characters will have special meanings.</p>
  834. <p>Authors with 4-letter first names:</p>
  835. <!-- exec -->
  836. <pre><code>$ grep -iE '^[a-z]{4} ' ./authors_*
  837. ./authors_contemporary_fic:Eden Robinson
  838. ./authors_sff:John Ronald Reuel Tolkien
  839. ./authors_sff:John Brunner
  840. </code></pre>
  841. <!-- end -->
  842. <p>A count of authors named John:</p>
  843. <!-- exec -->
  844. <pre><code>$ grep -c '^John ' ./all_authors
  845. 2
  846. </code></pre>
  847. <!-- end -->
  848. <p>Lines in this file matching the words &ldquo;magic&rdquo; or &ldquo;magical&rdquo;:</p>
  849. <pre><code>$ grep -iE 'magic(al)?' ./index.md
  850. Pipes are some of the most important magic in the shell. When the people who
  851. shell to match groups of files, but for text in general and with more magic.
  852. `.` and `*` are magical. In the particular dialect of regexen understood
  853. by `grep`, other magical things include:
  854. use a lot of the magical characters, you have to prefix them with `\`. This is
  855. Lines in this file matching the words "magic" or "magical":
  856. $ grep -iE 'magic(al)?' ./index.md
  857. </code></pre>
  858. <p>Find some &ldquo;-agic&rdquo; words in a big list of words:</p>
  859. <!-- exec -->
  860. <pre><code>$ grep -iE '(m|tr|pel)agic' /usr/share/dict/words
  861. magic
  862. magic's
  863. magical
  864. magically
  865. magician
  866. magician's
  867. magicians
  868. pelagic
  869. tragic
  870. tragically
  871. tragicomedies
  872. tragicomedy
  873. tragicomedy's
  874. </code></pre>
  875. <!-- end -->
  876. <p><code>grep</code> isn&rsquo;t the only - or even the most important - tool that makes use of
  877. regular expressions, but it&rsquo;s a good place to start because it&rsquo;s one of the
  878. fundamental building blocks for so many other operations. Filtering lists of
  879. things, matching patterns within collections, and writing concise descriptions
  880. of how text should be transformed are at the heart of a practical approach to
  881. Unix-like systems. Regexen turn out to be a seductively powerful way to do
  882. these things - so much so that they&rsquo;ve crept their way into text editors,
  883. databases, and full-featured programming languages.</p>
  884. <p>There&rsquo;s a dark side to all of this, for the truth about regular expressions is
  885. that they are ugly, inconsistent, brittle, and <em>incredibly</em> difficult to think
  886. clearly about. They take years to master and reward the wielder with great
  887. power, but they are also a trap: a temptation towards the path of cleverness
  888. masquerading as wisdom.</p>
  889. <p style="text-align:center;"> ✑</p>
  890. <p>I&rsquo;ll be returning to this theme, but for the time being let&rsquo;s move on. Now
  891. that we&rsquo;ve established, however haphazardly, some of the basics, let&rsquo;s consider
  892. their application to a real-world task.</p>
  893. <hr />
  894. <h1><a name=a-literary-problem href=#a-literary-problem>#</a> 2. a literary problem</h1>
  895. <p>The <a href="../literary_environment">previous chapter</a> introduced a bunch of tools
  896. using contrived examples. Now we&rsquo;ll look at a real problem, and work through a
  897. solution by building on tools we&rsquo;ve already covered.</p>
  898. <p>So on to the problem: I write poetry.</p>
  899. <p>{rimshot dot wav}</p>
  900. <p>Most of the poems I have written are not very good, but lately I&rsquo;ve been
  901. thinking that I&rsquo;d like to comb through the last ten years&rsquo; worth and pull
  902. the least-embarrassing stuff into a single collection.</p>
  903. <p>I&rsquo;ve hinted at how the contents of my blog are stored as files, but let&rsquo;s take
  904. a look at the whole thing:</p>
  905. <pre><code>$ ls -F ~/p1k3/archives/
  906. 1997/ 2003/ 2009/ bones/ meta/
  907. 1998/ 2004/ 2010/ chapbook/ winfield/
  908. 1999/ 2005/ 2011/ cli/ wip/
  909. 2000/ 2006/ 2012/ colophon/
  910. 2001/ 2007/ 2013/ europe/
  911. 2002/ 2008/ 2014/ hack/
  912. </code></pre>
  913. <p>(<code>ls</code>, again, just lists files. <code>-F</code> tells it to append a character that shows
  914. what type of file we&rsquo;re looking at, such as a trailing / for directories.
  915. <code>~</code> is a shorthand that means &ldquo;my home directory&rdquo;, which in this case is
  916. <code>/home/brennen</code>.)</p>
  917. <p>Each of the directories here holds other directories. The ones for each year
  918. have sub-directories for the months of the year, which in turn contain files
  919. for the days. The files are just little pieces of HTML and Markdown and some
  920. other stuff. Many years ago, before I had much of an idea how to program, I
  921. wrote a script to glue them all together into a web page and serve them up to
  922. visitors. This all sounds complicated, but all it really means is that if I
  923. want to write a blog entry, I just open a file and type some stuff. Here&rsquo;s an
  924. example for March 1st:</p>
  925. <!-- exec -->
  926. <pre><code>$ cat ~/p1k3/archives/2014/3/1
  927. &lt;h1&gt;Saturday, March 1&lt;/h1&gt;
  928. &lt;markdown&gt;
  929. Sometimes I'm going along on a Saturday morning, still a little dazed from the
  930. night before, and I think something like "I should just go write a detailed
  931. analysis of hooded sweatshirts". Mostly these thoughts don't survive contact
  932. with an actual keyboard. It's almost certainly for the best.
  933. &lt;/markdown&gt;
  934. </code></pre>
  935. <!-- end -->
  936. <p>And here&rsquo;s an older one that contains a short poem:</p>
  937. <!-- took this one out of exec block 'cause later i
  938. made a dir out of it... -->
  939. <pre><code>$ cat ~/p1k3/archives/2012/10/9
  940. &lt;h1&gt;tuesday, october 9&lt;/h1&gt;
  941. &lt;freeverse&gt;i am a stateful machine
  942. i exist in a manifold of consequence
  943. a clattering miscellany of impure functions
  944. and side effects&lt;/freeverse&gt;
  945. </code></pre>
  946. <p>Notice that <code>&lt;freeverse&gt;</code> bit? It kind of looks like an HTML tag, but it&rsquo;s
  947. not. What it actually does is tell my blog script that it should format the
  948. text it contains like a poem. The specifics don&rsquo;t matter for our purposes
  949. (yet), but this convention is going to come in handy, because the first thing I
  950. want to do is get a list of all the entries that contain poems.</p>
  951. <p>Remember <code>grep</code>?</p>
  952. <pre><code>$ grep -ri '&lt;freeverse&gt;' ~/p1k3/archives &gt; ~/possible_poems
  953. </code></pre>
  954. <p>Let&rsquo;s step through this bit by bit:</p>
  955. <p>First, I&rsquo;m asking <code>grep</code> to search <strong>r</strong>ecursively, <strong>i</strong>gnoring case.
  956. &ldquo;Recursively&rdquo; just means that every time the program finds a directory, it
  957. should descend into that directory and search in any files there as well.</p>
  958. <pre><code>grep -ri
  959. </code></pre>
  960. <p>Next comes a pattern to search for. It&rsquo;s in single quotes because the
  961. characters <code>&lt;</code> and <code>&gt;</code> have a special meaning to the shell, and here we need
  962. the shell to understand that it should treat them as literal angle brackets
  963. instead.</p>
  964. <pre><code>'&lt;freeverse&gt;'
  965. </code></pre>
  966. <p>This is the path I want to search:</p>
  967. <pre><code>~/p1k3/archives
  968. </code></pre>
  969. <p>Finally, because there are so many entries to search, I know the process will
  970. be slow and produce a large list, so I tell the shell to redirect it to a file
  971. called <code>possible_poems</code> in my home directory:</p>
  972. <pre><code>&gt; ~/possible_poems
  973. </code></pre>
  974. <p>This is quite a few instances&hellip;</p>
  975. <pre><code>$ wc -l ~/possible_poems
  976. 679 /home/brennen/possible_poems
  977. </code></pre>
  978. <p>&hellip;and it&rsquo;s also not super-pretty to look at:</p>
  979. <pre><code>$ head -5 ~/possible_poems
  980. /home/brennen/p1k3/archives/2011/10/14:&lt;freeverse&gt;i've got this friend has a real knack
  981. /home/brennen/p1k3/archives/2011/4/25:&lt;freeverse&gt;i can't claim to strive for it
  982. /home/brennen/p1k3/archives/2011/8/10:&lt;freeverse&gt;one diminishes or becomes greater
  983. /home/brennen/p1k3/archives/2011/8/12:&lt;freeverse&gt;
  984. /home/brennen/p1k3/archives/2011/1/1:&lt;freeverse&gt;six years on
  985. </code></pre>
  986. <p>Still, it&rsquo;s a decent start. I can see paths to the files I have to check, and
  987. usually a first line. Since I use a fancy text editor, I can just go down the
  988. list opening each file in a new window and copying the stuff I&rsquo;m interested in
  989. to a new file.</p>
  990. <p>This is good enough for government work, but what if instead of jumping around
  991. between hundreds of files, I&rsquo;d rather read everything in one file and just weed
  992. out the bad ones as I go?</p>
  993. <pre><code>$ cat `grep -ril '&lt;freeverse&gt;' ~/p1k3/archives` &gt; ~/possible_poems_full
  994. </code></pre>
  995. <p>This probably bears some explaining. <code>grep</code> is still doing all the real work
  996. here. The main difference from before is that <code>-l</code> tells grep to just list any
  997. files it finds which contain a match.</p>
  998. <pre><code>`grep -ril '&lt;freeverse&gt;' ~/p1k3/archives`
  999. </code></pre>
  1000. <p>Notice those backticks around the grep command? This part is a little
  1001. trippier. It turns out that if you put backticks around something in a
  1002. command, it&rsquo;ll get executed and replaced with its result, which in turn gets
  1003. executed as part of the larger command. So what we&rsquo;re really saying is
  1004. something like:</p>
  1005. <pre><code>$ cat [all of the files in the blog directory with &lt;freeverse&gt; in them]
  1006. </code></pre>
  1007. <p>Did you catch that? I just wrote a command that rewrote itself as a
  1008. <em>different</em>, more specific command. And it appears to have worked on the
  1009. first try:</p>
  1010. <pre><code>$ wc ~/possible_poems_full
  1011. 17628 80980 528699 /home/brennen/possible_poems_full
  1012. </code></pre>
  1013. <p>Welcome to wizard school.</p>
  1014. <hr />
  1015. <h1><a name=programmerthink href=#programmerthink>#</a> 3. programmerthink</h1>
  1016. <p>In the <a href="#a-literary-problem">preceding chapter</a>, I worked through accumulating
  1017. a big piece of text from some other, smaller texts. I started with a bunch of
  1018. files and wound up with one big file called <code>possible_poems_full</code>.</p>
  1019. <p>Let&rsquo;s talk for a minute about how programmers approach problems like this one.
  1020. What I&rsquo;ve just done is sort of an old-school humanities take on things:
  1021. Metaphorically speaking, I took a book off the shelf and hauled it down to the
  1022. copy machine to xerox a bunch of pages, and now I&rsquo;m going to start in on them
  1023. with a highlighter and some Post-Its or something. A process like this will
  1024. often trigger a cascade of questions in the programmer-mind:</p>
  1025. <ul>
  1026. <li>What if, halfway through the project, I realize my selection criteria were all
  1027. wrong and have to backtrack?</li>
  1028. <li>What if I discover corrections that also need to be made in the source documents?</li>
  1029. <li>What if I want to access metadata, like the original location of a file?</li>
  1030. <li>What if I want to quickly re-order the poems according to some new criteria?</li>
  1031. <li>Why am I storing the same text in two different places?</li>
  1032. </ul>
  1033. <p>A unifying theme of these questions is that they could all be answered by
  1034. involving a little more abstraction.</p>
  1035. <p style="text-align:center;"> ★</p>
  1036. <p>Some kinds of abstraction are so common in the physical world that we can
  1037. forget they&rsquo;re part of a sophisticated technology. For example, a good deal of
  1038. bicycle maintenance can be accomplished with a cheap multi-tool containing a
  1039. few different sizes of hex wrench and a couple of screwdrivers.</p>
  1040. <p>A hex wrench or screwdriver doesn&rsquo;t really know anything about bicycles. All
  1041. it <em>really</em> knows about is fitting into a space and allowing torque to be
  1042. applied. Standardized fasteners and adjustment mechanisms on a bicycle ensure
  1043. that the work can be done anywhere, by anyone with a certain set of tools.
  1044. Standard tools mean that if you can work on a particular bike, you can work on
  1045. <em>most</em> bikes, and even on things that aren&rsquo;t bikes at all, but were designed by
  1046. people with the same abstractions in mind.</p>
  1047. <p>The relationship between a wrench, a bolt, and the purpose of a bolt is a lot
  1048. like something we call <em>indirection</em> in software. Programs like <code>grep</code> or
  1049. <code>cat</code> don&rsquo;t really know anything about poetry. All they <em>really</em> know about is
  1050. finding lines of text in input, or sticking inputs together. Files, lines, and
  1051. text are like standardized fasteners that allow a user who can work on one kind
  1052. of data (be it poetry, a list of authors, the source code of a program) to use
  1053. the same tools for other problems and other data.</p>
  1054. <p style="text-align:center;"> ★</p>
  1055. <p>When I first started writing stuff on the web, I edited a page &mdash; a single HTML
  1056. file &mdash; by hand. When the entries on my nascent blog got old, I manually
  1057. cut-and-pasted them to archive files with names like <code>old_main97.html</code>, which
  1058. held all of the stuff I&rsquo;d written in 1997.</p>
  1059. <p>I&rsquo;m not holding this up as an example of youthful folly. In fact, it worked
  1060. fine, and just having a single, static file that you can open in any text
  1061. editor has turned out to be a <em>lot</em> more future-proof than the sophisticated
  1062. blogging software people were starting to write at the time.</p>
  1063. <p>And yet. Something about this habit nagged at my developing programmer mind
  1064. after a few years. It was just a little bit too manual and repetitive, a
  1065. little bit silly to have to write things like a table of contents by hand, or
  1066. move entries around by copy-and-pasting them to different files. Since I knew
  1067. the date for each entry, and wanted to make them navigable on that basis, why
  1068. not define a directory structure for the years and months, and then write a
  1069. file to hold each day? That way, all I&rsquo;d have to do is concatenate the files
  1070. in one directory to display any given month:</p>
  1071. <pre><code>$ cat ~/p1k3/archives/2014/1/* | head -10
  1072. &lt;h1&gt;Sunday, January 12&lt;/h1&gt;
  1073. &lt;h2&gt;the one casey is waiting for&lt;/h2&gt;
  1074. &lt;freeverse&gt;
  1075. after a while
  1076. the thing about drinking
  1077. is that it just feeds
  1078. what you drink to kill
  1079. and kills
  1080. </code></pre>
  1081. <p>I ultimately wound up writing a few thousand lines of Perl to do the actual
  1082. work, but the essential idea of the thing is still little more than invoking
  1083. <code>cat</code> on some stuff.</p>
  1084. <p>I didn&rsquo;t know the word for it at the time, but what I was reaching for was a
  1085. kind of indirection. By putting blog posts in a specific directory layout, I
  1086. was creating a simple model of the temporal structure that I considered their
  1087. most important property. Now, if I want to write commands that ask questions
  1088. about my blog posts or re-combine them in certain ways, I can address my
  1089. concerns to this model. Maybe, for example, I want a rough idea how many words
  1090. I&rsquo;ve written in blog posts so far in 2014:</p>
  1091. <pre><code>$ find ~/p1k3/archives/2014/ -type f | xargs cat | wc -w
  1092. 6677
  1093. </code></pre>
  1094. <p><code>xargs</code> is not the most intuitive command, but it&rsquo;s useful and common enough to
  1095. explain here. At the end of last chapter, when I said:</p>
  1096. <pre><code>$ cat `grep -ril '&lt;freeverse&gt;' ~/p1k3/archives` &gt; ~/possible_poems_full
  1097. </code></pre>
  1098. <p>I could also have written this as:</p>
  1099. <pre><code>$ grep -ril '&lt;freeverse&gt;' ~/p1k3/archives | xargs cat &gt; ~/possible_poems_full
  1100. </code></pre>
  1101. <p>What this does is take its input, which starts like:</p>
  1102. <pre><code>/home/brennen/p1k3/archives/2002/10/16
  1103. /home/brennen/p1k3/archives/2002/10/27
  1104. /home/brennen/p1k3/archives/2002/10/10
  1105. </code></pre>
  1106. <p>&hellip;and run <code>cat</code> on all the things in it:</p>
  1107. <pre><code>cat /home/brennen/p1k3/archives/2002/10/16 /home/brennen/p1k3/archives/2002/10/27 /home/brennen/p1k3/archives/2002/10/10 ...
  1108. </code></pre>
  1109. <p>It can be a better idea to use <code>xargs</code>, because while backticks are
  1110. incredibly useful, they have some limitations. If you&rsquo;re dealing with a very
  1111. large list of files, for example, you might exceed the maximum allowed length
  1112. for arguments to a command on your system. <code>xargs</code> is smart enough to know
  1113. that limit and run <code>cat</code> more than once if needed.</p>
  1114. <p><code>xargs</code> is actually sort of a pain to think about, and will make you jump
  1115. through some irritating hoops if you have spaces or other weirdness in your
  1116. filenames, but I wind up using it quite a bit.</p>
  1117. <p>Maybe I want to see a table of contents:</p>
  1118. <!-- exec -->
  1119. <pre><code>$ find ~/p1k3/archives/2014/ -type d | xargs ls -v | head -10
  1120. /home/brennen/p1k3/archives/2014/:
  1121. 1
  1122. 2
  1123. 3
  1124. 4
  1125. 5
  1126. 6
  1127. 7
  1128. 8
  1129. 9
  1130. </code></pre>
  1131. <!-- end -->
  1132. <p>Or find the subtitles I used in 2012:</p>
  1133. <!-- exec -->
  1134. <pre><code>$ find ~/p1k3/archives/2012/ -type f | xargs perl -ne 'print "$1\n" if m{&lt;h2&gt;(.*?)&lt;/h2&gt;}'
  1135. pursuit
  1136. fragment
  1137. this poem again
  1138. i'll do better next time
  1139. timebinding animals
  1140. more observations on gear nerdery &amp;amp; utility fetishism
  1141. thrift
  1142. A miracle, in fact, means work
  1143. &lt;em&gt;technical notes for late october&lt;/em&gt;, or &lt;em&gt;it gets dork out earlier these days&lt;/em&gt;
  1144. radio
  1145. light enough to travel
  1146. 12:06am
  1147. "figures like Heinlein and Gingrich"
  1148. </code></pre>
  1149. <!-- end -->
  1150. <p>The crucial thing about this is that the filesystem <em>itself</em> is just like <code>cat</code>
  1151. and <code>grep</code>: It doesn&rsquo;t know anything about blogs (or poetry), and it&rsquo;s
  1152. basically indifferent to the actual <em>structure</em> of a file like
  1153. <code>~/p1k3/archives/2014/1/12</code>. What the filesystem knows is that there are files
  1154. with certain names in certain places. It need not know anything about the
  1155. <em>meaning</em> of those names in order to be useful; in fact, it&rsquo;s best if it stays
  1156. agnostic about the question, for this enables us to assign our own meaning to a
  1157. structure and manipulate that structure with standard tools.</p>
  1158. <p style="text-align:center;"> ★</p>
  1159. <p>Back to the problem at hand: I have this collection of files, and I know how
  1160. to extract the ones that contain poems. My goal is to see all the poems and
  1161. collect the subset of them that I still find worthwhile. Just knowing how to
  1162. grep and then edit a big file solves my problem, in a basic sort of way. And
  1163. yet: Something about this nags at my mind. I find that, just as I can already
  1164. use standard tools and the filesystem to ask questions about all of my blog
  1165. posts in a given year or month, I would like to be able to ask questions about
  1166. the set of interesting poems.</p>
  1167. <p>If I want the freedom to execute many different sorts of commands against this
  1168. set of poems, it begins to seem that I need a model.</p>
  1169. <p>When programmers talk about models, they often mean something that people in
  1170. the sciences would recognize: We find ways to represent the arrangement of
  1171. facts so that we can think about them. A structured representation of things
  1172. often means that we can <em>change</em> those things, or at least derive new
  1173. understanding of them.</p>
  1174. <p style="text-align:center;"> ★</p>
  1175. <p>At this point in the narrative, I could pretend that my next step is
  1176. immediately obvious, but in fact it&rsquo;s not. I spend a couple of days thinking
  1177. off and on about how to proceed, scribbling notes during bus rides and while
  1178. drinking beers at the pizza joint down the street. I assess and discard ideas
  1179. which fall into a handful of broad approaches:</p>
  1180. <ul>
  1181. <li>Store blog entries in a relational database system which would allow me to
  1182. associate them with data like &ldquo;this entry is in a collection called &lsquo;ok
  1183. poems&rsquo;&rdquo;.</li>
  1184. <li>Selectively build up a file containing the list of files with ok poems, and use
  1185. it to do other tasks.</li>
  1186. <li>Define a format for metadata that lives within entry files.</li>
  1187. <li>Turn each interesting file into a directory of its own which contains a file
  1188. with the original text and another file with metadata.</li>
  1189. </ul>
  1190. <p>I discard the relational database idea immediately: I like working with files,
  1191. and I don&rsquo;t feel like abandoning a model that&rsquo;s served me well for my entire
  1192. adult life.</p>
  1193. <p>Building up an index file to point at the other files I&rsquo;m working with has a
  1194. certain appeal. I&rsquo;m already most of the way there with the <code>grep</code> output in
  1195. <code>possible_poems</code>. It would be easy to write shell commands to add, remove,
  1196. sort, and search entries. Still, it doesn&rsquo;t feel like a very satisfying
  1197. solution unto itself. I&rsquo;d like to know that an entry is part of the collection
  1198. just by looking at the entry, without having to cross-reference it to a list
  1199. somewhere else.</p>
  1200. <p>What about putting some meaningful text in the file itself? I thought about
  1201. a bunch of different ways to do this, some of them really complicated, and
  1202. eventually arrived at this:</p>
  1203. <pre><code>&lt;!-- collection: ok-poems --&gt;
  1204. </code></pre>
  1205. <p>The <code>&lt;!-- --&gt;</code> bits are how you define a comment in HTML, which means that
  1206. neither my blog code nor web browsers nor my text editor have to know anything
  1207. about the format, but I can easily find files with certain values. Check it:</p>
  1208. <pre><code>$ find ~/p1k3/archives -type f | xargs perl -ne 'print "$ARGV: $1 -&gt; $2\n" if m{&lt;!-- ([a-z]+): (.*?) --&gt;};'
  1209. /home/brennen/p1k3/archives/2014/2/9: collection -&gt; ok-poems
  1210. </code></pre>
  1211. <p>That&rsquo;s an ugly one-liner, and I haven&rsquo;t explained half of what it does, but the
  1212. comment format actually seems pretty workable for this. It&rsquo;s a little tacky to
  1213. look at, but it&rsquo;s simple and searchable.</p>
  1214. <p>Before we settle, though, let&rsquo;s turn to the notion of making each entry into a
  1215. directory that can contain some structured metadata in a separate file.
  1216. Imagine something like:</p>
  1217. <pre><code>$ ls ~/p1k3/archives/2013/2/9
  1218. index Meta
  1219. </code></pre>
  1220. <p>Here I use the name &ldquo;index&rdquo; for the main part of the entry because it&rsquo;s a
  1221. convention of web sites for the top-level page in a directory to be called
  1222. something like <code>index.html</code>. As it happens, my blog software already supports
  1223. this kind of file layout for entries which contain multiple parts, image files,
  1224. and so forth.</p>
  1225. <pre><code>$ head ~/p1k3/archives/2013/2/9/index
  1226. &lt;h1&gt;saturday, february 9&lt;/h1&gt;
  1227. &lt;freeverse&gt;
  1228. midwinter midafternoon; depressed as hell
  1229. sitting in a huge cabin in the rich-people mountains
  1230. writing a sprawl, pages, of melancholic midlife bullshit
  1231. outside the snow gives way to broken clouds and the
  1232. clear unyielding light of the high country sun fills
  1233. $ cat ~/p1k3/archives/2013/2/9/Meta
  1234. collection: ok-poems
  1235. </code></pre>
  1236. <p>It would then be easy to <code>find</code> files called <code>Meta</code> and grep them for
  1237. <code>collection: ok-poems</code>.</p>
  1238. <p>What if I put metadata right in the filename itself, and dispense with the grep
  1239. altogether?</p>
  1240. <pre><code>$ ls ~/p1k3/archives/2013/2/9
  1241. index meta-ok-poem
  1242. $ find ~/p1k3/archives -name 'meta-ok-poem'
  1243. /home/brennen/p1k3/archives/2013/2/9/meta-ok-poem
  1244. </code></pre>
  1245. <p>There&rsquo;s a lot to like about this. For one thing, it&rsquo;s immediately visible in a
  1246. directory listing. For another, it doesn&rsquo;t require searching through thousands
  1247. of lines of text to extract a specific string. If a directory has a
  1248. <code>meta-ok-poem</code> in it, I can be pretty sure that it will contain an interesting
  1249. <code>index</code>.</p>
  1250. <p>What are the downsides? Well, it requires transforming lots of text files into
  1251. directories-containing-files. I might automate that process, but it&rsquo;s still a
  1252. little tedious and it makes the layout of the entry archive more complicated
  1253. overall. There&rsquo;s a cost to doing things this way. It lets me extend my
  1254. existing model of a blog entry to include arbitrary metadata, but it also adds
  1255. steps to writing or finding blog entries.</p>
  1256. <p>Abstractions usually cost you something. Is this one worth the hassle?
  1257. Sometimes the best way to answer that question is to start writing code that
  1258. handles a given abstraction.</p>
  1259. <hr />
  1260. <h1><a name=script href=#script>#</a> 4. script</h1>
  1261. <p>Back in chapter 1, I said that &ldquo;the way you use the computer is often just to write
  1262. little programs that invoke other programs&rdquo;. In fact, we&rsquo;ve already gone over a
  1263. bunch of these. Grepping through the text of a previous chapter should pull
  1264. up some good examples:</p>
  1265. <!-- exec -->
  1266. <pre><code>$ grep -E '\$ [a-z]+.*\| ' ../literary_environment/index.md
  1267. $ sort authors_* | uniq -c
  1268. $ sort authors_* | uniq &gt; ./all_authors
  1269. $ find ~/p1k3/archives/2010/11 -regextype egrep -regex '.*([0-9]+|index)' -type f | xargs wc -w | tail -1
  1270. $ sort authors_* | uniq | wc -l
  1271. $ sort colors | uniq -i | tail -1
  1272. $ cut -d' ' -f1 ./authors_* | sort | uniq -ci | sort -n | tail -3
  1273. $ sort -u ./authors_* | cut -d' ' -f1 | uniq -ci | sort -n | tail -3
  1274. $ sort -k1 all_authors.tsv | expand -t14
  1275. $ paste firstnames lastnames | sort -k2 | expand -t12
  1276. $ cat ./authors_* | grep 'Vanessa'
  1277. </code></pre>
  1278. <!-- end -->
  1279. <p>None of these one-liners do all that much, but they all take input of one sort
  1280. or another and apply one or more transformations to it. They&rsquo;re little formal
  1281. sentences describing how to make one thing into another, which is as good a
  1282. definition of programming as most. Or at least this is a good way to describe
  1283. programming-in-the-small. (A lot of the programs we use day-to-day are more
  1284. like essays, novels, or interminable Fantasy series where every character you
  1285. like dies horribly than they are like individual sentences.)</p>
  1286. <p>One-liners like these are all well and good when you&rsquo;re staring at a terminal,
  1287. trying to figure something out - but what about when you&rsquo;ve already figured it out and
  1288. you want to repeat it in the future?</p>
  1289. <p>It turns out that Bash has you covered. Since shell commands are just text,
  1290. they can live in a text file as easily as they can be typed.</p>
  1291. <h2><a name=learn-you-an-editor href=#learn-you-an-editor>#</a> learn you an editor</h2>
  1292. <p>We&rsquo;ve skirted the topic so far, but now that we&rsquo;re talking about writing out
  1293. text files in earnest, you&rsquo;re going to want a text editor.</p>
  1294. <p>My editor is where I spend most of my time that isn&rsquo;t in a web browser, because
  1295. it&rsquo;s where I write both code and prose. It turns out that the features which
  1296. make a good code editor overlap a lot with the ones that make a good editor of
  1297. English sentences.</p>
  1298. <p>So what should you use? Well, there have been other contenders in recent
  1299. years, but in truth nothing comes close to dethroning the Great Old Ones of
  1300. text editing. Emacs is a creature both primal and sophisticated, like an
  1301. avatar of some interstellar civilization that evolved long before multicellular
  1302. life existed on earth and seeded the galaxy with incomprehensible artefacts and
  1303. colossal engineering projects. Vim is like a lovable chainsaw-studded robot
  1304. with the most elegant keyboard interface in history secretly emblazoned on its
  1305. shining diamond heart.</p>
  1306. <p>It&rsquo;s worth the time it takes to learn one of the serious editors, but there are
  1307. easier places to start. Nano, for example, is easy to pick up, and should be
  1308. available on most systems. To start it, just say:</p>
  1309. <pre><code>$ nano file
  1310. </code></pre>
  1311. <p>You should see something like this:</p>
  1312. <p style="text-align:center;"> <img src="images/nano.png" alt="nano" /></p>
  1313. <p>Arrow keys will move your cursor around, and typing stuff will make it appear
  1314. in the file. This is pretty much like every other editor you&rsquo;ve ever used. If
  1315. you haven&rsquo;t used Nano before, that stuff along the bottom of the terminal is a
  1316. reference to the most commonly used commands. <code>^</code> is a convention for &ldquo;Ctrl&rdquo;,
  1317. so <code>^O</code> means Ctrl-o (the case of the letter doesn&rsquo;t actually matter), which
  1318. will save the file you&rsquo;re working on. Ctrl-x will quit, which is probably the
  1319. first important thing to know about any given editor.</p>
  1320. <h2><a name=d-i-y-utilities href=#d-i-y-utilities>#</a> d.i.y. utilities</h2>
  1321. <p>So back to putting commands in text files. Here&rsquo;s a file I just created in
  1322. my editor:</p>
  1323. <!-- exec -->
  1324. <pre><code>$ cat okpoems
  1325. #!/bin/bash
  1326. # find all the marker files and get the name of
  1327. # the directory containing each
  1328. find ~/p1k3/archives -name 'meta-ok-poem' | xargs -n1 dirname
  1329. exit 0
  1330. </code></pre>
  1331. <!-- end -->
  1332. <p>This is known as a script. There are a handful of things to notice here.
  1333. First, there&rsquo;s this fragment:</p>
  1334. <pre><code>#!/bin/bash
  1335. </code></pre>
  1336. <p>The <code>#!</code> right at the beginning, followed by the path to a program, is a
  1337. special sequence that lets the kernel know what program should be used to
  1338. interpret the contents of the file. <code>/bin/bash</code> is the path on the filesystem
  1339. where Bash itself lives. You might see this referred to as a shebang or a hash
  1340. bang.</p>
  1341. <p>Lines that start with a <code>#</code> are comments, used to describe the code to a human
  1342. reader. The <code>exit 0</code> tells Bash that the currently running script should exit
  1343. with a status of 0, which basically means &ldquo;nothing went wrong&rdquo;.</p>
  1344. <p>If you examine the directory listing for <code>okpoems</code>, you&rsquo;ll see something
  1345. important:</p>
  1346. <!-- exec -->
  1347. <pre><code>$ ls -l okpoems
  1348. -rwxrwxr-x 1 brennen brennen 163 Apr 19 2014 okpoems
  1349. </code></pre>
  1350. <!-- end -->
  1351. <p>That looks pretty cryptic. For the moment, just remember that those little
  1352. <code>x</code>s in the first bit mean that the file has been marked e<strong>x</strong>ecutable. We
  1353. accomplish this by saying something like:</p>
  1354. <pre><code>$ chmod +x ./okpoems
  1355. </code></pre>
  1356. <p>Once that&rsquo;s done, it and the shebang line in combination mean that typing
  1357. <code>./okpoems</code> will have the same effect as typing <code>bash okpoems</code>:</p>
  1358. <!-- exec -->
  1359. <pre><code>$ ./okpoems
  1360. /home/brennen/p1k3/archives/2014/2/24
  1361. /home/brennen/p1k3/archives/2009/4/5
  1362. /home/brennen/p1k3/archives/2009/4/29
  1363. /home/brennen/p1k3/archives/2009/8/20
  1364. /home/brennen/p1k3/archives/2009/8/30
  1365. /home/brennen/p1k3/archives/2009/8/19
  1366. /home/brennen/p1k3/archives/2009/8/21
  1367. /home/brennen/p1k3/archives/2006/6/29
  1368. /home/brennen/p1k3/archives/2013/10/6
  1369. /home/brennen/p1k3/archives/2013/11/18
  1370. /home/brennen/p1k3/archives/2013/7/16
  1371. /home/brennen/p1k3/archives/2013/2/9
  1372. /home/brennen/p1k3/archives/2011/8/12
  1373. /home/brennen/p1k3/archives/2008/12/18
  1374. /home/brennen/p1k3/archives/2010/9/21
  1375. /home/brennen/p1k3/archives/2012/10/9
  1376. /home/brennen/p1k3/archives/2012/2/1
  1377. /home/brennen/p1k3/archives/2012/3/17
  1378. /home/brennen/p1k3/archives/2012/3/26
  1379. </code></pre>
  1380. <!-- end -->
  1381. <h2><a name=heavy-lifting href=#heavy-lifting>#</a> heavy lifting</h2>
  1382. <p><code>okpoems</code> demonstrates the basics, but it doesn&rsquo;t do very much. Here&rsquo;s
  1383. a script with a little more substance to it:</p>
  1384. <!-- exec -->
  1385. <pre><code>$ cat markpoem
  1386. #!/bin/bash
  1387. # $1 is the first parameter to our script
  1388. POEM=$1
  1389. # Complain and exit if we weren't given a path:
  1390. if [ ! $POEM ]; then
  1391. echo 'usage: markpoem &lt;path&gt;'
  1392. # Confusingly, an exit status of 0 means to the shell that everything went
  1393. # fine, while any other number means that something went wrong.
  1394. exit 64
  1395. fi
  1396. if [ ! -e $POEM ]; then
  1397. echo "$POEM not found"
  1398. exit 66
  1399. fi
  1400. echo "marking $POEM an ok poem"
  1401. POEM_BASENAME=$(basename $POEM)
  1402. # If the target is a plain file instead of a directory, make it into
  1403. # a directory and move the content into $POEM/index:
  1404. if [ -f $POEM ]; then
  1405. echo "making $POEM into a directory, moving content to"
  1406. echo " $POEM/index"
  1407. TEMPFILE="/tmp/$POEM_BASENAME.$(date +%s.%N)"
  1408. mv $POEM $TEMPFILE
  1409. mkdir $POEM
  1410. mv $TEMPFILE $POEM/index
  1411. fi
  1412. if [ -d $POEM ]; then
  1413. # touch(1) will either create the file or update its timestamp:
  1414. touch $POEM/meta-ok-poem
  1415. else
  1416. echo "something broke - why isn't $POEM a directory?"
  1417. file $POEM
  1418. fi
  1419. # Signal that all is copacetic:
  1420. echo kthxbai
  1421. exit 0
  1422. </code></pre>
  1423. <!-- end -->
  1424. <p>Both of these scripts are imperfect, but they were quick to write, they&rsquo;re made
  1425. out of standard commands, and I don&rsquo;t yet hate myself for them: All signs that
  1426. I&rsquo;m not totally on the wrong track with the <code>meta-ok-poem</code> abstraction, and
  1427. could live with it as part of an ongoing writing project. <code>okpoems</code> and
  1428. <code>markpoem</code> would also be easy to use with custom keybindings in my editor. In
  1429. a few more lines of code, I can build a system to wade through the list of
  1430. candidate files and quickly mark the interesting ones.</p>
  1431. <h2><a name=generality href=#generality>#</a> generality</h2>
  1432. <p>So what&rsquo;s lacking here? Well, probably a bunch of things, feature-wise. I can
  1433. imagine writing a script to unmark a poem, for example. That said, there&rsquo;s one
  1434. really glaring problem. &ldquo;Ok poem&rdquo; is only one kind of property a blog entry
  1435. might possess. Suppose I wanted a way to express that a poem is terrible?</p>
  1436. <p>It turns out I already know how to add properties to an entry. If I generalize
  1437. just a little, the tools become much more flexible.</p>
  1438. <!-- exec -->
  1439. <pre><code>$ ./addprop /home/brennen/p1k3/archives/2012/3/26 meta-terrible-poem
  1440. marking /home/brennen/p1k3/archives/2012/3/26 with meta-terrible-poem
  1441. kthxbai
  1442. </code></pre>
  1443. <!-- end -->
  1444. <!-- exec -->
  1445. <pre><code>$ ./findprop meta-terrible-poem
  1446. /home/brennen/p1k3/archives/2012/3/26
  1447. </code></pre>
  1448. <!-- end -->
  1449. <p><code>addprop</code> is only a little different from <code>markpoem</code>. It takes two parameters
  1450. instead of one - the target entry and a property to add.</p>
  1451. <!-- exec -->
  1452. <pre><code>$ cat addprop
  1453. #!/bin/bash
  1454. ENTRY=$1
  1455. PROPERTY=$2
  1456. # Complain and exit if we weren't given a path and a property:
  1457. if [[ ! $ENTRY || ! $PROPERTY ]]; then
  1458. echo "usage: addprop &lt;path&gt; &lt;property&gt;"
  1459. exit 64
  1460. fi
  1461. if [ ! -e $ENTRY ]; then
  1462. echo "$ENTRY not found"
  1463. exit 66
  1464. fi
  1465. echo "marking $ENTRY with $PROPERTY"
  1466. # If the target is a plain file instead of a directory, make it into
  1467. # a directory and move the content into $ENTRY/index:
  1468. if [ -f $ENTRY ]; then
  1469. echo "making $ENTRY into a directory, moving content to"
  1470. echo " $ENTRY/index"
  1471. # Get a safe temporary file:
  1472. TEMPFILE=`mktemp`
  1473. mv $ENTRY $TEMPFILE
  1474. mkdir $ENTRY
  1475. mv $TEMPFILE $ENTRY/index
  1476. fi
  1477. if [ -d $ENTRY ]; then
  1478. touch $ENTRY/$PROPERTY
  1479. else
  1480. echo "something broke - why isn't $ENTRY a directory?"
  1481. file $ENTRY
  1482. fi
  1483. echo kthxbai
  1484. exit 0
  1485. </code></pre>
  1486. <!-- end -->
  1487. <p>Meanwhile, <code>findprop</code> is more or less <code>okpoems</code>, but with a parameter for the
  1488. property to find:</p>
  1489. <!-- exec -->
  1490. <pre><code>$ cat findprop
  1491. #!/bin/bash
  1492. if [ ! $1 ]
  1493. then
  1494. echo "usage: findprop &lt;property&gt;"
  1495. exit
  1496. fi
  1497. # find all the marker files and get the name of
  1498. # the directory containing each
  1499. find ~/p1k3/archives -name $1 | xargs -n1 dirname
  1500. exit 0
  1501. </code></pre>
  1502. <!-- end -->
  1503. <p>These scripts aren&rsquo;t much more complicated than their poem-specific
  1504. counterparts, but now they can be used to solve problems I haven&rsquo;t even thought
  1505. of yet, and included in other scripts that need their functionality.</p>
  1506. <hr />
  1507. <h1><a name=general-purpose-programmering href=#general-purpose-programmering>#</a> 5. general purpose programmering</h1>
  1508. <p>I didn&rsquo;t set out to write a book about programming, <em>as such</em>, but because
  1509. programming and the command line are so inextricably linked, this text
  1510. draws near the subject almost of its own accord.</p>
  1511. <p>If you&rsquo;re not terribly interested in programming, this chapter can easily
  1512. enough be skipped. It&rsquo;s more in the way of philosophical rambling than
  1513. concrete instruction, and will be of most use to those with an existing
  1514. background in writing code.</p>
  1515. <p style="text-align:center;"> ✢</p>
  1516. <p>If you&rsquo;ve used computers for more than a few years, you&rsquo;re probably viscerally
  1517. aware that most software is fragile and most systems decay. In the time since
  1518. I took my first tentative steps into the little world of a computer (a friend&rsquo;s
  1519. dad&rsquo;s unidentifiable gaming machine, my own father&rsquo;s blue monochrome Zenith
  1520. laptop, the Apple II) the churn has been overwhelming. By now I&rsquo;ve learned my
  1521. way around vastly more software &mdash; operating systems, programming languages and
  1522. development environments, games, editors, chat clients, mail systems &mdash; than I
  1523. presently could use if I wanted to. Most of it has gone the way of some
  1524. ancient civilization, surviving (if at all) only in faint, half-understood
  1525. cultural echoes and occasional museum-piece displays. Every user of technology
  1526. becomes, in time, a refugee from an irretrievably recent past.</p>
  1527. <p>And yet, despite all this, the shell endures. Most of the ideas in this book
  1528. are older than I am. Most of them could have been applied in 1994 or
  1529. thereabouts, when I first logged on to multiuser systems running AT&amp;T Unix.
  1530. Since the early 1990s, systems built on a fundamental substrate of Unix-like
  1531. behavior and abstractions have proliferated wildly, becoming foundational at
  1532. once to the modern web, the ecosystem of free and open software, and the
  1533. technological dominance ca. 2014 of companies like Apple, Google, and Facebook.</p>
  1534. <p>Why is this, exactly?</p>
  1535. <p style="text-align:center;"> ✣</p>
  1536. <p>As I&rsquo;ve said (and hopefully shown), the commands you write in your shell
  1537. are essentially little programs. Like other programs, they can be stored
  1538. for later use and recombined with other commands, creating new uses for
  1539. your ideas.</p>
  1540. <p>It would be hard to say that there&rsquo;s any <em>one</em> reason command line environments
  1541. remain so vital after decades of evolution and hard-won refinement in computer
  1542. interfaces, but it seems like this combinatory nature is somewhere near the
  1543. heart of it. The command line often lacks the polish of other interfaces we
  1544. depend on, but in exchange it offers a richness and freedom of expression
  1545. rarely seen elsewhere, and invites its users to build upon its basic
  1546. facilities.</p>
  1547. <p>What is it that makes last chapter&rsquo;s <code>addprop</code> preferable to the more specific
  1548. <code>markpoem</code>? Let&rsquo;s look at an alternative implementation of <code>markpoem</code>:</p>
  1549. <!-- exec -->
  1550. <pre><code>$ cat simple_markpoem
  1551. #!/bin/bash
  1552. addprop $1 meta-ok-poem
  1553. </code></pre>
  1554. <!-- end -->
  1555. <p>Is this script trivial? Absolutely. It&rsquo;s so trivial that it barely seems to
  1556. exist, because I already wrote <code>addprop</code> to do all the heavy lifting and play
  1557. well with others, freeing us to imagine new uses for its central idea without
  1558. worrying about the implementation details.</p>
  1559. <p>Unlike <code>markpoem</code>, <code>addprop</code> doesn&rsquo;t know anything about poetry. All it knows
  1560. about, in fact, is putting a file (or three) in a particular place. And this
  1561. is in keeping with a basic insight of Unix: Pieces of software that do one
  1562. very simple thing generalize well. Good command line tools are like a hex
  1563. wrench, a hammer, a utility knife: They embody knowledge of turning, of
  1564. striking, of cutting &mdash; and with this kind of knowledge at hand, the user can
  1565. change the world even though no individual tool is made with complete knowledge
  1566. of the world as a whole. There&rsquo;s a lot of power in the accumulation of small
  1567. competencies.</p>
  1568. <p>Of course, if your code is only good at one thing, to be of any use, it has to
  1569. talk to code that&rsquo;s good at other things. There&rsquo;s another basic insight in the
  1570. Unix tradition: Tools should be composable. All those little programs have to
  1571. share some assumptions, have to speak some kind of trade language, in order to
  1572. combine usefully. Which is how we&rsquo;ve arrived at standard IO, pipelines,
  1573. filesystems, and text as a lowest-common-denominator medium of exchange. If
  1574. you think about most of these things, they have some very rough edges, but they
  1575. give otherwise simple tools ways to communicate without becoming
  1576. super-complicated along the way.</p>
  1577. <p style="text-align:center;"> ✤</p>
  1578. <p>What is the command line?</p>
  1579. <p>The command line is an environment of tool use.</p>
  1580. <p>So are kitchens, workshops, libraries, and programming languages.</p>
  1581. <p style="text-align:center;"> ✥</p>
  1582. <p>Here&rsquo;s a confession: I don&rsquo;t like writing shell scripts very much, and I
  1583. can&rsquo;t blame anyone else for feeling the same way.</p>
  1584. <p>That doesn&rsquo;t mean you shouldn&rsquo;t <em>know</em> about them, or that you shouldn&rsquo;t
  1585. <em>write</em> them. I write little ones all the time, and the ability to puzzle
  1586. through other people&rsquo;s scripts comes in handy. Oftentimes, the best, most
  1587. tasteful way to automate something is to build a script out of the commonly
  1588. available commands. The standard tools are already there on millions of
  1589. machines. Many of them have been pretty well understood for a generation, and
  1590. most will probably be around for a generation or three to come. They do neat
  1591. stuff. Scripts let you build on ideas you&rsquo;ve already worked out, and give
  1592. repeatable operations a memorable, user-friendly name. They encourage reuse of
  1593. existing programs, and help express your ideas to people who&rsquo;ll come after you.</p>
  1594. <p>One of the reliable markers of powerful software is that it can be scripted: It
  1595. extends to its users some of the same power that its authors used in creating
  1596. it. Scriptable software is to some extent <em>living</em> software. It&rsquo;s a book that
  1597. you, the reader, get to help write.</p>
  1598. <p>In all these ways, shell scripts are wonderful, a little bit magical, and
  1599. quietly indispensable to the machinery of modern civilization.</p>
  1600. <p>Unfortunately, in all the ways that a shell like Bash is weird, finicky, and
  1601. covered in 40 years of incidental cruft, long-form Bash scripts are even worse.
  1602. Bash is a useful glue language, particularly if you&rsquo;re already comfortable
  1603. wiring commands together. Syntactic and conceptual innovations like pipes are
  1604. beautiful and necessary. What Bash is <em>not</em>, despite its power, is a very good
  1605. general purpose programming language. It&rsquo;s just not especially good at things
  1606. like math, or complex data structures, or not looking like a punctuation-heavy
  1607. variety of alphabet soup.</p>
  1608. <p>It turns out that there&rsquo;s a threshold of complexity beyond which life becomes
  1609. easier if you switch from shell scripting to a more robust language. Just
  1610. where this threshold is located varies a lot between users and problems, but I
  1611. often think about switching languages before a script gets bigger than I can
  1612. view on my screen all at once. <code>addprop</code> is a good example:</p>
  1613. <!-- exec -->
  1614. <pre><code>$ wc -l ../script/addprop
  1615. 41 ../script/addprop
  1616. </code></pre>
  1617. <!-- end -->
  1618. <p>41 lines is a touch over what fits on one screen in the editor I usually use.
  1619. If I were going to add much in the way of features, I&rsquo;d think pretty hard about
  1620. porting it to another language first.</p>
  1621. <p>What&rsquo;s cool is that if you know a language like C, Python, Perl, Ruby, PHP, or
  1622. JavaScript, your code can participate in the shell environment as a first class
  1623. citizen simply by respecting the conventions of standard IO, files, and command
  1624. line arguments. Often, in order to create a useful utility, it&rsquo;s only
  1625. necessary to deal with <code>STDIN</code>, or operate on a particular sort of file, and
  1626. most languages offer simple conventions for doing these things.</p>
  1627. <p style="text-align:center;"> *</p>
  1628. <p>I think the shell can be taught and understood as a humane environment, despite
  1629. all of its ugliness and complication, because it offers the materials of its
  1630. own construction to its users, whatever their concerns. The writer, the
  1631. philosopher, the scientist, the programmer: Files and text and pipes know
  1632. little enough about these things, but in their very indifference to the
  1633. specifics of any one complex purpose, they&rsquo;re adaptable to the basic needs of
  1634. many. Simple utilities which enact simple kinds of knowledge survive and
  1635. recombine because there is a wisdom to be found in small things.</p>
  1636. <p>Files and text know nothing about poetry, nothing in particular of the human
  1637. soul. Neither do pen and ink, printing presses or codex books, but somehow we
  1638. got Shakespeare and Montaigne.</p>
  1639. <hr />
  1640. <h1><a name=one-of-these-things-is-not-like-the-others href=#one-of-these-things-is-not-like-the-others>#</a> 6. one of these things is not like the others</h1>
  1641. <p>If you&rsquo;re the sort of person who took a few detours into the history of
  1642. religion in college, you might be familiar with some of the ways people used to
  1643. do textual comparison. When pen, paper, and typesetting were what scholars had
  1644. to work with, they did some fairly sophisticated things in order to expose the
  1645. relationships between multiple pieces of text.</p>
  1646. <p style="text-align:center;"> <img src="images/throckmorton_small.jpg" height=320 width=470></p>
  1647. <p>Here&rsquo;s a book I got in college: <em>Gospel Parallels: A Comparison of the
  1648. Synoptic Gospels</em>, Burton H. Throckmorton, Jr., Ed. It breaks up three books
  1649. from the New Testament by the stories and themes that they contain, and shows
  1650. the overlapping sections of each book that contain parallel texts. You can
  1651. work your way through and see what parts only show up in one book, or in two
  1652. but not the other, or in all three. Pages are arranged like so:</p>
  1653. <pre>
  1654. § JESUS DOES SOME STUFF
  1655. ________________________________________________
  1656. | MAT             | MAR                | LUK     |
  1657. |-----------------+--------------------+---------|
  1658. | Stuff           |                    |         |
  1659. |                 | Stuff              |         |
  1660. |                 | Stuff              | Stuff   |
  1661. |                 | Stuff              |         |
  1662. |                 | Stuff              |         |
  1663. |                 |                    |         |
  1664. </pre>
  1665. <p>The way I understand it, a book like this one only scratches the surface of the
  1666. field. Tools like this support a lot of theory about which books copied each
  1667. other and how, and what other sources they might have copied that we&rsquo;ve since
  1668. lost.</p>
  1669. <p>This is some <em>incredibly</em> dry material, even if you kind of dig thinking about
  1670. the questions it addresses. It takes a special temperament to actually sit
  1671. poring over fragmentary texts in ancient languages and do these painstaking
  1672. comparisons. Even if you&rsquo;re a writer or editor and work with a lot of
  1673. revisions of a text, there&rsquo;s a good chance you rarely do this kind of
  1674. comparison on your own work, because that shit is <em>tedious</em>.</p>
  1675. <h2><a name=diff href=#diff>#</a> diff</h2>
  1676. <p>It turns out that academics aren&rsquo;t the only people who need tools for comparing
  1677. different versions of a text. Working programmers, in fact, need to do this
  1678. <em>constantly</em>. Programmers are also happiest when putting off the <em>actual</em> task
  1679. at hand to solve some incidental problem that cropped up along the way, so by
  1680. now there are a lot of ways to say &ldquo;here&rsquo;s how this file is different from this
  1681. file&rdquo;, or &ldquo;here&rsquo;s how this file is different from itself a year ago&rdquo;.</p>
  1682. <p>Let&rsquo;s look at a couple of shell scripts from an earlier chapter:</p>
  1683. <!-- exec -->
  1684. <pre><code>$ cat ../script/okpoems
  1685. #!/bin/bash
  1686. # find all the marker files and get the name of
  1687. # the directory containing each
  1688. find ~/p1k3/archives -name 'meta-ok-poem' | xargs -n1 dirname
  1689. exit 0
  1690. </code></pre>
  1691. <!-- end -->
  1692. <!-- exec -->
  1693. <pre><code>$ cat ../script/findprop
  1694. #!/bin/bash
  1695. if [ ! $1 ]
  1696. then
  1697. echo "usage: findprop &lt;property&gt;"
  1698. exit
  1699. fi
  1700. # find all the marker files and get the name of
  1701. # the directory containing each
  1702. find ~/p1k3/archives -name $1 | xargs -n1 dirname
  1703. exit 0
  1704. </code></pre>
  1705. <!-- end -->
  1706. <p>It&rsquo;s pretty obvious these are similar files, but do we know what <em>exactly</em>
  1707. changed between them at a glance? It wouldn&rsquo;t be hard to figure out, once. If
  1708. you wanted to be really certain about it, you could print them out, set them
  1709. side by side, and go over them with a highlighter.</p>
  1710. <p>Now imagine doing that for a bunch of files, some of them hundreds or thousands
  1711. of lines long. I&rsquo;ve actually done that before, colored markers and all, but I
  1712. didn&rsquo;t feel smart while I was doing it. This is a job for software.</p>
  1713. <!-- exec -->
  1714. <pre><code>$ diff ../script/okpoems ../script/findprop
  1715. 2a3,8
  1716. &gt; if [ ! $1 ]
  1717. &gt; then
  1718. &gt; echo "usage: findprop &lt;property&gt;"
  1719. &gt; exit
  1720. &gt; fi
  1721. &gt;
  1722. 5c11
  1723. &lt; find ~/p1k3/archives -name 'meta-ok-poem' | xargs -n1 dirname
  1724. ---
  1725. &gt; find ~/p1k3/archives -name $1 | xargs -n1 dirname
  1726. </code></pre>
  1727. <!-- end -->
  1728. <p>That&rsquo;s not the most human-friendly output, but it&rsquo;s a little simpler than it
  1729. seems at first glance. It&rsquo;s basically just a way of describing the changes
  1730. needed to turn <code>okpoems</code> into <code>findprop</code>. The string <code>2a3,8</code> can be read as
  1731. &ldquo;at line 2, add lines 3 through 8&rdquo;. Lines with a <code>&gt;</code> in front of them are
  1732. added. <code>5c11</code> can be read as &ldquo;line 5 in the original file becomes line 11 in
  1733. the new file&rdquo;, and the <code>&lt;</code> line is replaced with the <code>&gt;</code> line. If you wanted,
  1734. you could take a copy of the original file and apply these instructions by hand
  1735. in your text editor, and you&rsquo;d wind up with the new file.</p>
  1736. <p>A lot of people (me included) prefer what&rsquo;s known as a &ldquo;unified&rdquo; diff, because
  1737. it&rsquo;s easier to read and offers context for the changed lines. We can ask for
  1738. one of these with <code>diff -u</code>:</p>
  1739. <!-- exec -->
  1740. <pre><code>$ diff -u ../script/okpoems ../script/findprop
  1741. --- ../script/okpoems 2014-04-19 00:08:03.321230818 -0600
  1742. +++ ../script/findprop 2014-04-21 21:51:29.360846449 -0600
  1743. @@ -1,7 +1,13 @@
  1744. #!/bin/bash
  1745. +if [ ! $1 ]
  1746. +then
  1747. + echo "usage: findprop &lt;property&gt;"
  1748. + exit
  1749. +fi
  1750. +
  1751. # find all the marker files and get the name of
  1752. # the directory containing each
  1753. -find ~/p1k3/archives -name 'meta-ok-poem' | xargs -n1 dirname
  1754. +find ~/p1k3/archives -name $1 | xargs -n1 dirname
  1755. exit 0
  1756. </code></pre>
  1757. <!-- end -->
  1758. <p>That&rsquo;s a little longer, and has some metadata we might not always care about,
  1759. but if you look for lines starting with <code>+</code> and <code>-</code>, it&rsquo;s easy to read as
  1760. &ldquo;added these, took away these&rdquo;. This diff tells us at a glance that we added
  1761. some lines to complain if we didn&rsquo;t get a command line argument, and replaced
  1762. <code>'meta-ok-poem'</code> in the <code>find</code> command with that argument. Since it shows us
  1763. some context, we have a pretty good idea where those lines are in the file
  1764. and what they&rsquo;re for.</p>
  1765. <p>What if we don&rsquo;t care exactly <em>how</em> the files differ, but only whether they
  1766. do?</p>
  1767. <!-- exec -->
  1768. <pre><code>$ diff -q ../script/okpoems ../script/findprop
  1769. Files ../script/okpoems and ../script/findprop differ
  1770. </code></pre>
  1771. <!-- end -->
  1772. <p>I use <code>diff</code> a lot in the course of my day job, because I spend a lot of time
  1773. needing to know just how two programs differ. Just as importantly, I often
  1774. need to know how (or whether!) the <em>output</em> of programs differs. As a concrete
  1775. example, I want to make sure that <code>findprop meta-ok-poem</code> is really a suitable
  1776. replacement for <code>okpoems</code>. Since I expect their output to be identical, I can
  1777. do this:</p>
  1778. <!-- exec -->
  1779. <pre><code>$ ../script/okpoems &gt; okpoem_output
  1780. </code></pre>
  1781. <!-- end -->
  1782. <!-- exec -->
  1783. <pre><code>$ ../script/findprop meta-ok-poem &gt; findprop_output
  1784. </code></pre>
  1785. <!-- end -->
  1786. <!-- exec -->
  1787. <pre><code>$ diff -s okpoem_output findprop_output
  1788. Files okpoem_output and findprop_output are identical
  1789. </code></pre>
  1790. <!-- end -->
  1791. <p>The <code>-s</code> just means that <code>diff</code> should explicitly tell us if files are the
  1792. <strong>s</strong>ame. Otherwise, it&rsquo;d output nothing at all, because there aren&rsquo;t any
  1793. differences.</p>
  1794. <p>As with many other tools, <code>diff</code> doesn&rsquo;t very much care whether it&rsquo;s looking at
  1795. shell scripts or a list of filenames or what-have-you. If you read the man
  1796. page, you&rsquo;ll find some features geared towards people writing C-like
  1797. programming languages, but its real specialty is just text files with lines
  1798. made out of characters, which works well for lots of code, but certainly could
  1799. be applied to English prose.</p>
  1800. <p>Since I have a couple of versions ready to hand, let&rsquo;s apply this to a text
  1801. with some well-known variations and a bit of a literary legacy. Here&rsquo;s the
  1802. first day of the Genesis creation narrative in a couple of English
  1803. translations:</p>
  1804. <!-- exec -->
  1805. <pre><code>$ cat genesis_nkj
  1806. In the beginning God created the heavens and the earth. The earth was without
  1807. form, and void; and darkness was on the face of the deep. And the Spirit of
  1808. God was hovering over the face of the waters. Then God said, "Let there be
  1809. light"; and there was light. And God saw the light, that it was good; and God
  1810. divided the light from the darkness. God called the light Day, and the darkness
  1811. He called Night. So the evening and the morning were the first day.
  1812. </code></pre>
  1813. <!-- end -->
  1814. <!-- exec -->
  1815. <pre><code>$ cat genesis_nrsv
  1816. In the beginning when God created the heavens and the earth, the earth was a
  1817. formless void and darkness covered the face of the deep, while a wind from
  1818. God swept over the face of the waters. Then God said, "Let there be light";
  1819. and there was light. And God saw that the light was good; and God separated
  1820. the light from the darkness. God called the light Day, and the darkness he
  1821. called Night. And there was evening and there was morning, the first day.
  1822. </code></pre>
  1823. <!-- end -->
  1824. <p>What happens if we diff them?</p>
  1825. <!-- exec -->
  1826. <pre><code>$ diff -u genesis_nkj genesis_nrsv
  1827. --- genesis_nkj 2014-05-11 16:28:29.692508461 -0600
  1828. +++ genesis_nrsv 2014-05-11 16:28:29.744508459 -0600
  1829. @@ -1,6 +1,6 @@
  1830. -In the beginning God created the heavens and the earth. The earth was without
  1831. -form, and void; and darkness was on the face of the deep. And the Spirit of
  1832. -God was hovering over the face of the waters. Then God said, "Let there be
  1833. -light"; and there was light. And God saw the light, that it was good; and God
  1834. -divided the light from the darkness. God called the light Day, and the darkness
  1835. -He called Night. So the evening and the morning were the first day.
  1836. +In the beginning when God created the heavens and the earth, the earth was a
  1837. +formless void and darkness covered the face of the deep, while a wind from
  1838. +God swept over the face of the waters. Then God said, "Let there be light";
  1839. +and there was light. And God saw that the light was good; and God separated
  1840. +the light from the darkness. God called the light Day, and the darkness he
  1841. +called Night. And there was evening and there was morning, the first day.
  1842. </code></pre>
  1843. <!-- end -->
  1844. <p>Kind of useless, right? If a given line differs by so much as a character,
  1845. it&rsquo;s not the same line. This highlights the limitations of <code>diff</code> for comparing
  1846. things that</p>
  1847. <ul>
  1848. <li>aren&rsquo;t logically grouped by line</li>
  1849. <li>aren&rsquo;t easily thought of as versions of the same text with some lines changed</li>
  1850. </ul>
  1851. <p>We could edit the files into a more logically defined structure, like
  1852. one-line-per-verse, and try again:</p>
  1853. <!-- exec -->
  1854. <pre><code>$ diff -u genesis_nkj_by_verse genesis_nrsv_by_verse
  1855. --- genesis_nkj_by_verse 2014-05-11 16:51:14.312457198 -0600
  1856. +++ genesis_nrsv_by_verse 2014-05-11 16:53:02.484453134 -0600
  1857. @@ -1,5 +1,5 @@
  1858. -In the beginning God created the heavens and the earth.
  1859. -The earth was without form, and void; and darkness was on the face of the deep. And the Spirit of God was hovering over the face of the waters.
  1860. +In the beginning when God created the heavens and the earth,
  1861. +the earth was a formless void and darkness covered the face of the deep, while a wind from God swept over the face of the waters.
  1862. Then God said, "Let there be light"; and there was light.
  1863. -And God saw the light, that it was good; and God divided the light from the darkness.
  1864. -God called the light Day, and the darkness He called Night. So the evening and the morning were the first day.
  1865. +And God saw that the light was good; and God separated the light from the darkness.
  1866. +God called the light Day, and the darkness he called Night. And there was evening and there was morning, the first day.
  1867. </code></pre>
  1868. <!-- end -->
  1869. <p>It might be a little more descriptive, but editing all that text just for a
  1870. quick comparison felt suspiciously like work, and anyway the output still
  1871. doesn&rsquo;t seem very useful.</p>
  1872. <h2><a name=wdiff href=#wdiff>#</a> wdiff</h2>
  1873. <p>For cases like this, I&rsquo;m fond of a tool called <code>wdiff</code>:</p>
  1874. <!-- exec -->
  1875. <pre><code>$ wdiff genesis_nkj genesis_nrsv
  1876. In the beginning {+when+} God created the heavens and the [-earth. The-] {+earth, the+} earth was [-without
  1877. form, and void;-] {+a
  1878. formless void+} and darkness [-was on-] {+covered+} the face of the [-deep. And the Spirit of-] {+deep, while a wind from+}
  1879. God [-was hovering-] {+swept+} over the face of the waters. Then God said, "Let there be light";
  1880. and there was light. And God saw [-the light,-] that [-it-] {+the light+} was good; and God
  1881. [-divided-] {+separated+}
  1882. the light from the darkness. God called the light Day, and the darkness
  1883. [-He-] {+he+}
  1884. called Night. [-So the-] {+And there was+} evening and [-the morning were-] {+there was morning,+} the first day.
  1885. </code></pre>
  1886. <!-- end -->
  1887. <p>Deleted words are surrounded by <code>[- -]</code> and inserted ones by <code>{+ +}</code>. You can
  1888. even ask it to spit out HTML tags for insertion and deletion&hellip;</p>
  1889. <pre><code>$ wdiff -w '&lt;del&gt;' -x '&lt;/del&gt;' -y '&lt;ins&gt;' -z '&lt;/ins&gt;' genesis_nkj genesis_nrsv
  1890. </code></pre>
  1891. <p>&hellip;and come up with something your browser will render like this:</p>
  1892. <blockquote>
  1893. <p>In the beginning <ins>when</ins> God created the heavens and the <del>earth. The</del> <ins>earth, the</ins> earth was <del>without
  1894. form, and void;</del> <ins>a
  1895. formless void</ins> and darkness <del>was on</del> <ins>covered</ins> the face of the <del>deep. And the Spirit of</del> <ins>deep, while a wind from</ins>
  1896. God <del>was hovering</del> <ins>swept</ins> over the face of the waters. Then God said, "Let there be light";
  1897. and there was light. And God saw <del>the light,</del> that <del>it</del> <ins>the light</ins> was good; and God
  1898. <del>divided</del> <ins>separated</ins>
  1899. the light from the darkness. God called the light Day, and the darkness
  1900. <del>He</del> <ins>he</ins>
  1901. called Night. <del>So the</del> <ins>And there was</ins> evening and <del>the morning were</del> <ins>there was morning,</ins> the first day.</p>
  1902. </blockquote>
  1903. <p>Burton H. Throckmorton, Jr. this ain&rsquo;t. Still, it has its uses.</p>
  1904. <hr />
  1905. <h1><a name=the-command-line-as-as-a-shared-world href=#the-command-line-as-as-a-shared-world>#</a> 7. the command line as a shared world</h1>
  1906. <p>In an earlier chapter, I wrote:</p>
  1907. <blockquote><p>You can think of the shell as a kind of environment you inhabit, in much
  1908. the way your character inhabits an adventure game.</p></blockquote>
  1909. <p>It turns out that sometimes there are other human inhabitants of this
  1910. environment.</p>
  1911. <p>Unix was built on a model known as &ldquo;time-sharing&rdquo;. This is an idea with a lot
  1912. of history, but the very short version is that when computers were rare and
  1913. expensive, it made sense for lots of people to be able to use them at once.
  1914. This is part of the story of how ideas like e-mail and chat were originally
  1915. born, well before networks took over the world: As ways for the many users of
  1916. one computer to communicate on the same machine.</p>
  1917. <p>Times have changed, and while it&rsquo;s mundane to use software that&rsquo;s shared
  1918. between many users, it&rsquo;s not nearly as common as it once was for a bunch of us
  1919. to be logged into the same computer all at once.</p>
  1920. <p style="text-align:center;"> ★</p>
  1921. <p>In the mid 1990s, when I was first exposed to Unix, it was by opening up a
  1922. program called NCSA Telnet on one of the Macs at school and connecting to a
  1923. server called mother.esu1.k12.ne.us.</p>
  1924. <p>NCSA Telnet was a terminal, not unlike the kind that you use to open a shell on
  1925. your own Linux computer, a piece of software that itself emulated actual,
  1926. physical hardware from an earlier era. Hardware terminals were basically very
  1927. simple computers with keyboards, screens, and just enough networking brains to
  1928. talk to a <em>real</em> computer somewhere else. You&rsquo;ll still come across these
  1929. scattered around big institutional environments. The last time I looked over
  1930. the shoulder of an airline checkin desk clerk, for example, I saw green
  1931. monochrome text that was probably coming from an IBM mainframe somewhere
  1932. far away.</p>
  1933. <p>Part of what was exciting about being logged into a computer somewhere else
  1934. was that you could <em>talk to people</em>.</p>
  1935. <p style="text-align:center;"> ★</p>
  1936. <p><em>{This chapter is a work in progress.}</em></p>
  1937. <hr />
  1938. <h1><a name=the-command-line-and-the-web href=#the-command-line-and-the-web>#</a> 8. the command line and the web</h1>
  1939. <p>Web browsers are really complicated these days. They&rsquo;re full of rendering
  1940. engines, audio and video players, programming languages, development tools,
  1941. databases &mdash; you name it, and there&rsquo;s a fair chance it&rsquo;s in there somewhere.
  1942. The modern web browser is kitchen sink software, and to make matters worse, it
  1943. is <em>totally surrounded</em> by technobabble. It can take <em>years</em> to come to terms
  1944. with the ocean of words about web stuff and sort out the meaningful ones from
  1945. the snake oil and bureaucratic mysticism.</p>
  1946. <p>All of which can make the web itself seem like a really complicated landscape,
  1947. and obscure the simplicity of its basic design, which is this:</p>
  1948. <p>Some programs pass text around to one another.</p>
  1949. <p>Which might sound familiar.</p>
  1950. <p>The gist of it is that the web is made out of URLs, &ldquo;Uniform Resource
  1951. Locators&rdquo;, which are paths to things. If you squint, these look kind of like
  1952. paths to files on your filesystem. When you visit a URL in your browser, it
  1953. asks a server for a certain path, and the server gives it back some text. When
  1954. you click a button to submit a form, your browser sends some text to the server
  1955. and waits to see what it says back. The text that gets passed around is
  1956. (usually) written in a language with particular significance to web browsers,
  1957. but if you look at it directly, it&rsquo;s a format that humans can understand.</p>
  1958. <p>Let&rsquo;s illustrate this. I&rsquo;ve written a really simple web page that lives at
  1959. <a href="http://p1k3.com/hello_world.html"><code>http://p1k3.com/hello_world.html</code></a>.</p>
  1960. <pre><code>$ curl 'https://p1k3.com/hello_world.html'
  1961. &lt;html&gt;
  1962. &lt;head&gt;
  1963. &lt;title&gt;hello, world&lt;/title&gt;
  1964. &lt;/head&gt;
  1965. &lt;body&gt;
  1966. &lt;h1&gt;hi everybody&lt;/h1&gt;
  1967. &lt;p&gt;How are things?&lt;/p&gt;
  1968. &lt;/body&gt;
  1969. &lt;/html&gt;
  1970. </code></pre>
  1971. <p><code>curl</code> is a program with lots and lots of features &mdash; it too is a little bit
  1972. of a kitchen sink &mdash; but it has one core purpose, which is to grab things from
  1973. URLs and spit them back out. It&rsquo;s a little bit like <code>cat</code> for things that live
  1974. on the web. Try the above command with just about any URL you can think of,
  1975. and you&rsquo;ll probably get <em>something</em> back. Let&rsquo;s try this book:</p>
  1976. <pre><code>$ curl 'https://p1k3.com/userland-book/' | head
  1977. &lt;!DOCTYPE html&gt;
  1978. &lt;html lang=en&gt;
  1979. &lt;head&gt;
  1980. &lt;meta charset="utf-8"&gt;
  1981. &lt;title&gt;userland: a book about the command line for humans&lt;/title&gt;
  1982. &lt;link rel=stylesheet href="userland.css" /&gt;
  1983. &lt;script src="js/jquery.js" type="text/javascript"&gt;&lt;/script&gt;
  1984. &lt;/head&gt;
  1985. &lt;body&gt;
  1986. </code></pre>
  1987. <p><code>hello_world.html</code> and <code>userland-book</code> are both written in HyperText Markup
  1988. Language. HTML is just text with a specific kind of structure. It&rsquo;s been
  1989. around for quite a while now, and has grown up a lot in 20 years, but at heart
  1990. it still looks a lot <a href="http://info.cern.ch/hypertext/WWW/TheProject.html">like it did in 1991</a>.</p>
  1991. <p>The basic idea is that the contents of a web page are marked up with tags.
  1992. A tag looks like this:</p>
  1993. <pre><code>&lt;title&gt;hi!&lt;/title&gt; -,
  1994. | | |
  1995. | `- content |
  1996. | `- closing tag
  1997. `-opening tag
  1998. </code></pre>
  1999. <p>Sometimes you&rsquo;ll see tags with what are known as &ldquo;attributes&rdquo;:</p>
  2000. <pre><code>&lt;a href="https://p1k3.com/userland-book"&gt;userland&lt;/a&gt;
  2001. </code></pre>
  2002. <p>This is how links are written in HTML. <code>href="..."</code> tells the browser where to
  2003. go when the user clicks on &ldquo;<a href="http://p1k3.com/userland-book">userland</a>&rdquo;.</p>
  2004. <p>Tags are a way to describe not so much what something <em>looks like</em> as what
  2005. something <em>means</em>. Browsers are, in large part, big collections of knowledge
  2006. about the meanings of tags and ways to represent those meanings.</p>
  2007. <p>While the browser you use day-to-day has (probably) a graphical interface and
  2008. does all sorts of things impossible to render in a terminal, some of the
  2009. earliest web browsers were entirely text-based, and text-mode browsers still
  2010. exist. Lynx, which originated at the University of Kansas in the early 1990s,
  2011. is still actively maintained:</p>
  2012. <pre><code>$ lynx -dump 'http://p1k3.com/userland-book/' | head
  2013. userland
  2014. __________________________________________________________________
  2015. [1]# a book about the command line for humans
  2016. Late last year, [2]a side trip into text utilities got me thinking
  2017. about how much my writing habits depend on the Linux command line. This
  2018. struck me as a good hook for talking about the tools I use every day
  2019. with an audience of mixed technical background.
  2020. </code></pre>
  2021. <p>If you invoke Lynx without any options, it&rsquo;ll start up in interactive mode, and
  2022. you can navigate between links with the arrow keys. <code>lynx -dump</code> spits a
  2023. rendered version of a page to standard output, with links annotated in square
  2024. brackets and printed as footnotes. Another useful option here is <code>-listonly</code>,
  2025. which will print just the list of links contained within a page:</p>
  2026. <pre><code>$ lynx -dump -listonly 'http://p1k3.com/userland-book/' | head
  2027. References
  2028. 2. http://p1k3.com/2013/8/4
  2029. 3. http://p1k3.com/userland-book.git
  2030. 4. https://github.com/brennen/userland-book
  2031. 5. http://p1k3.com/userland-book/
  2032. 6. https://twitter.com/brennen
  2033. 9. http://p1k3.com/userland-book/#a-book-about-the-command-line-for-humans
  2034. 10. http://p1k3.com/userland-book/#copying
  2035. </code></pre>
  2036. <p>An alternative to Lynx is w3m, which copes a little more gracefully with the
  2037. complexities of modern web layout.</p>
  2038. <pre><code>$ w3m -dump 'http://p1k3.com/userland-book/' | head
  2039. userland
  2040. ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
  2041. # a book about the command line for humans
  2042. Late last year, a side trip into text utilities got me thinking about how much
  2043. my writing habits depend on the Linux command line. This struck me as a good
  2044. hook for talking about the tools I use every day with an audience of mixed
  2045. technical background.
  2046. </code></pre>
  2047. <p>Neither of these tools can easily replace enormously capable applications like
  2048. Chrome or Firefox, but they have their place in the toolbox, and help to
  2049. demonstrate how the web is built (in part) on principles we&rsquo;ve already seen at
  2050. work.</p>
  2051. <hr />
  2052. <h1><a name=a-miscellany-of-tools-and-techniques href=#a-miscellany-of-tools-and-techniques>#</a> 9. a miscellany of tools and techniques</h1>
  2053. <h2><a name=dict href=#dict>#</a> dict</h2>
  2054. <p>Want to know the definition of a word, or find useful synonyms?</p>
  2055. <pre><code>$ dict concatenate | head -10
  2056. 4 definitions found
  2057. From The Collaborative International Dictionary of English v.0.48 [gcide]:
  2058. Concatenate \Con*cat"e*nate\ (k[o^]n*k[a^]t"[-e]*n[=a]t), v. t.
  2059. [imp. &amp; p. p. {Concatenated}; p. pr. &amp; vb. n.
  2060. {Concatenating}.] [L. concatenatus, p. p. of concatenare to
  2061. concatenate. See {Catenate}.]
  2062. To link together; to unite in a series or chain, as things
  2063. depending on one another.
  2064. </code></pre>
  2065. <h2><a name=aspell href=#aspell>#</a> aspell</h2>
  2066. <p>Need to interactively spell-check your presentation notes?</p>
  2067. <pre><code>$ aspell check presentation
  2068. </code></pre>
  2069. <p>Just want a list of potentially-misspelled words in a given file?</p>
  2070. <!-- exec -->
  2071. <pre><code>$ aspell list &lt; ../literary_environment/index.md | sort | uniq -ci | sort -nr | head -5
  2072. 40 td
  2073. 24 Veselka
  2074. 17 Reuel
  2075. 16 Brunner
  2076. 15 Tiptree
  2077. </code></pre>
  2078. <!-- end -->
  2079. <h2><a name=mostcommon href=#mostcommon>#</a> mostcommon</h2>
  2080. <p>Something like that last sequence sure does seem to show up a lot in my work:
  2081. Spit out the <em>n</em> most common lines in the input, one way or another. Here&rsquo;s
  2082. a little script to be less repetitive about it.</p>
  2083. <!-- exec -->
  2084. <pre><code>$ aspell list &lt; ../literary_environment/index.md | ./mostcommon -i -n5
  2085. 40 td
  2086. 24 Veselka
  2087. 17 Reuel
  2088. 16 Brunner
  2089. 15 Tiptree
  2090. </code></pre>
  2091. <!-- end -->
  2092. <p>This turns out to be pretty simple:</p>
  2093. <!-- exec -->
  2094. <pre><code>$ cat ./mostcommon
  2095. #!/usr/bin/env bash
  2096. # Optionally specify number of lines to show, defaulting to 10:
  2097. TOSHOW=10
  2098. CASEOPT=""
  2099. while getopts ":in:" opt; do
  2100. case $opt in
  2101. i)
  2102. CASEOPT="-i"
  2103. ;;
  2104. n)
  2105. TOSHOW=$OPTARG
  2106. ;;
  2107. \?)
  2108. echo "Invalid option: -$OPTARG" &gt;&amp;2
  2109. exit 1
  2110. ;;
  2111. :)
  2112. echo "Option -$OPTARG requires an argument." &gt;&amp;2
  2113. exit 1
  2114. ;;
  2115. esac
  2116. done
  2117. # sort and then uniqify STDIN,
  2118. # sort numerically on the first field,
  2119. # chop off everything but $TOSHOW lines of input
  2120. sort &lt; /dev/stdin | uniq -c $CASEOPT | sort -k1 -nr | head -$TOSHOW
  2121. </code></pre>
  2122. <!-- end -->
  2123. <p>Notice, though, that it doesn&rsquo;t handle opening files directly. If you wanted
  2124. to find the most common lines in a file with it, you&rsquo;d have to say something
  2125. like <code>mostcommon &lt; filename</code> in order to redirect the file to <code>mostcommon</code>&rsquo;s
  2126. input.</p>
  2127. <p>Also notice that most of the script is boilerplate for handling a couple of
  2128. options. The work is all done in a one-liner. Worth it? Maybe not, but an
  2129. interesting exercise.</p>
  2130. <h2><a name=cal-and-ncal href=#cal-and-ncal>#</a> cal and ncal</h2>
  2131. <p>Want to know what the calendar looks like for this month?</p>
  2132. <pre><code>$ cal
  2133.      April 2014
  2134. Su Mo Tu We Th Fr Sa
  2135.        1  2  3  4  5
  2136.  6  7  8  9 10 11 12
  2137. 13 14 15 16 17 18 19
  2138. 20 21 22 23 24 25 26
  2139. 27 28 29 30
  2140. </code></pre>
  2141. <p>How about for September, 1950, in a more compact format?</p>
  2142. <!-- exec -->
  2143. <pre><code>$ ncal -m9 1950
  2144.     September 1950
  2145. Su     3 10 17 24
  2146. Mo     4 11 18 25
  2147. Tu     5 12 19 26
  2148. We     6 13 20 27
  2149. Th     7 14 21 28
  2150. Fr  1  8 15 22 29
  2151. Sa  2  9 16 23 30
  2152. </code></pre>
  2153. <!-- end -->
  2154. <p>Need to know the date of Easter this year?</p>
  2155. <!-- exec -->
  2156. <pre><code>$ ncal -e
  2157. April 5 2015
  2158. </code></pre>
  2159. <!-- end -->
  2160. <h2><a name=seq href=#seq>#</a> seq</h2>
  2161. <p>Need the numbers 1-5?</p>
  2162. <!-- exec -->
  2163. <pre><code>$ seq 1 5
  2164. 1
  2165. 2
  2166. 3
  2167. 4
  2168. 5
  2169. </code></pre>
  2170. <!-- end -->
  2171. <h2><a name=shuf href=#shuf>#</a> shuf</h2>
  2172. <p>Want to shuffle some lines?</p>
  2173. <!-- exec -->
  2174. <pre><code>$ seq 1 5 | shuf
  2175. 4
  2176. 5
  2177. 1
  2178. 3
  2179. 2
  2180. </code></pre>
  2181. <!-- end -->
  2182. <h2><a name=ptx href=#ptx>#</a> ptx</h2>
  2183. <p>Want to make a <a href="http://en.wikipedia.org/wiki/Key_Word_in_Context">permuted index</a> of some phrase?</p>
  2184. <!-- exec -->
  2185. <pre><code>$ echo 'i like american music' | ptx
  2186. i like american music
  2187. i like american music
  2188. i like american music
  2189. i like american music
  2190. </code></pre>
  2191. <!-- end -->
  2192. <h2><a name=figlet href=#figlet>#</a> figlet</h2>
  2193. <p>Need to make ASCII art of some giant letters?</p>
  2194. <!-- exec -->
  2195. <pre><code>$ figlet "R T F M"
  2196. ____ _____ _____ __ __
  2197. | _ \ |_ _| | ___| | \/ |
  2198. | |_) | | | | |_ | |\/| |
  2199. | _ &lt; | | | _| | | | |
  2200. |_| \_\ |_| |_| |_| |_|
  2201. </code></pre>
  2202. <!-- end -->
  2203. <h2><a name=cowsay href=#cowsay>#</a> cowsay</h2>
  2204. <p>How about ASCII art of a <del>cow</del> dragon saying something?</p>
  2205. <!-- exec -->
  2206. <pre><code>$ cowsay -f dragon "RTFM, man"
  2207. ___________
  2208. &lt; RTFM, man &gt;
  2209. -----------
  2210. \ / \ //\
  2211. \ |\___/| / \// \\
  2212. /0 0 \__ / // | \ \
  2213. / / \/_/ // | \ \
  2214. @_^_@'/ \/_ // | \ \
  2215. //_^_/ \/_ // | \ \
  2216. ( //) | \/// | \ \
  2217. ( / /) _|_ / ) // | \ _\
  2218. ( // /) '/,_ _ _/ ( ; -. | _ _\.-~ .-~~~^-.
  2219. (( / / )) ,-{ _ `-.|.-~-. .~ `.
  2220. (( // / )) '/\ / ~-. _ .-~ .-~^-. \
  2221. (( /// )) `. { } / \ \
  2222. (( / )) .----~-.\ \-' .~ \ `. \^-.
  2223. ///.----..&gt; \ _ -~ `. ^-` ^-_
  2224. ///-._ _ _ _ _ _ _}^ - - - - ~ ~-- ,.-~
  2225. /.-~
  2226. </code></pre>
  2227. <!-- end -->
  2228. <hr />
  2229. <h1><a name=endmatter href=#endmatter>#</a> endmatter</h1>
  2230. <h2><a name=further-reading href=#further-reading>#</a> further reading</h2>
  2231. <ul>
  2232. <li><em>The Unix Programming Environment</em> - Brian W. Kernighan, Rob Pike</li>
  2233. <li><a href="https://www.youtube.com/watch?v=tc4ROCJYbm0">AT&amp;T Archives: The UNIX Operating System</a> (YouTube)</li>
  2234. <li><a href="https://medium.com/message/tilde-club-i-had-a-couple-drinks-and-woke-up-with-1-000-nerds-a8904f0a2ebf">I had a couple drinks and woke up with 1,000 nerds</a> - Paul Ford</li>
  2235. </ul>
  2236. <h2><a name=code href=#code>#</a> code</h2>
  2237. <p><a href="https://p1k3.com/userland-book.git">p1k3.com/userland-book.git</a>
  2238. should be considered the canonical git repository, but I&rsquo;m pushing everything
  2239. to a <a href="https://github.com/brennen/userland-book">GitHub mirror</a>, and
  2240. welcome feedback there.</p>
  2241. <h2><a name=copying href=#copying>#</a> copying</h2>
  2242. <p>This work is licensed under a
  2243. <a rel="license" href="https://creativecommons.org/licenses/by-sa/4.0/">Creative
  2244. Commons Attribution-ShareAlike 4.0 International License</a>.</p>
  2245. <p><a rel="license" href="https://creativecommons.org/licenses/by-sa/4.0/">
  2246. <img alt="Creative Commons License" src="images/by-sa-4.png" />
  2247. </a></p>
  2248. <hr />
  2249. <script>
  // Once the DOM is ready, give every ".details" section a button that
  // shows or hides its ".full" part, and start with all ".full" parts hidden.
  $(document).ready(function () {
    // NOTE(review): presumably candidate glyphs for the sigils below -- confirm.
    // ☜ ☝ ☞ ☟ ☆ ✠ ✡ ✢ ✣ ✤ ✥ ✦ ✧ ✩ ✪
    var closed_sigil = 'show';
    var open_sigil = 'hide';

    // Flip a button's label: closed_sigil becomes open_sigil, anything else
    // becomes closed_sigil. The label is read and written as text, so a
    // sigil is never interpreted as markup.
    var togglesigil = function (elem) {
      var $elem = $(elem);
      if ($elem.text() === closed_sigil) {
        $elem.text(open_sigil);
      } else {
        $elem.text(closed_sigil);
      }
    };

    $(".details").each(function () {
      // Wrap the section once and reuse the wrapper for both lookups.
      var $this = $(this);
      var $details_full = $this.find('.full');
      var $button = $('<button class=clicker-button></button>').text(closed_sigil);

      $button.click(function (e) {
        // Suppress the button's default action; it only toggles visibility.
        e.preventDefault();
        $details_full.toggle({
          duration: 550
        });
        // `this` is the clicked button element.
        togglesigil(this);
      });

      // The button is placed inside the section's ".clicker" element.
      $this.find('.clicker').append($button);
      $button.show();
    });

    // Hide every ".full" part, matching the closed_sigil label that each
    // button is created with.
    $('.details .full').hide();
  });
  2278. </script>
  2279. </body>
  2280. </html>