use regex::{Regex, Replacer}; use std::io::{self, Read}; use structopt::StructOpt; use failure::ResultExt; use exitfailure::ExitFailure; #[derive(StructOpt)] #[structopt(name = "bpb-words", about = "Split files into individual words.")] struct Cli { /// (f)ile to read, or - for stdin #[structopt(default_value = "-")] path: String, /// (d)elimiter to split on - a regular expression #[structopt(short, long, default_value = r"\s+")] delimiter: String, /// Strip non-alphanumeric characters #[structopt(short, long)] alpha_only: bool, /// Coerce all words to lower-case #[structopt(short, long)] lower_case: bool, /// (s)hortest word to pass through #[structopt(short, long)] shortest: Option, /// (b)iggest word to pass through #[structopt(short, long)] biggest: Option, } // TODO: // - [X] command-line help // - [X] file input // - [X] support specifying split delimiter pattern // - [X] support stripping non-alphanumeric chars // - [X] support coercing to lowercase // - [ ] support max and min length of words to pass through fn main() -> Result<(), ExitFailure> { let args = Cli::from_args(); let mut content = String::new(); if args.path == "-" { let stdin = io::stdin(); let mut handle = stdin.lock(); handle.read_to_string(&mut content)?; } else { // https://rust-lang-nursery.github.io/cli-wg/tutorial/errors.html content = std::fs::read_to_string(&args.path) .with_context(|_| format!("could not read file `{}`", &args.path))?; } let delimiter = Regex::new(&args.delimiter)?; // bpb_words::split_words(&delimiter, &content, &mut words)?; let split = delimiter.split(&content); for word in split { // Handle stripping non a-z (ish) characters: let mut replaced = String::new(); let word = if args.alpha_only { replaced = bpb_words::replace_nonalpha(&word, ""); replaced.as_str() } else { word }; // Handle lowercasing: let mut lc = String::new(); let word = if args.lower_case { lc = word.to_lowercase(); lc.as_str() } else { word }; let count = word.chars().count(); let pass_max_len = match args.biggest { // Some(max_len) => count <= max_len, Some(max_len) => count as u64 <= max_len, None => true, }; let pass_min_len = match args.shortest { // Some(max_len) => count <= max_len, Some(min_len) => count as u64 >= min_len, None => true, }; if (pass_min_len && pass_max_len) { println!("{}", word); } } Ok(()) }