#!/usr/bin/python # (C) Kim Holburn 2016 # released under GNU Public License http://www.gnu.org/copyleft/gpl.html # script to generate pseudo-random phrases # version = "1.6 2016-07-11" # version = "1.5 2016-07-09"; # version 1.4 2016-07-05 # version 1.3 2016-06-29 # version 1.2 2016-06-25 import argparse import string import random import re from os import urandom import os.path from itertools import chain import sys #from sys import version_info #from sys import stderr #from sys import stdout assert sys.version_info >= (2,7) mypath = __file__ myfile = os.path.basename(__file__) mydir = os.path.dirname(__file__) (myfilex,myext) = os.path.splitext(myfile) (mypathex,myext) = os.path.splitext(mypath) myversion = "1.6 2016-07-11" def main(): parser = argparse.ArgumentParser( description="generate random words" """ version {} Examples: The way I normally use this is: %(prog)s (generate some random words) %(prog)s -f (frequency weighted random words) Other possibilities: %(prog)s -w 3 -t 5 %(prog)s -F ws """.format(myversion), epilog=""" Default mode is dictionary mode defaults for dictionary mode is: %(prog)s -d -F "/usr/share/dict/words" -w 5 -m 2 -x 7 -t 5 -s " " defaults for frequency mode is: %(prog)s -f -F "/randlib/all.al" -w 5 -m 2 -x 7 -t 5 -s " " -P 1 defaults for wordy mode is: %(prog)s -r -w 5 -m 2 -x 7 -t 5 -s " " There are no default files for make mode. Make mode is used like this: %(prog)s -M -F [ etc.] --bad %(prog)s -M -F [ etc.] -O %(prog)s -M -F [ etc.] > output.words List mode just lists dictionary files in places this program looks """, prefix_chars='-', prog='randword', usage='%(prog)s [options]', formatter_class=argparse.RawTextHelpFormatter, conflict_handler='resolve') parser.add_argument("-h", "--help", action="store_true", dest="help", default=False, help="show this help screen and exit") parser.add_argument("-V", "--version", action="store_true", dest="version", default=False, help="show the version and exit") parser.add_argument("-v", "--verbose", action="count", dest="verbose", default=0, help="print more status messages to stdout") parser.add_argument("-t", "--times", action="store", dest="times", type=int, default=1, help="number of times to run (default=5)") parser.add_argument("-N", "--neol", action="store_false", dest="eol", default=True, help="Send no end of line at end (default=eol)") #parser.add_argument("-p", "--presets", # action="store", dest="preset", type=int, default=0, choices=xrange(1, 4), # help=("presets (default=1): \n" # "1 (-w 5 -m 2 -x 7 -t 5 -s ' ' -S '')\n" # "2 (-f )\n" # "3 (-r )") # ) parser.add_argument("-m", "--min", action="store", dest="wmin", type=int, default=0, help="minimum word length (default=2)") parser.add_argument("-x", "--max", action="store", dest="wmax", type=int, default=0, help="maximum word length (default=7)") parser.add_argument("-a", "--any-length", action="store_true", dest="anylen", default=False, help="Words of any length (default=False).\n" "In wordy mode is equal to: max=24,min=1") parser.add_argument("-w", "--words", action="store", dest="nwords", type=int, default=0, help="number of words (default=4)") parser.add_argument("-s", "--spaces", action="store", dest="spaces", type=str, default='', help="space characters (default=' ')\n" "more than one space character will be used randomly\n" "between each word" ) parser.add_argument("-S", "--no-space", action="store", dest="nspace", type=str, help="no space - one of the space characters can represent no space" r""" It's hard to explain this option. add a letter like say 3 to the space list then add -S "3" and whenever the 3 gets chosen as the space, the program adds no space. In part this option exists because some argument parsers and OSes make it hard to enter an empty string for spaces. Not mentioning any names argpaser! """ ) parser.add_argument("-A", "--alternate", action="store_true", dest="alternate", default=False, help="Alternate way of choosing words (default=false)\n" "Normal way of choosing words is to choose a word length\n" "from the range of word lengths with equal weight,\n" "then choose a word of that length.\n" "that way word lengths have equal weight\n" "Alternate method is to create a list of only words\n" "of allowed length and then choose from that list.\n" "This gives less short words. Doesn't apply to wordy mode" ) parser.add_argument("-C", "--capitalise", action="store_true", dest="caps", default=False, help="Capitalise words (default=False)") parser.add_argument("-F", "--file", action="append", dest="dictfile", nargs='+', help='Dictionary files \n' """ Will look in directories "randlib/" or %(prog)ss/" Will try for files with a ".words" extension. Dictionary files need to have one word per line. Frequency files need to have one word and one number per line. They can have more fields separated by space, tab or commas but the word (and number) must be the first or second field. """.format(mypath) ) dictg = parser.add_argument_group('dictg', description="Dictionary mode: \n" "" "The following options imply dictionary mode. ") dictg.add_argument("-d", "--dictionary-mode", action="store_true", dest="dict", default=False, help="Dictionary mode [default]") dictg.add_argument("-u", "--unique", action="store_true", dest="unique", default=False, help="remove duplicate words (default=False)") freqg = parser.add_argument_group('freq', description="Frequency mode: " """ use frequency weighted word list (more frequently used words are more likely to be chosen) Requires a frequency file. Default frequency file is: "{}/randlib/all.al". Frequency files should have at least two fields per line, a word and a frequency number, in either order but the order must be the same throughout the file. The fields can be separated by space, tab or comma. There can be more than two fields but only the first two fields are used. The following options imply frequency mode: """.format(mydir)) freqg.add_argument("-f", "--frequency", action="store_true", dest="freq", default=False, help="frequency mode") freqg.add_argument("-P", "--popularity", action="store", dest="pop", default=0, type=int, help="Popularity: only use words of frequency n or greater (default=1)") wordyg = parser.add_argument_group('wordy', description="Wordy mode: " "make word-like random output.\n" "The following options imply wordy mode. ") wordyg.add_argument("-r", "--wordy", action="store_true", dest="wordy", default=False, help="wordy mode") wordyg.add_argument("-i", "--inword", action="store_true", dest="inword", default=False, help="Add inword punctuation ' or - (default=False)") makeg = parser.add_argument_group('make', description="Makedict mode: " "make a word dictionary with frequency\n" "from files of text (there is no default file)\n" "example: %(prog)s -M -F []\n") makeg.add_argument("-M", "--make", action="store_true", dest="make", default=False, help="Make dictionary") makeg.add_argument("--bad", action="store_false", dest="good", default=True, help="output rejected words (diagnostic)") makeg.add_argument("-O", "--output-dict", action="store", dest="outputdict", help="dictionary output file (otherwise stdout is used)") listy = parser.add_argument_group('listy', description="List mode: " "show all the word dictionaries I can see\n" "or files in the directories I search\n") listy.add_argument("-L", "--list", action="store_true", dest="listy", default=False, help="List dictionaries") listy.add_argument("-l","--full-list", action="store_true", dest="full", default=False, help="list full paths") # args = parser.parse_args() if args.help: parser.print_help() print exit() if args.version: print "{} version {}".format(myfile, myversion) exit() times = 5 eol = args.eol eolc = '\n' if not eol: eolc = '' verbose = args.verbose preset = 1 presetl = ['lllldddYYYsss', 'lllldddYY', 'lllld', 'H'] presetc = [12, 12, 8, 20] presetn = [1, 1, 1, 1] output = '' wmax = 9 wmin = 2 nwords = 5 spaces = ' ' nspace = '' anylen = False caps = False dict = True dictfile = '/usr/share/dict/words' # dictfile1 = mypath + 's/words' # dictfile2 = mydir + '/randlib/words' dictfile1 = 'words' dictfiles = [] unique = args.unique alternate = False mymode = '' freq = False freqfile = 'all.al' # freqfile = mypath + 's/all.al' # freqfile1 = mydir + '/randlib/all.al' pop = 1 def findit(farg): paths = ['', mydir + '/randlib/', mypathex + 's/'] if 1 < verbose: print "paths = ({})".format(paths) if mypathex != mypath: paths.append(mypath + 's/') exts = ["", '.words'] for pp in paths: for ee in exts: ff = pp + farg + ee # try the file as a full path by itself # /dir-randword/randword or /dir/randword.py # try for file in /dir/randlib/ # try for file in /dir/randwords/ # try in /dir/randwords.pys/ # try also with .words ext if 1 < verbose: print "filef = ({})".format(ff) if os.path.exists(ff): return ff return "" inword = False full = args.full good = args.good makey = args.make outputdict = '' if args.outputdict: if os.path.isfile(args.outputdict): print "Error: output file ({}) exists".format(args.outputdict) exit (1) outputdict = args.outputdict if 1 < verbose: print type(args) print "options:" for key, value in vars(args).items(): print " {}:({})".format(key, value) caps = args.caps if 1 < args.times: times = args.times if 0 < args.wmin: wmin = args.wmin if 0 < args.wmax: if args.wmax < args.wmin: print "Error: max must be larger than min" exit(1) wmax = args.wmax if args.nwords: nwords = args.nwords if args.spaces: if re.search(r"[^\x20-\x7E]", args.spaces): # stupid python doesn't have [:print:] or even \p{Print} character classes print "Error: spaces must be printable characters" exit(1) spaces = args.spaces if args.nspace: if len(args.nspace) != 1: print "Error: nspace must be a single character" exit(1) if re.search(r"[^\x20-\x7E]", args.nspace): # stupid python doesn't have [:print:] or even \p{Print} character classes print "Error: nspace must be a printable character" exit(1) if not spaces: # stupid python doesn't have [:print:] or even \p{Print} character classes print "Error: nspace only useful if spaces" exit(1) if args.nspace not in spaces: print >> sys.stderr, "Warning: nspace is not in spaces" nspace = args.nspace if args.anylen: anylen = args.anylen if args.alternate: alternate = args.alternate if args.inword: inword = True # decide which mode dict_opts = args.dict or args.unique freq_opts = args.freq or args.pop wordy_opts = args.wordy or args.inword makey_opts = makey or not args.good or args.outputdict listy_opts = args.listy or args.full opty = 0 # if dict_opts: opty += 1 # if freq_opts: opty += 1 # if wordy_opts: opty += 1 # if makey_opts: opty += 1 # if listy_opts: opty += 1 for o in [dict_opts,freq_opts,wordy_opts,makey_opts,listy_opts]: if o: opty += 1 # if ((dict_opts and (freq_opts or wordy_opts or makey_opts or listy_opts)) or # (freq_opts and (wordy_opts or makey_opts or listy_opts)) or # (wordy_opts and (makey_opts or listy_opts)) or # (makey_opts and listy_opts)): if 1 < opty: print "Error: only use one mode: dictionary, frequency, wordy, make or list" exit(1) if dict_opts: dict = True mymode = 'd' if freq_opts: dict = False freq = True mymode = 'f' if wordy_opts: dict = False mymode = 'r' if makey_opts: if not args.dictfile: print "Error: make dictionary mode but no input files" exit(1) dict = False makey = True mymode = 'M' if listy_opts: mymode = 'L' if not mymode: mymode = 'd' #print "Error: I can't work out which mode to use" #exit(1) if 1< verbose: print "mode=({})".format(mymode) if 1\S+)\b') dre1 = re.compile(r'(?P\S+)\b') if style == 0: words.extend(mylines) else: if style == 2: dre = dre2 else: # style == 1 or style == 3: dre = dre1 dwords = [] for myline in mylines: m = dre.match(myline) if m: word = m.group('word') #(n, word) = myline.split (None,2) dwords.append(word) words.extend(dwords) if 1\d+)[\s,\.]+(?P\S+)') fre1 = re.compile(r'(?P\S+)[\s,\.]+(?P\d+)') bad = 0 pr = 0 if style == 0 or style == 3: print "Error: you chose frequency but file ({}) is a simple word list".format(f) exit(1) elif style == 1: fre = fre1 else: fre = fre2 for myline in mylines: wt = 0 m = fre.match(myline) if m: (word, weight) = (m.group('word'), m.group('freq')) wt = int(weight) if weight.isdigit() else 0 if wt and pop <= wt: fwords[word] = fwords.get(word,0) + wt #if word in fwords: fwords[word] += wt #else: fwords[word] = wt ftotal += wt if 1