From f81c299c2779fb1d6c861919f443e6ed8d1f62bc Mon Sep 17 00:00:00 2001
From: "Alex Z."
Date: Tue, 8 Sep 2009 01:11:52 +0000
Subject: [PATCH] Various code cleanups for the captcha generating script

* Use optparse instead of getopt
* Replace deprecated md5 module
* Replace deprecated string module functions with string methods
* More graceful failure
* Allow users to set the font size
* Don't run forever if no valid word combinations can be found
* Make read_wordlist() open the file it is given rather than the global
  wordlist, so that --blacklist is actually honoured
* Only treat ImportError as "PIL is missing" instead of using a bare except
---
 captcha.py | 118 +++++++++++++++++++++++++++++------------------------
 1 file changed, 65 insertions(+), 53 deletions(-)

diff --git a/captcha.py b/captcha.py
index effdd3681..0cd1450e2 100644
--- a/captcha.py
+++ b/captcha.py
@@ -26,17 +26,24 @@
 # 2008-01-06: Add regex check to skip words containing other than a-z
 
 import random
-import Image
-import ImageFont
-import ImageDraw
-import ImageEnhance
-import ImageOps
-import math, string, md5
-import getopt
+import math
+import hashlib
+from optparse import OptionParser
 import os
 import sys
 import re
 
+try:
+    import Image
+    import ImageFont
+    import ImageDraw
+    import ImageEnhance
+    import ImageOps
+except ImportError:
+    sys.exit("This script requires the Python Imaging Library - http://www.pythonware.com/products/pil/")
+
+nonalpha = re.compile('[^a-z]') # regex to test for suitability of words
+
 # Does X-axis wobbly copy, sandwiched between two rotates
 def wobbly_copy(src, wob, col, scale, ang):
     x, y = src.size
@@ -48,7 +55,6 @@ def wobbly_copy(src, wob, col, scale, ang):
     # Do a cheap bounding-box op here to try to limit work below
     bbx = rot.getbbox()
     if bbx == None:
-        print "whoops"
         return src
     else:
         l, t, r, b= bbx
@@ -102,13 +108,13 @@ def gen_captcha(text, fontname, fontsize, file_name):
     # save the image, in format determined from filename
     im.save(file_name)
 
-def gen_subdir(basedir, hash, levels):
+def gen_subdir(basedir, md5hash, levels):
     """Generate a subdirectory path out of the first _levels_
     characters of _hash_, and ensure the directories exist
     under _basedir_."""
     subdir = None
     for i in range(0, levels):
-        char = hash[i]
+        char = md5hash[i]
         if subdir:
             subdir = os.path.join(subdir, char)
         else:
@@ -124,9 +130,9 @@ def try_pick_word(words, blacklist, verbose):
     word = word1+word2
     if verbose:
         print "word is %s" % word
-    r = re.compile('[^a-z]');
-    if r.search(word):
-        print "skipping word pair '%s' because it contains non-alphabetic characters" % word
+    if nonalpha.search(word):
+        if verbose:
+            print "skipping word pair '%s' because it contains non-alphabetic characters" % word
         return None
 
     for naughty in blacklist:
@@ -137,13 +143,14 @@ def try_pick_word(words, blacklist, verbose):
     return word
 
 def pick_word(words, blacklist, verbose):
-    while True:
+    for x in range(1000): # If we can't find a valid combination in 1000 tries, just give up
         word = try_pick_word(words, blacklist, verbose)
         if word:
             return word
+    sys.exit("Unable to find valid word combinations")
 
 def read_wordlist(filename):
-    return [string.lower(x.strip()) for x in open(wordlist).readlines()]
+    return [x.strip().lower() for x in open(filename).readlines()]
 
 if __name__ == '__main__':
     """This grabs random words from the dictionary 'words' (one
@@ -153,47 +160,51 @@ if __name__ == '__main__':
     To check a reply, hash it in the same way with the same salt and
     secret key, then compare with the hash value given.
     """
-    font = "VeraBd.ttf"
-    wordlist = "awordlist.txt"
-    blacklistfile = None
-    key = "CHANGE_THIS_SECRET!"
-    output = "."
-    count = 20
-    fill = 0
-    dirs = 0
-    verbose = False
+    parser = OptionParser()
+    parser.add_option("--wordlist", help="A list of words (required)", metavar="WORDS.txt")
+    parser.add_option("--key", help="The passphrase set as $wgCaptchaSecret (required)", metavar="KEY")
+    parser.add_option("--output", help="The directory to put the images in - $wgCaptchaDirectory (required)", metavar="DIR")
+    parser.add_option("--font", help="The font to use (required)", metavar="FONT.ttf")
+    parser.add_option("--font-size", help="The font size (default 40)", metavar="N", type='int', default=40)
+    parser.add_option("--count", help="The maximum number of images to make (default 20)", metavar="N", type='int', default=20)
+    parser.add_option("--blacklist", help="A blacklist of words that should not be used", metavar="FILE")
+    parser.add_option("--fill", help="Fill the output directory to contain N files, overrides count, cannot be used with --dirs", metavar="N", type='int')
+    parser.add_option("--dirs", help="Put the images into subdirectories N levels deep - $wgCaptchaDirectoryLevels", metavar="N", type='int')
+    parser.add_option("--verbose", "-v", help="Show debugging information", action='store_true')
 
-    opts, args = getopt.getopt(sys.argv[1:], "", ["font=", "wordlist=", "blacklist=", "key=", "output=", "count=", "fill=", "dirs=", "verbose"])
-    for o, a in opts:
-        if o == "--font":
-            font = a
-        if o == "--wordlist":
-            wordlist = a
-        if o == "--blacklist":
-            blacklistfile = a
-        if o == "--key":
-            key = a
-        if o == "--output":
-            output = a
-        if o == "--count":
-            count = int(a)
-        if o == "--fill":
-            fill = int(a)
-        if o == "--dirs":
-            dirs = int(a)
-        if o == "--verbose":
-            verbose = True
+    opts, args = parser.parse_args()
+
+    if opts.wordlist:
+        wordlist = opts.wordlist
+    else:
+        sys.exit("Need to specify a wordlist")
+    if opts.key:
+        key = opts.key
+    else:
+        sys.exit("Need to specify a key")
+    if opts.output:
+        output = opts.output
+    else:
+        sys.exit("Need to specify an output directory")
+    if opts.font and os.path.exists(opts.font):
+        font = opts.font
+    else:
+        sys.exit("Need to specify the location of a font")
+
+    blacklistfile = opts.blacklist
+    count = opts.count
+    fill = opts.fill
+    dirs = opts.dirs
+    verbose = opts.verbose
+    fontsize = opts.font_size
 
     if fill:
-        # Option processing order is not guaranteed, so count the output
-        # files after...
         count = max(0, fill - len(os.listdir(output)))
 
     words = read_wordlist(wordlist)
     words = [x for x in words
-        if len(x) <= 5 and len(x) >= 4 and x[0] != "f"
-        and x[0] != x[1] and x[-1] != x[-2]
-        and (not "'" in x)]
+        if len(x) in (4,5) and x[0] != "f"
+        and x[0] != x[1] and x[-1] != x[-2]]
 
     if blacklistfile:
         blacklist = read_wordlist(blacklistfile)
@@ -204,11 +215,12 @@ if __name__ == '__main__':
         word = pick_word(words, blacklist, verbose)
         salt = "%08x" % random.randrange(2**32)
         # 64 bits of hash is plenty for this purpose
-        hash = md5.new(key+salt+word+key+salt).hexdigest()[:16]
-        filename = "image_%s_%s.png" % (salt, hash)
+        md5hash = hashlib.md5(key+salt+word+key+salt).hexdigest()[:16]
+        filename = "image_%s_%s.png" % (salt, md5hash)
         if dirs:
-            subdir = gen_subdir(output, hash, dirs)
+            subdir = gen_subdir(output, md5hash, dirs)
             filename = os.path.join(subdir, filename)
         if verbose:
             print filename
-        gen_captcha(word, font, 40, os.path.join(output, filename))
+        gen_captcha(word, font, fontsize, os.path.join(output, filename))
+