Various code cleanups for the captcha generating script

* Use optparse instead of getopt
* Replace deprecated md5 module
* Replace deprecated string module functions with string methods
* More graceful failure
* Allow users to set the font size
* Don't run forever if no valid word combinations can be found
This commit is contained in:
Alex Z. 2009-09-08 01:11:52 +00:00
parent 04872c6af8
commit f81c299c27

View file

@ -26,17 +26,24 @@
# 2008-01-06: Add regex check to skip words containing other than a-z
import random
import Image
import ImageFont
import ImageDraw
import ImageEnhance
import ImageOps
import math, string, md5
import getopt
import math
import hashlib
from optparse import OptionParser
import os
import sys
import re
try:
import Image
import ImageFont
import ImageDraw
import ImageEnhance
import ImageOps
except:
sys.exit("This script requires the Python Imaging Library - http://www.pythonware.com/products/pil/")
nonalpha = re.compile('[^a-z]') # regex to test for suitability of words
# Does X-axis wobbly copy, sandwiched between two rotates
def wobbly_copy(src, wob, col, scale, ang):
x, y = src.size
@ -48,7 +55,6 @@ def wobbly_copy(src, wob, col, scale, ang):
# Do a cheap bounding-box op here to try to limit work below
bbx = rot.getbbox()
if bbx == None:
print "whoops"
return src
else:
l, t, r, b= bbx
@ -102,13 +108,13 @@ def gen_captcha(text, fontname, fontsize, file_name):
# save the image, in format determined from filename
im.save(file_name)
def gen_subdir(basedir, hash, levels):
def gen_subdir(basedir, md5hash, levels):
"""Generate a subdirectory path out of the first _levels_
characters of _md5hash_, and ensure the directories exist
under _basedir_."""
subdir = None
for i in range(0, levels):
char = hash[i]
char = md5hash[i]
if subdir:
subdir = os.path.join(subdir, char)
else:
@ -124,9 +130,9 @@ def try_pick_word(words, blacklist, verbose):
word = word1+word2
if verbose:
print "word is %s" % word
r = re.compile('[^a-z]');
if r.search(word):
print "skipping word pair '%s' because it contains non-alphabetic characters" % word
if nonalpha.search(word):
if verbose:
print "skipping word pair '%s' because it contains non-alphabetic characters" % word
return None
for naughty in blacklist:
@ -137,13 +143,14 @@ def try_pick_word(words, blacklist, verbose):
return word
def pick_word(words, blacklist, verbose):
while True:
for x in range(1000): # If we can't find a valid combination in 1000 tries, just give up
word = try_pick_word(words, blacklist, verbose)
if word:
return word
sys.exit("Unable to find valid word combinations")
def read_wordlist(filename):
return [string.lower(x.strip()) for x in open(wordlist).readlines()]
return [x.strip().lower() for x in open(wordlist).readlines()]
if __name__ == '__main__':
"""This grabs random words from the dictionary 'words' (one
@ -153,47 +160,51 @@ if __name__ == '__main__':
To check a reply, hash it in the same way with the same salt and
secret key, then compare with the hash value given.
"""
font = "VeraBd.ttf"
wordlist = "awordlist.txt"
blacklistfile = None
key = "CHANGE_THIS_SECRET!"
output = "."
count = 20
fill = 0
dirs = 0
verbose = False
parser = OptionParser()
parser.add_option("--wordlist", help="A list of words (required)", metavar="WORDS.txt")
parser.add_option("--key", help="The passphrase set as $wgCaptchaSecret (required)", metavar="KEY")
parser.add_option("--output", help="The directory to put the images in - $wgCaptchaDirectory (required)", metavar="DIR")
parser.add_option("--font", help="The font to use (required)", metavar="FONT.ttf")
parser.add_option("--font-size", help="The font size (default 40)", metavar="N", type='int', default=40)
parser.add_option("--count", help="The maximum number of images to make (default 20)", metavar="N", type='int', default=20)
parser.add_option("--blacklist", help="A blacklist of words that should not be used", metavar="FILE")
parser.add_option("--fill", help="Fill the output directory to contain N files, overrides count, cannot be used with --dirs", metavar="N", type='int')
parser.add_option("--dirs", help="Put the images into subdirectories N levels deep - $wgCaptchaDirectoryLevels", metavar="N", type='int')
parser.add_option("--verbose", "-v", help="Show debugging information", action='store_true')
opts, args = getopt.getopt(sys.argv[1:], "", ["font=", "wordlist=", "blacklist=", "key=", "output=", "count=", "fill=", "dirs=", "verbose"])
for o, a in opts:
if o == "--font":
font = a
if o == "--wordlist":
wordlist = a
if o == "--blacklist":
blacklistfile = a
if o == "--key":
key = a
if o == "--output":
output = a
if o == "--count":
count = int(a)
if o == "--fill":
fill = int(a)
if o == "--dirs":
dirs = int(a)
if o == "--verbose":
verbose = True
opts, args = parser.parse_args()
if opts.wordlist:
wordlist = opts.wordlist
else:
sys.exit("Need to specify a wordlist")
if opts.key:
key = opts.key
else:
sys.exit("Need to specify a key")
if opts.output:
output = opts.output
else:
sys.exit("Need to specify an output directory")
if opts.font and os.path.exists(opts.font):
font = opts.font
else:
sys.exit("Need to specify the location of a font")
blacklistfile = opts.blacklist
count = opts.count
fill = opts.fill
dirs = opts.dirs
verbose = opts.verbose
fontsize = opts.font_size
if fill:
# Option processing order is not guaranteed, so count the output
# files after...
count = max(0, fill - len(os.listdir(output)))
words = read_wordlist(wordlist)
words = [x for x in words
if len(x) <= 5 and len(x) >= 4 and x[0] != "f"
and x[0] != x[1] and x[-1] != x[-2]
and (not "'" in x)]
if len(x) in (4,5) and x[0] != "f"
and x[0] != x[1] and x[-1] != x[-2]]
if blacklistfile:
blacklist = read_wordlist(blacklistfile)
@ -204,11 +215,12 @@ if __name__ == '__main__':
word = pick_word(words, blacklist, verbose)
salt = "%08x" % random.randrange(2**32)
# 64 bits of hash is plenty for this purpose
hash = md5.new(key+salt+word+key+salt).hexdigest()[:16]
filename = "image_%s_%s.png" % (salt, hash)
md5hash = hashlib.md5(key+salt+word+key+salt).hexdigest()[:16]
filename = "image_%s_%s.png" % (salt, md5hash)
if dirs:
subdir = gen_subdir(output, hash, dirs)
subdir = gen_subdir(output, md5hash, dirs)
filename = os.path.join(subdir, filename)
if verbose:
print filename
gen_captcha(word, font, 40, os.path.join(output, filename))
gen_captcha(word, font, fontsize, os.path.join(output, filename))