mediawiki-extensions-Confir.../captcha.py

227 lines
7.7 KiB
Python
Raw Normal View History

#!/usr/bin/python
#
# Script to generate distorted text images for a captcha system.
#
# Copyright (C) 2005 Neil Harris
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# http://www.gnu.org/copyleft/gpl.html
#
# Further tweaks by Brion Vibber <brion@pobox.com>:
# 2006-01-26: Add command-line options for the various parameters
# 2007-02-19: Add --dirs param for hash subdirectory splits
# Tweaks by Greg Sabino Mullane <greg@turnstep.com>:
# 2008-01-06: Add regex check to skip words containing other than a-z
import random
import math
import hashlib
from optparse import OptionParser
import os
import sys
import re
try:
import Image
import ImageFont
import ImageDraw
import ImageEnhance
import ImageOps
except:
sys.exit("This script requires the Python Imaging Library - http://www.pythonware.com/products/pil/")
nonalpha = re.compile('[^a-z]') # regex to test for suitability of words
# Does X-axis wobbly copy, sandwiched between two rotates
def wobbly_copy(src, wob, col, scale, ang):
x, y = src.size
f = random.uniform(4*scale, 5*scale)
p = random.uniform(0, math.pi*2)
rr = ang+random.uniform(-30, 30) # vary, but not too much
int_d = Image.new('RGB', src.size, 0) # a black rectangle
rot = src.rotate(rr, Image.BILINEAR)
# Do a cheap bounding-box op here to try to limit work below
bbx = rot.getbbox()
if bbx == None:
return src
else:
l, t, r, b= bbx
# and only do lines with content on
for i in range(t, b+1):
# Drop a scan line in
xoff = int(math.sin(p+(i*f/y))*wob)
xoff += int(random.uniform(-wob*0.5, wob*0.5))
int_d.paste(rot.crop((0, i, x, i+1)), (xoff, i))
# try to stop blurring from building up
int_d = int_d.rotate(-rr, Image.BILINEAR)
enh = ImageEnhance.Sharpness(int_d)
return enh.enhance(2)
def gen_captcha(text, fontname, fontsize, file_name):
"""Generate a captcha image"""
# white text on a black background
bgcolor = 0x0
fgcolor = 0xffffff
# create a font object
font = ImageFont.truetype(fontname,fontsize)
# determine dimensions of the text
dim = font.getsize(text)
# create a new image significantly larger that the text
edge = max(dim[0], dim[1]) + 2*min(dim[0], dim[1])
im = Image.new('RGB', (edge, edge), bgcolor)
d = ImageDraw.Draw(im)
x, y = im.size
# add the text to the image
d.text((x/2-dim[0]/2, y/2-dim[1]/2), text, font=font, fill=fgcolor)
k = 3
wob = 0.20*dim[1]/k
rot = 45
# Apply lots of small stirring operations, rather than a few large ones
# in order to get some uniformity of treatment, whilst
# maintaining randomness
for i in range(k):
im = wobbly_copy(im, wob, bgcolor, i*2+3, rot+0)
im = wobbly_copy(im, wob, bgcolor, i*2+1, rot+45)
im = wobbly_copy(im, wob, bgcolor, i*2+2, rot+90)
rot += 30
# now get the bounding box of the nonzero parts of the image
bbox = im.getbbox()
bord = min(dim[0], dim[1])/4 # a bit of a border
im = im.crop((bbox[0]-bord, bbox[1]-bord, bbox[2]+bord, bbox[3]+bord))
# and turn into black on white
im = ImageOps.invert(im)
# save the image, in format determined from filename
im.save(file_name)
def gen_subdir(basedir, md5hash, levels):
"""Generate a subdirectory path out of the first _levels_
characters of _hash_, and ensure the directories exist
under _basedir_."""
subdir = None
for i in range(0, levels):
char = md5hash[i]
if subdir:
subdir = os.path.join(subdir, char)
else:
subdir = char
fulldir = os.path.join(basedir, subdir)
if not os.path.exists(fulldir):
os.mkdir(fulldir)
return subdir
def try_pick_word(words, blacklist, verbose):
word1 = words[random.randint(0,len(words)-1)]
word2 = words[random.randint(0,len(words)-1)]
word = word1+word2
if verbose:
print "word is %s" % word
if nonalpha.search(word):
if verbose:
print "skipping word pair '%s' because it contains non-alphabetic characters" % word
return None
for naughty in blacklist:
if naughty in word:
if verbose:
print "skipping word pair '%s' because it contains blacklisted word '%s'" % (word, naughty)
return None
return word
def pick_word(words, blacklist, verbose):
for x in range(1000): # If we can't find a valid combination in 1000 tries, just give up
word = try_pick_word(words, blacklist, verbose)
if word:
return word
sys.exit("Unable to find valid word combinations")
def read_wordlist(filename):
return [x.strip().lower() for x in open(wordlist).readlines()]
if __name__ == '__main__':
"""This grabs random words from the dictionary 'words' (one
word per line) and generates a captcha image for each one,
with a keyed salted hash of the correct answer in the filename.
To check a reply, hash it in the same way with the same salt and
secret key, then compare with the hash value given.
"""
parser = OptionParser()
parser.add_option("--wordlist", help="A list of words (required)", metavar="WORDS.txt")
parser.add_option("--key", help="The passphrase set as $wgCaptchaSecret (required)", metavar="KEY")
parser.add_option("--output", help="The directory to put the images in - $wgCaptchaDirectory (required)", metavar="DIR")
parser.add_option("--font", help="The font to use (required)", metavar="FONT.ttf")
parser.add_option("--font-size", help="The font size (default 40)", metavar="N", type='int', default=40)
parser.add_option("--count", help="The maximum number of images to make (default 20)", metavar="N", type='int', default=20)
parser.add_option("--blacklist", help="A blacklist of words that should not be used", metavar="FILE")
parser.add_option("--fill", help="Fill the output directory to contain N files, overrides count, cannot be used with --dirs", metavar="N", type='int')
parser.add_option("--dirs", help="Put the images into subdirectories N levels deep - $wgCaptchaDirectoryLevels", metavar="N", type='int')
parser.add_option("--verbose", "-v", help="Show debugging information", action='store_true')
opts, args = parser.parse_args()
if opts.wordlist:
wordlist = opts.wordlist
else:
sys.exit("Need to specify a wordlist")
if opts.key:
key = opts.key
else:
sys.exit("Need to specify a key")
if opts.output:
output = opts.output
else:
sys.exit("Need to specify an output directory")
if opts.font and os.path.exists(opts.font):
font = opts.font
else:
sys.exit("Need to specify the location of a font")
blacklistfile = opts.blacklist
count = opts.count
fill = opts.fill
dirs = opts.dirs
verbose = opts.verbose
fontsize = opts.font_size
if fill:
count = max(0, fill - len(os.listdir(output)))
words = read_wordlist(wordlist)
words = [x for x in words
if len(x) in (4,5) and x[0] != "f"
and x[0] != x[1] and x[-1] != x[-2]]
if blacklistfile:
blacklist = read_wordlist(blacklistfile)
else:
blacklist = []
for i in range(count):
word = pick_word(words, blacklist, verbose)
salt = "%08x" % random.randrange(2**32)
# 64 bits of hash is plenty for this purpose
md5hash = hashlib.md5(key+salt+word+key+salt).hexdigest()[:16]
filename = "image_%s_%s.png" % (salt, md5hash)
if dirs:
subdir = gen_subdir(output, md5hash, dirs)
filename = os.path.join(subdir, filename)
if verbose:
print filename
gen_captcha(word, font, fontsize, os.path.join(output, filename))