2006-01-27 10:22:37 +00:00
|
|
|
#!/usr/bin/python
|
|
|
|
#
|
|
|
|
# Script to generate distorted text images for a captcha system.
|
|
|
|
#
|
|
|
|
# Copyright (C) 2005 Neil Harris
|
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License along
|
|
|
|
# with this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
|
|
# http://www.gnu.org/copyleft/gpl.html
|
|
|
|
#
|
|
|
|
# Further tweaks by Brion Vibber <brion@pobox.com>:
|
|
|
|
# 2006-01-26: Add command-line options for the various parameters
|
2007-02-19 20:09:03 +00:00
|
|
|
# 2007-02-19: Add --dirs param for hash subdirectory splits
|
2008-01-07 03:28:38 +00:00
|
|
|
# Tweaks by Greg Sabino Mullane <greg@turnstep.com>:
|
|
|
|
# 2008-01-06: Add regex check to skip words containing other than a-z
|
2006-01-27 10:22:37 +00:00
|
|
|
|
|
|
|
import random
|
|
|
|
import Image
|
|
|
|
import ImageFont
|
|
|
|
import ImageDraw
|
|
|
|
import ImageEnhance
|
|
|
|
import ImageOps
|
|
|
|
import math, string, md5
|
|
|
|
import getopt
|
|
|
|
import os
|
|
|
|
import sys
|
2008-01-07 03:28:38 +00:00
|
|
|
import re
|
2006-01-27 10:22:37 +00:00
|
|
|
|
|
|
|
# Does X-axis wobbly copy, sandwiched between two rotates
|
|
|
|
def wobbly_copy(src, wob, col, scale, ang):
|
|
|
|
x, y = src.size
|
|
|
|
f = random.uniform(4*scale, 5*scale)
|
|
|
|
p = random.uniform(0, math.pi*2)
|
|
|
|
rr = ang+random.uniform(-30, 30) # vary, but not too much
|
|
|
|
int_d = Image.new('RGB', src.size, 0) # a black rectangle
|
|
|
|
rot = src.rotate(rr, Image.BILINEAR)
|
|
|
|
# Do a cheap bounding-box op here to try to limit work below
|
|
|
|
bbx = rot.getbbox()
|
|
|
|
if bbx == None:
|
|
|
|
print "whoops"
|
|
|
|
return src
|
|
|
|
else:
|
|
|
|
l, t, r, b= bbx
|
|
|
|
# and only do lines with content on
|
|
|
|
for i in range(t, b+1):
|
|
|
|
# Drop a scan line in
|
|
|
|
xoff = int(math.sin(p+(i*f/y))*wob)
|
|
|
|
xoff += int(random.uniform(-wob*0.5, wob*0.5))
|
|
|
|
int_d.paste(rot.crop((0, i, x, i+1)), (xoff, i))
|
|
|
|
# try to stop blurring from building up
|
|
|
|
int_d = int_d.rotate(-rr, Image.BILINEAR)
|
|
|
|
enh = ImageEnhance.Sharpness(int_d)
|
|
|
|
return enh.enhance(2)
|
|
|
|
|
|
|
|
|
|
|
|
def gen_captcha(text, fontname, fontsize, file_name):
|
|
|
|
"""Generate a captcha image"""
|
|
|
|
# white text on a black background
|
|
|
|
bgcolor = 0x0
|
|
|
|
fgcolor = 0xffffff
|
|
|
|
# create a font object
|
|
|
|
font = ImageFont.truetype(fontname,fontsize)
|
|
|
|
# determine dimensions of the text
|
|
|
|
dim = font.getsize(text)
|
|
|
|
# create a new image significantly larger that the text
|
|
|
|
edge = max(dim[0], dim[1]) + 2*min(dim[0], dim[1])
|
|
|
|
im = Image.new('RGB', (edge, edge), bgcolor)
|
|
|
|
d = ImageDraw.Draw(im)
|
|
|
|
x, y = im.size
|
|
|
|
# add the text to the image
|
|
|
|
d.text((x/2-dim[0]/2, y/2-dim[1]/2), text, font=font, fill=fgcolor)
|
|
|
|
k = 3
|
|
|
|
wob = 0.20*dim[1]/k
|
|
|
|
rot = 45
|
|
|
|
# Apply lots of small stirring operations, rather than a few large ones
|
|
|
|
# in order to get some uniformity of treatment, whilst
|
|
|
|
# maintaining randomness
|
|
|
|
for i in range(k):
|
|
|
|
im = wobbly_copy(im, wob, bgcolor, i*2+3, rot+0)
|
|
|
|
im = wobbly_copy(im, wob, bgcolor, i*2+1, rot+45)
|
|
|
|
im = wobbly_copy(im, wob, bgcolor, i*2+2, rot+90)
|
|
|
|
rot += 30
|
|
|
|
|
|
|
|
# now get the bounding box of the nonzero parts of the image
|
|
|
|
bbox = im.getbbox()
|
|
|
|
bord = min(dim[0], dim[1])/4 # a bit of a border
|
|
|
|
im = im.crop((bbox[0]-bord, bbox[1]-bord, bbox[2]+bord, bbox[3]+bord))
|
|
|
|
# and turn into black on white
|
|
|
|
im = ImageOps.invert(im)
|
|
|
|
|
|
|
|
# save the image, in format determined from filename
|
|
|
|
im.save(file_name)
|
|
|
|
|
2007-02-19 20:09:03 +00:00
|
|
|
def gen_subdir(basedir, hash, levels):
|
|
|
|
"""Generate a subdirectory path out of the first _levels_
|
|
|
|
characters of _hash_, and ensure the directories exist
|
|
|
|
under _basedir_."""
|
|
|
|
subdir = None
|
|
|
|
for i in range(0, levels):
|
|
|
|
char = hash[i]
|
|
|
|
if subdir:
|
|
|
|
subdir = os.path.join(subdir, char)
|
|
|
|
else:
|
|
|
|
subdir = char
|
|
|
|
fulldir = os.path.join(basedir, subdir)
|
|
|
|
if not os.path.exists(fulldir):
|
|
|
|
os.mkdir(fulldir)
|
|
|
|
return subdir
|
2007-06-29 19:57:01 +00:00
|
|
|
|
|
|
|
def try_pick_word(words, blacklist, verbose):
|
|
|
|
word1 = words[random.randint(0,len(words)-1)]
|
|
|
|
word2 = words[random.randint(0,len(words)-1)]
|
|
|
|
word = word1+word2
|
2008-01-07 03:28:38 +00:00
|
|
|
if verbose:
|
|
|
|
print "word is %s" % word
|
|
|
|
r = re.compile('[^a-z]');
|
|
|
|
if r.search(word):
|
|
|
|
print "skipping word pair '%s' because it contains non-alphabetic characters" % word
|
|
|
|
return None
|
|
|
|
|
2007-06-29 19:57:01 +00:00
|
|
|
for naughty in blacklist:
|
|
|
|
if naughty in word:
|
|
|
|
if verbose:
|
|
|
|
print "skipping word pair '%s' because it contains blacklisted word '%s'" % (word, naughty)
|
|
|
|
return None
|
|
|
|
return word
|
|
|
|
|
|
|
|
def pick_word(words, blacklist, verbose):
|
|
|
|
while True:
|
|
|
|
word = try_pick_word(words, blacklist, verbose)
|
|
|
|
if word:
|
|
|
|
return word
|
|
|
|
|
|
|
|
def read_wordlist(filename):
|
|
|
|
return [string.lower(x.strip()) for x in open(wordlist).readlines()]
|
|
|
|
|
2006-01-27 10:22:37 +00:00
|
|
|
if __name__ == '__main__':
|
|
|
|
"""This grabs random words from the dictionary 'words' (one
|
|
|
|
word per line) and generates a captcha image for each one,
|
|
|
|
with a keyed salted hash of the correct answer in the filename.
|
|
|
|
|
|
|
|
To check a reply, hash it in the same way with the same salt and
|
|
|
|
secret key, then compare with the hash value given.
|
|
|
|
"""
|
|
|
|
font = "VeraBd.ttf"
|
|
|
|
wordlist = "awordlist.txt"
|
2007-06-29 19:57:01 +00:00
|
|
|
blacklistfile = None
|
2006-01-27 10:22:37 +00:00
|
|
|
key = "CHANGE_THIS_SECRET!"
|
|
|
|
output = "."
|
|
|
|
count = 20
|
|
|
|
fill = 0
|
2007-02-19 20:09:03 +00:00
|
|
|
dirs = 0
|
2006-01-27 10:22:37 +00:00
|
|
|
verbose = False
|
|
|
|
|
2007-06-29 19:57:01 +00:00
|
|
|
opts, args = getopt.getopt(sys.argv[1:], "", ["font=", "wordlist=", "blacklist=", "key=", "output=", "count=", "fill=", "dirs=", "verbose"])
|
2006-01-27 10:22:37 +00:00
|
|
|
for o, a in opts:
|
|
|
|
if o == "--font":
|
|
|
|
font = a
|
|
|
|
if o == "--wordlist":
|
|
|
|
wordlist = a
|
2007-06-29 19:57:01 +00:00
|
|
|
if o == "--blacklist":
|
|
|
|
blacklistfile = a
|
2006-01-27 10:22:37 +00:00
|
|
|
if o == "--key":
|
|
|
|
key = a
|
|
|
|
if o == "--output":
|
|
|
|
output = a
|
|
|
|
if o == "--count":
|
|
|
|
count = int(a)
|
|
|
|
if o == "--fill":
|
|
|
|
fill = int(a)
|
2007-02-19 20:09:03 +00:00
|
|
|
if o == "--dirs":
|
|
|
|
dirs = int(a)
|
2006-01-27 10:22:37 +00:00
|
|
|
if o == "--verbose":
|
|
|
|
verbose = True
|
|
|
|
|
|
|
|
if fill:
|
|
|
|
# Option processing order is not guaranteed, so count the output
|
|
|
|
# files after...
|
|
|
|
count = max(0, fill - len(os.listdir(output)))
|
|
|
|
|
2007-06-29 19:57:01 +00:00
|
|
|
words = read_wordlist(wordlist)
|
2006-01-27 10:22:37 +00:00
|
|
|
words = [x for x in words
|
|
|
|
if len(x) <= 5 and len(x) >= 4 and x[0] != "f"
|
|
|
|
and x[0] != x[1] and x[-1] != x[-2]
|
|
|
|
and (not "'" in x)]
|
2007-06-29 19:57:01 +00:00
|
|
|
|
|
|
|
if blacklistfile:
|
|
|
|
blacklist = read_wordlist(blacklistfile)
|
|
|
|
else:
|
|
|
|
blacklist = []
|
|
|
|
|
2006-01-27 10:22:37 +00:00
|
|
|
for i in range(count):
|
2007-06-29 19:57:01 +00:00
|
|
|
word = pick_word(words, blacklist, verbose)
|
2006-01-27 10:22:37 +00:00
|
|
|
salt = "%08x" % random.randrange(2**32)
|
|
|
|
# 64 bits of hash is plenty for this purpose
|
|
|
|
hash = md5.new(key+salt+word+key+salt).hexdigest()[:16]
|
|
|
|
filename = "image_%s_%s.png" % (salt, hash)
|
2007-02-19 20:09:03 +00:00
|
|
|
if dirs:
|
|
|
|
subdir = gen_subdir(output, hash, dirs)
|
|
|
|
filename = os.path.join(subdir, filename)
|
2006-01-27 10:22:37 +00:00
|
|
|
if verbose:
|
|
|
|
print filename
|
|
|
|
gen_captcha(word, font, 40, os.path.join(output, filename))
|