mediawiki-extensions-Confir.../captcha.py

#!/usr/bin/python3
#
# Script to generate distorted text images for a captcha system.
#
# Copyright (C) 2005 Neil Harris
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# http://www.gnu.org/copyleft/gpl.html
#
# Further tweaks by Brion Vibber <brion@pobox.com>:
# 2006-01-26: Add command-line options for the various parameters
# 2007-02-19: Add --dirs param for hash subdirectory splits
# Tweaks by Greg Sabino Mullane <greg@turnstep.com>:
# 2008-01-06: Add regex check to skip words containing other than a-z

import random
import math
import hashlib
from optparse import OptionParser
import os
import sys
import re
import multiprocessing

try:
    from PIL import Image
    from PIL import ImageFont
    from PIL import ImageDraw
    from PIL import ImageEnhance
    from PIL import ImageOps
except ImportError:
    sys.exit(
        "This script requires the Python Imaging Library - http://www.pythonware.com/products/pil/"
    )

# regex to test for suitability of words
nonalpha = re.compile("[^a-z]")

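# letter combinations that are hard to read when rendered next to each other
# (e.g. "il", "rn"); note the last alternative also rejects any word that
# contains an "i" or an "l" at all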
confusedletters = re.compile(
    "[ijtlr][ijtl]|r[nompqr]|[il]"
)

# Pillow 9.2 added getbbox to replace getsize, and getsize() was removed in Pillow 10
# https://pillow.readthedocs.io/en/stable/releasenotes/10.0.0.html#font-size-and-offset-methods
# We don't have a requirements.txt, and therefore don't declare any specific supported or min version...
IMAGEFONT_HAS_GETBBOX = hasattr(ImageFont.ImageFont, "getbbox")


def wobbly_copy(src, wob, col, scale, ang):
    """Return a horizontally "wobbled" copy of *src*.

    The image is rotated by roughly *ang* degrees (plus up to 30 degrees of
    random variation either way), every scan line inside the rotated
    content's bounding box is shifted sideways by a sine wave of amplitude
    *wob* plus some random jitter, and the result is rotated back and
    sharpened.

    *scale* controls the frequency range of the sine wave.  *col* is accepted
    but not used.  If the rotated image has no non-zero content, *src* itself
    is returned unchanged.
    """
    width, height = src.size
    frequency = random.uniform(4 * scale, 5 * scale)
    phase = random.uniform(0, math.pi * 2)
    angle = ang + random.uniform(-30, 30)  # vary, but not too much

    rotated = src.rotate(angle, Image.BILINEAR)
    # Cheap bounding-box lookup so that only rows with content are processed
    bounds = rotated.getbbox()
    if bounds is None:
        return src
    top, bottom = bounds[1], bounds[3]

    canvas = Image.new("RGB", src.size, 0)  # a black rectangle
    for row in range(top, bottom + 1):
        # Shift this scan line by the wave offset plus a little noise
        wave = int(math.sin(phase + (row * frequency / height)) * wob)
        jitter = int(random.uniform(-wob * 0.5, wob * 0.5))
        strip = rotated.crop((0, row, width, row + 1))
        canvas.paste(strip, (wave + jitter, row))

    # Undo the rotation, then sharpen to stop blurring from building up
    unrotated = canvas.rotate(-angle, Image.BILINEAR)
    return ImageEnhance.Sharpness(unrotated).enhance(2)


def gen_captcha(text, fontname, fontsize, file_name):
    """Generate a captcha image for *text* and save it to *file_name*.

    *fontname* and *fontsize* are passed to ``ImageFont.truetype`` to load
    the font.  The text is drawn one character at a time, white on black,
    with random vertical jitter and negative kerning; ten random arcs are
    drawn over it; the image is then cropped to its content plus a border,
    inverted to black on white, and saved.  The output format is chosen by
    PIL from the extension of *file_name*.
    """
    # white text on a black background
    bgcolor = 0x0
    fgcolor = 0xFFFFFF
    # create a font object
    font = ImageFont.truetype(fontname, fontsize)

    # determine dimensions of the text
    # (with getbbox, the last two values of the box are used as width/height)
    if IMAGEFONT_HAS_GETBBOX:
        dim = font.getbbox(text)[2:]
    else:
        dim = font.getsize(text)

    # create a new square image significantly larger than the text
    edge = max(dim[0], dim[1]) + 2 * min(dim[0], dim[1])
    im = Image.new("RGB", (edge, edge), bgcolor)
    d = ImageDraw.Draw(im)
    x, y = im.size
    # add the text to the image
    # Using between 5-6 pixels of negative kerning seemed
    # enough to confuse tesseract but still be very readable
    offset = 0
    for c in text:
        # each character starts at the centred text origin plus the running
        # horizontal offset, with a random vertical shift of -3..7 pixels
        d.text(
            (x / 2 - dim[0] / 2 + offset, y / 2 - dim[1] / 2 + random.uniform(-3, 7)),
            c,
            font=font,
            fill=fgcolor,
        )
        # advance by the width of the character just drawn...
        if IMAGEFONT_HAS_GETBBOX:
            offset += font.getbbox(c)[2:][0]
        else:
            offset += font.getsize(c)[0]

        # ...minus the random negative kerning
        offset -= random.uniform(5, 6)

    # draw ten random arcs as clutter; at this point `offset` is the total
    # horizontal advance of the drawn text, and the arc boxes are spread
    # along it
    for i in range(10):
        x0 = int(
            offset * ((i / 2) - 1) / 5
            + x / 2
            - dim[0] / 2
            + random.uniform(0, 10)
        )
        y0 = int(y / 2 - dim[1] + 30 + random.uniform(-10, 15))

        x1 = int(offset * i / 7 + x / 2 - dim[0] / 2 + random.uniform(-5, 5))
        y1 = int(y / 2 - dim[1] + 30 + random.uniform(-10, 30))

        # order the corners so that (x0, y0) is top-left and (x1, y1) is
        # bottom-right before handing the box to d.arc()
        # NOTE(review): presumably needed because newer Pillow rejects
        # reversed boxes - confirm against the Pillow release notes
        if x1 < x0:
            x0, x1 = x1, x0

        if y1 < y0:
            y0, y1 = y1, y0

        # start angle in [-30, 30), end angle in [160, 300) degrees
        d.arc(
            (x0, y0, x1, y1),
            int(random.uniform(-30, 30)),
            int(random.uniform(160, 300)),
            fill=fgcolor,
        )

    # now get the bounding box of the nonzero parts of the image
    bbox = im.getbbox()
    bord = min(dim[0], dim[1]) / 4  # a bit of a border
    im = im.crop((bbox[0] - bord, bbox[1] - bord, bbox[2] + bord, bbox[3] + bord))

    # and turn into black on white
    im = ImageOps.invert(im)

    # save the image, in format determined from filename
    im.save(file_name)


def gen_subdir(basedir, md5hash, levels):
    """Build a nested subdirectory path from the first *levels* characters
    of *md5hash* and make sure each directory level exists under *basedir*.

    Returns the relative subdirectory path (e.g. ``a/b/c`` for hash
    ``abcdef`` and three levels), or None when *levels* is zero.

    Raises IndexError if *md5hash* has fewer than *levels* characters, and
    whatever ``os.mkdir`` raises for reasons other than the directory
    already existing (for instance a missing *basedir*).
    """
    subdir = None
    for i in range(levels):
        char = md5hash[i]
        subdir = os.path.join(subdir, char) if subdir else char
        try:
            os.mkdir(os.path.join(basedir, subdir))
        except FileExistsError:
            # Already present. Checking with os.path.exists() first would
            # race with other worker processes creating the same directory,
            # so just attempt the mkdir and tolerate losing that race.
            pass
    return subdir


def try_pick_word(words, badwordlist, verbose, nwords, min_length, max_length):
    """Make one attempt at producing a captcha challenge string.

    When *words* is not None, *nwords* random entries of it (at least one)
    are concatenated.  When *words* is None, a string of random lowercase
    letters is generated instead, with a random length between *min_length*
    and *max_length* (a non-positive *max_length* means "no explicit
    maximum"; the cap then defaults to 10, or to *min_length* if larger).

    The candidate is rejected, and None returned, when it is shorter than
    *min_length*, longer than a positive *max_length*, contains anything
    other than a-z, contains a confusing letter combination, or contains an
    entry of *badwordlist* as a substring.  None is also returned when no
    candidate can be built at all (empty *words*, or *min_length* greater
    than *max_length* in random mode).  With *verbose* set, the candidate
    and the reason for any rejection are printed.

    Returns the accepted string, or None.
    """
    if words is not None:
        if not words:
            # random.randint(0, -1) would raise ValueError; an empty list
            # simply cannot produce a candidate
            if verbose:
                print("skipping because the wordlist is empty")
            return None
        word = "".join(
            words[random.randint(0, len(words) - 1)] for _ in range(max(nwords, 1))
        )
    else:
        if max_length <= 0:
            # no explicit maximum: default cap of 10, stretched if the
            # requested minimum is larger than that
            max_length = max(10, min_length)
        if min_length > max_length:
            # random.randint() would raise ValueError on an empty range
            if verbose:
                print(
                    "skipping because minimum length %d exceeds maximum length %d"
                    % (min_length, max_length)
                )
            return None
        length = random.randint(min_length, max_length)
        word = "".join(chr(97 + random.randint(0, 25)) for _ in range(length))

    if verbose:
        print("word is %s" % word)

    if len(word) < min_length:
        if verbose:
            print(
                "skipping word pair '%s' because it has fewer than %d characters"
                % (word, min_length)
            )
        return None

    if max_length > 0 and len(word) > max_length:
        if verbose:
            print(
                "skipping word pair '%s' because it has more than %d characters"
                % (word, max_length)
            )
        return None

    if nonalpha.search(word):
        if verbose:
            print(
                "skipping word pair '%s' because it contains non-alphabetic characters"
                % word
            )
        return None
    if confusedletters.search(word):
        if verbose:
            print(
                "skipping word pair '%s' because it contains confusing letters beside each other"
                % word
            )
        return None

    for naughty in badwordlist:
        # An empty entry (e.g. from a blank line in the bad word file) is a
        # substring of every word and would reject everything; ignore it.
        if naughty and naughty in word:
            if verbose:
                print(
                    "skipping word pair '%s' because it contains word '%s'"
                    % (word, naughty)
                )
            return None
    return word


def pick_word(words, badwordlist, verbose, nwords, min_length, max_length):
    """Return a usable captcha challenge string.

    Calls try_pick_word() with the given arguments up to 1000 times and
    returns the first non-empty result.  If every attempt is rejected the
    script gives up and exits via sys.exit() with an error message.
    """
    attempts_left = 1000
    while attempts_left > 0:
        candidate = try_pick_word(
            words, badwordlist, verbose, nwords, min_length, max_length
        )
        if candidate:
            return candidate
        attempts_left -= 1
    # No valid combination in 1000 tries: give up
    sys.exit("Unable to find valid word combinations")


def read_wordlist(filename):
    """Read a word list file with one word per line.

    Each line is stripped of surrounding whitespace and lower-cased; lines
    that are empty after stripping are skipped, so blank lines never turn
    into empty-string "words".  Returns the words in file order, or an empty
    list when *filename* is not an existing regular file.
    """
    if not os.path.isfile(filename):
        return []
    # The with-block closes the file even if reading or decoding fails
    with open(filename) as f:
        cleaned = (line.strip().lower() for line in f)
        return [word for word in cleaned if word]


def run_in_thread(object):
    """Worker entry point: generate one chunk of captcha images.

    *object* is a sequence whose first six items are, in order: the number
    of images to generate, the word list (or None for random characters),
    the bad word list, the parsed command-line options, the font file and
    the font size.

    For every image a word is picked, a random 8-hex-digit salt is drawn and
    the first 16 hex digits of md5(key + salt + word + key + salt) are
    computed; the image is written as ``image_<salt>_<hash>.png`` under
    opts.output, inside hash subdirectories when opts.dirs is set.
    """
    count, words, badwordlist, opts, font, fontsize = (object[k] for k in range(6))

    for _ in range(count):
        word = pick_word(
            words,
            badwordlist,
            opts.verbose,
            opts.number_words,
            opts.min_length,
            opts.max_length,
        )
        salt = "%08x" % random.randrange(2**32)
        keyed_salt = opts.key + salt
        digest = hashlib.md5((keyed_salt + word + keyed_salt).encode("utf-8"))
        # 64 bits of hash is plenty for this purpose
        md5hash = digest.hexdigest()[:16]

        relative_path = "image_%s_%s.png" % (salt, md5hash)
        if opts.dirs:
            relative_path = os.path.join(
                gen_subdir(opts.output, md5hash, opts.dirs), relative_path
            )
        if opts.verbose:
            print(relative_path)
        gen_captcha(word, font, fontsize, os.path.join(opts.output, relative_path))


def split_count(count, workers):
    """Split *count* images as evenly as possible over at most *workers* chunks.

    Returns a list of positive chunk sizes that sum to *count*, largest
    first.  Never returns more chunks than there are images, always uses at
    least one chunk when *count* is positive, and returns an empty list when
    *count* is not positive.
    """
    if count <= 0:
        return []
    workers = max(1, min(workers, count))
    base, extra = divmod(count, workers)
    # the first `extra` chunks take one image more to absorb the remainder
    return [base + 1 if i < extra else base for i in range(workers)]


if __name__ == "__main__":
    # This grabs random words from the dictionary 'words' (one
    # word per line) and generates a captcha image for each one,
    # with a keyed salted hash of the correct answer in the filename.
    #
    # To check a reply, hash it in the same way with the same salt and
    # secret key, then compare with the hash value given.
    script_dir = os.path.dirname(os.path.realpath(__file__))
    parser = OptionParser()
    parser.add_option(
        "--wordlist", help="A list of words (required)", metavar="WORDS.txt"
    )
    parser.add_option(
        "--random",
        help="Use random characters instead of a wordlist",
        action="store_true",
    )
    parser.add_option(
        "--key", help="The passphrase set as $wgCaptchaSecret (required)", metavar="KEY"
    )
    parser.add_option(
        "--output",
        help="The directory to put the images in - $wgCaptchaDirectory (required)",
        metavar="DIR",
    )
    parser.add_option("--font", help="The font to use (required)", metavar="FONT.ttf")
    parser.add_option(
        "--font-size",
        help="The font size (default 40)",
        metavar="N",
        type="int",
        default=40,
    )
    parser.add_option(
        "--count",
        help="The maximum number of images to make (default 20)",
        metavar="N",
        type="int",
        default=20,
    )
    parser.add_option(
        "--badwordlist",
        help="A list of words that should not be used",
        metavar="FILE",
        default=os.path.join(script_dir, "badwordlist"),
    )
    parser.add_option(
        "--fill",
        help="Fill the output directory to contain N files, overrides count, cannot be used with --dirs",
        metavar="N",
        type="int",
    )
    parser.add_option(
        "--dirs",
        help="Put the images into subdirectories N levels deep - $wgCaptchaDirectoryLevels",
        metavar="N",
        type="int",
    )
    parser.add_option(
        "--verbose", "-v", help="Show debugging information", action="store_true"
    )
    parser.add_option(
        "--number-words",
        help="Number of words from the wordlist which make a captcha challenge (default 2)",
        type="int",
        default=2,
    )
    parser.add_option(
        "--min-length",
        help="Minimum length for a captcha challenge",
        type="int",
        default=1,
    )
    parser.add_option(
        "--max-length",
        help="Maximum length for a captcha challenge",
        type="int",
        default=-1,
    )
    parser.add_option(
        "--threads",
        help="Maximum number of threads to be used to generate captchas.",
        type="int",
        default=1,
    )

    opts, args = parser.parse_args()

    # Validate the required options
    if opts.wordlist:
        wordlist = opts.wordlist
    elif opts.random:
        wordlist = None
    else:
        sys.exit("Need to specify a wordlist")
    if not opts.key:
        sys.exit("Need to specify a key")
    if opts.output:
        output = opts.output
    else:
        sys.exit("Need to specify an output directory")
    if opts.font and os.path.exists(opts.font):
        font = opts.font
    else:
        sys.exit("Need to specify the location of a font")

    badwordlist = read_wordlist(opts.badwordlist)
    count = opts.count
    fontsize = opts.font_size

    if opts.fill:
        # top the output directory up to --fill entries
        count = max(0, opts.fill - len(os.listdir(output)))

    words = None
    if wordlist:
        # only keep 4 and 5 letter words that do not start with "f" and do
        # not begin or end with a doubled letter
        words = [
            x
            for x in read_wordlist(wordlist)
            if len(x) in (4, 5) and x[0] != "f" and x[0] != x[1] and x[-1] != x[-2]
        ]

    if count <= 0:
        sys.exit("No need to generate CAPTCHA images.")

    if opts.threads < 1:
        sys.exit("Need to specify at least one thread")

    if wordlist and not words:
        # fail early with a clear message instead of inside a worker process
        sys.exit("No usable words found in wordlist")

    # Hand every worker its own share so that the shares add up to exactly
    # `count`, including when count is not a multiple of the thread count or
    # is smaller than it.
    sizes = split_count(count, opts.threads)
    threads = len(sizes)

    # the per-chunk figure reported here is the largest chunk size
    print(
        "Generating %s CAPTCHA images separated in %s image(s) per chunk run by %s threads..."
        % (count, sizes[0], threads)
    )
    data = [[size, words, badwordlist, opts, font, fontsize] for size in sizes]

    # map() blocks until all chunks are done; leaving the with-block then
    # shuts the worker processes down
    with multiprocessing.Pool(threads) as pool:
        pool.map(run_in_thread, data)
Update captch(-old)?.py shebang to python 3 Bug: T268468 Change-Id: I9e3918939365a3142772627a192b0d92be7d7000 2020-12-01 14:16:56 +00:00			`#!/usr/bin/python3`
Captcha generating script by Neil Harris with some tweaks for command-line options Requires Python Imaging Library, a word list file, and a TrueType font. 2006-01-27 10:22:37 +00:00			`#`
			`# Script to generate distorted text images for a captcha system.`
			`#`
			`# Copyright (C) 2005 Neil Harris`
			`#`
			`# This program is free software; you can redistribute it and/or modify`
			`# it under the terms of the GNU General Public License as published by`
			`# the Free Software Foundation; either version 2 of the License, or`
			`# (at your option) any later version.`
			`#`
			`# This program is distributed in the hope that it will be useful,`
			`# but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`# GNU General Public License for more details.`
			`#`
			`# You should have received a copy of the GNU General Public License along`
			`# with this program; if not, write to the Free Software Foundation, Inc.,`
Correct the address of the FSF in extension GPL headers 59 Temple Place -> 51 Franklin Street 2010-06-21 13:45:17 +00:00			`# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.`
Captcha generating script by Neil Harris with some tweaks for command-line options Requires Python Imaging Library, a word list file, and a TrueType font. 2006-01-27 10:22:37 +00:00			`# http://www.gnu.org/copyleft/gpl.html`
			`#`
			`# Further tweaks by Brion Vibber <brion@pobox.com>:`
			`# 2006-01-26: Add command-line options for the various parameters`
Add options to break up the captcha image storage with hash-digit subdirectories to avoid trawling through a giant directory on every hit 2007-02-19 20:09:03 +00:00			`# 2007-02-19: Add --dirs param for hash subdirectory splits`
Skip words if they don't contain all letters. 2008-01-07 03:28:38 +00:00			`# Tweaks by Greg Sabino Mullane <greg@turnstep.com>:`
			`# 2008-01-06: Add regex check to skip words containing other than a-z`
Captcha generating script by Neil Harris with some tweaks for command-line options Requires Python Imaging Library, a word list file, and a TrueType font. 2006-01-27 10:22:37 +00:00
			`import random`
Various code cleanups for the captcha generating script * Use optparse instead of getopt * Replace deprecated md5 module * Replace deprecated string module functions with string methods * More graceful failure * Allow users to set the font size * Don't run forever if no valid word combinations can be found 2009-09-08 01:11:52 +00:00			`import math`
			`import hashlib`
			`from optparse import OptionParser`
Captcha generating script by Neil Harris with some tweaks for command-line options Requires Python Imaging Library, a word list file, and a TrueType font. 2006-01-27 10:22:37 +00:00			`import os`
			`import sys`
Skip words if they don't contain all letters. 2008-01-07 03:28:38 +00:00			`import re`
Add threads parameter to captcha.py for multithread CAPTCHA generation Bug: T157734 Change-Id: If4f6bc9048aceacc41538c001255425e848fd8e9 2017-02-10 18:04:12 +00:00			`import multiprocessing`
Captcha generating script by Neil Harris with some tweaks for command-line options Requires Python Imaging Library, a word list file, and a TrueType font. 2006-01-27 10:22:37 +00:00
Various code cleanups for the captcha generating script * Use optparse instead of getopt * Replace deprecated md5 module * Replace deprecated string module functions with string methods * More graceful failure * Allow users to set the font size * Don't run forever if no valid word combinations can be found 2009-09-08 01:11:52 +00:00			`try:`
*.py: Auto-fix using black Change-Id: I4645717df655ac570c1fe6e69058082a1fa7ee6b 2024-01-16 17:26:02 +00:00			`from PIL import Image`
			`from PIL import ImageFont`
			`from PIL import ImageDraw`
			`from PIL import ImageEnhance`
			`from PIL import ImageOps`
*.py: Fixup a couple more linting issues Change-Id: Ic0ba59dc1af1bdefab606a939887752b3b3b3c80 2024-01-16 22:22:29 +00:00			`except ImportError:`
*.py: Auto-fix using black Change-Id: I4645717df655ac570c1fe6e69058082a1fa7ee6b 2024-01-16 17:26:02 +00:00			`sys.exit(`
			`"This script requires the Python Imaging Library - http://www.pythonware.com/products/pil/"`
			`)`

*.py: Fixup a couple more linting issues Change-Id: Ic0ba59dc1af1bdefab606a939887752b3b3b3c80 2024-01-16 22:22:29 +00:00			`# regex to test for suitability of words`
			`nonalpha = re.compile("[^a-z]")`

			`# when il beside each other, hard to read`
*.py: Auto-fix using black Change-Id: I4645717df655ac570c1fe6e69058082a1fa7ee6b 2024-01-16 17:26:02 +00:00			`confusedletters = re.compile(`
			`"[ijtlr][ijtl]\|r[nompqr]\|[il]"`
*.py: Fixup a couple more linting issues Change-Id: Ic0ba59dc1af1bdefab606a939887752b3b3b3c80 2024-01-16 22:22:29 +00:00			`)`

captcha(-old).py: Support Pillow 10 getsize() function was removed in version 10 Bug: T354099 Change-Id: I019a5a89de4340d73a938c907c0a6f5cc22a659c 2024-01-02 02:46:20 +00:00			`# Pillow 9.2 added getbbox to replace getsize, and getsize() was removed in Pillow 10`
			`# https://pillow.readthedocs.io/en/stable/releasenotes/10.0.0.html#font-size-and-offset-methods`
			`# We don't have a requirements.txt, and therefore don't declare any specific supported or min version...`
			`IMAGEFONT_HAS_GETBBOX = hasattr(ImageFont.ImageFont, "getbbox")`

*.py: Auto-fix using black Change-Id: I4645717df655ac570c1fe6e69058082a1fa7ee6b 2024-01-16 17:26:02 +00:00
Captcha generating script by Neil Harris with some tweaks for command-line options Requires Python Imaging Library, a word list file, and a TrueType font. 2006-01-27 10:22:37 +00:00			`# Does X-axis wobbly copy, sandwiched between two rotates`
			`def wobbly_copy(src, wob, col, scale, ang):`
*.py: Auto-fix using black Change-Id: I4645717df655ac570c1fe6e69058082a1fa7ee6b 2024-01-16 17:26:02 +00:00			`x, y = src.size`
			`f = random.uniform(4 * scale, 5 * scale)`
			`p = random.uniform(0, math.pi * 2)`
			`rr = ang + random.uniform(-30, 30) # vary, but not too much`
			`int_d = Image.new("RGB", src.size, 0) # a black rectangle`
			`rot = src.rotate(rr, Image.BILINEAR)`
			`# Do a cheap bounding-box op here to try to limit work below`
			`bbx = rot.getbbox()`
*.py: Fixup a couple more linting issues Change-Id: Ic0ba59dc1af1bdefab606a939887752b3b3b3c80 2024-01-16 22:22:29 +00:00			`if bbx is None:`
*.py: Auto-fix using black Change-Id: I4645717df655ac570c1fe6e69058082a1fa7ee6b 2024-01-16 17:26:02 +00:00			`return src`
			`else:`
			`l, t, r, b = bbx`
			`# and only do lines with content on`
			`for i in range(t, b + 1):`
			`# Drop a scan line in`
			`xoff = int(math.sin(p + (i * f / y)) * wob)`
			`xoff += int(random.uniform(-wob * 0.5, wob * 0.5))`
			`int_d.paste(rot.crop((0, i, x, i + 1)), (xoff, i))`
			`# try to stop blurring from building up`
			`int_d = int_d.rotate(-rr, Image.BILINEAR)`
			`enh = ImageEnhance.Sharpness(int_d)`
			`return enh.enhance(2)`
Captcha generating script by Neil Harris with some tweaks for command-line options Requires Python Imaging Library, a word list file, and a TrueType font. 2006-01-27 10:22:37 +00:00

			`def gen_captcha(text, fontname, fontsize, file_name):`
*.py: Auto-fix using black Change-Id: I4645717df655ac570c1fe6e69058082a1fa7ee6b 2024-01-16 17:26:02 +00:00			`"""Generate a captcha image"""`
			`# white text on a black background`
			`bgcolor = 0x0`
			`fgcolor = 0xFFFFFF`
			`# create a font object`
			`font = ImageFont.truetype(fontname, fontsize)`

			`# determine dimensions of the text`
			`if IMAGEFONT_HAS_GETBBOX:`
			`dim = font.getbbox(text)[2:]`
			`else:`
			`dim = font.getsize(text)`

			`# create a new image significantly larger that the text`
			`edge = max(dim[0], dim[1]) + 2 * min(dim[0], dim[1])`
			`im = Image.new("RGB", (edge, edge), bgcolor)`
			`d = ImageDraw.Draw(im)`
			`x, y = im.size`
			`# add the text to the image`
			`# Using between 5-6 pixels of negative kerning seemed`
			`# enough to confuse tesseract but still be very readable`
			`offset = 0`
			`for c in text:`
			`d.text(`
			`(x / 2 - dim[0] / 2 + offset, y / 2 - dim[1] / 2 + random.uniform(-3, 7)),`
			`c,`
			`font=font,`
			`fill=fgcolor,`
			`)`
captca.py: Fix PIL 10 support again Bug: T354099 Follows-Up: Ia17157d45995b78c6a73f844dfe7d20d09564748 Change-Id: I9d1fe7ad7f12fd79c960574daf79e558d88cb02c 2024-01-27 14:26:00 +00:00			`if IMAGEFONT_HAS_GETBBOX:`
			`offset += font.getbbox(c)[2:][0]`
			`else:`
			`offset += font.getsize(c)[0]`

			`offset -= random.uniform(5, 6)`
*.py: Auto-fix using black Change-Id: I4645717df655ac570c1fe6e69058082a1fa7ee6b 2024-01-16 17:26:02 +00:00
			`for i in range(10):`
captcha.py: Swap x0/x1 and y0/y1 values before d.arc() call Bug: T354099 Change-Id: I0dfd7dfcb2130d612817b4cf8bd644a7d4eb4e40 (cherry picked from commit a12e5cd5b0731ff1f5bfddcdc609be238aa71993) 2024-02-19 18:36:00 +00:00			`x0 = int(`
			`offset * ((i / 2) - 1) / 5`
			`+ x / 2`
			`- dim[0] / 2`
			`+ random.uniform(0, 10)`
			`)`
			`y0 = int(y / 2 - dim[1] + 30 + random.uniform(-10, 15))`

			`x1 = int(offset * i / 7 + x / 2 - dim[0] / 2 + random.uniform(-5, 5))`
			`y1 = int(y / 2 - dim[1] + 30 + random.uniform(-10, 30))`

			`if x1 < x0:`
			`x0, x1 = x1, x0`

			`if y1 < y0:`
			`y0, y1 = y1, y0`

*.py: Auto-fix using black Change-Id: I4645717df655ac570c1fe6e69058082a1fa7ee6b 2024-01-16 17:26:02 +00:00			`d.arc(`
captcha.py: Swap x0/x1 and y0/y1 values before d.arc() call Bug: T354099 Change-Id: I0dfd7dfcb2130d612817b4cf8bd644a7d4eb4e40 (cherry picked from commit a12e5cd5b0731ff1f5bfddcdc609be238aa71993) 2024-02-19 18:36:00 +00:00			`(x0, y0, x1, y1),`
*.py: Auto-fix using black Change-Id: I4645717df655ac570c1fe6e69058082a1fa7ee6b 2024-01-16 17:26:02 +00:00			`int(random.uniform(-30, 30)),`
			`int(random.uniform(160, 300)),`
			`fill=fgcolor,`
			`)`

			`# now get the bounding box of the nonzero parts of the image`
			`bbox = im.getbbox()`
			`bord = min(dim[0], dim[1]) / 4 # a bit of a border`
			`im = im.crop((bbox[0] - bord, bbox[1] - bord, bbox[2] + bord, bbox[3] + bord))`

			`# and turn into black on white`
			`im = ImageOps.invert(im)`

			`# save the image, in format determined from filename`
			`im.save(file_name)`

Captcha generating script by Neil Harris with some tweaks for command-line options Requires Python Imaging Library, a word list file, and a TrueType font. 2006-01-27 10:22:37 +00:00
Various code cleanups for the captcha generating script * Use optparse instead of getopt * Replace deprecated md5 module * Replace deprecated string module functions with string methods * More graceful failure * Allow users to set the font size * Don't run forever if no valid word combinations can be found 2009-09-08 01:11:52 +00:00			`def gen_subdir(basedir, md5hash, levels):`
*.py: Auto-fix using black Change-Id: I4645717df655ac570c1fe6e69058082a1fa7ee6b 2024-01-16 17:26:02 +00:00			`"""Generate a subdirectory path out of the first _levels_`
			`characters of _hash_, and ensure the directories exist`
			`under _basedir_."""`
			`subdir = None`
			`for i in range(0, levels):`
			`char = md5hash[i]`
			`if subdir:`
			`subdir = os.path.join(subdir, char)`
			`else:`
			`subdir = char`
			`fulldir = os.path.join(basedir, subdir)`
			`if not os.path.exists(fulldir):`
			`os.mkdir(fulldir)`
			`return subdir`

Optional blacklist for word pair generation 2007-06-29 19:57:01 +00:00
FancyCaptcha: Deprecate and add alternative for blacklist parameter in generation Bug: T277936 Change-Id: I8e758023b38a4d450a0bf02b3bfc0b5033959be7 2022-03-29 17:42:29 +00:00			`def try_pick_word(words, badwordlist, verbose, nwords, min_length, max_length):`
*.py: Auto-fix using black Change-Id: I4645717df655ac570c1fe6e69058082a1fa7ee6b 2024-01-16 17:26:02 +00:00			`if words is not None:`
			`word = words[random.randint(0, len(words) - 1)]`
			`while nwords > 1:`
			`word2 = words[random.randint(0, len(words) - 1)]`
			`word = word + word2`
			`nwords = nwords - 1`
			`else:`
			`word = ""`
			`max_length = max_length if max_length > 0 else 10`
			`for i in range(0, random.randint(min_length, max_length)):`
			`word = word + chr(97 + random.randint(0, 25))`

			`if verbose:`
			`print("word is %s" % word)`

			`if len(word) < min_length:`
			`if verbose:`
			`print(`
			`"skipping word pair '%s' because it has fewer than %d characters"`
			`% (word, min_length)`
			`)`
			`return None`

			`if max_length > 0 and len(word) > max_length:`
			`if verbose:`
			`print(`
			`"skipping word pair '%s' because it has more than %d characters"`
			`% (word, max_length)`
			`)`
			`return None`

			`if nonalpha.search(word):`
			`if verbose:`
			`print(`
			`"skipping word pair '%s' because it contains non-alphabetic characters"`
			`% word`
			`)`
			`return None`
			`if confusedletters.search(word):`
			`if verbose:`
			`print(`
			`"skipping word pair '%s' because it contains confusing letters beside each other"`
			`% word`
			`)`
			`return None`

			`for naughty in badwordlist:`
			`if naughty in word:`
			`if verbose:`
			`print(`
			`"skipping word pair '%s' because it contains word '%s'"`
			`% (word, naughty)`
			`)`
			`return None`
			`return word`

Optional blacklist for word pair generation 2007-06-29 19:57:01 +00:00
FancyCaptcha: Deprecate and add alternative for blacklist parameter in generation Bug: T277936 Change-Id: I8e758023b38a4d450a0bf02b3bfc0b5033959be7 2022-03-29 17:42:29 +00:00			`def pick_word(words, badwordlist, verbose, nwords, min_length, max_length):`
*.py: Auto-fix using black Change-Id: I4645717df655ac570c1fe6e69058082a1fa7ee6b 2024-01-16 17:26:02 +00:00			`for x in range(`
			`1000`
			`): # If we can't find a valid combination in 1000 tries, just give up`
			`word = try_pick_word(`
			`words, badwordlist, verbose, nwords, min_length, max_length`
			`)`
			`if word:`
			`return word`
			`sys.exit("Unable to find valid word combinations")`

Optional blacklist for word pair generation 2007-06-29 19:57:01 +00:00
			`def read_wordlist(filename):`
*.py: Auto-fix using black Change-Id: I4645717df655ac570c1fe6e69058082a1fa7ee6b 2024-01-16 17:26:02 +00:00			`if not os.path.isfile(filename):`
			`return []`
			`f = open(filename)`
			`words = [x.strip().lower() for x in f.readlines()]`
			`f.close()`
			`return words`

Optional blacklist for word pair generation 2007-06-29 19:57:01 +00:00
Add threads parameter to captcha.py for multithread CAPTCHA generation Bug: T157734 Change-Id: If4f6bc9048aceacc41538c001255425e848fd8e9 2017-02-10 18:04:12 +00:00			`def run_in_thread(object):`
*.py: Auto-fix using black Change-Id: I4645717df655ac570c1fe6e69058082a1fa7ee6b 2024-01-16 17:26:02 +00:00			`count = object[0]`
			`words = object[1]`
			`badwordlist = object[2]`
			`opts = object[3]`
			`font = object[4]`
			`fontsize = object[5]`

			`for i in range(count):`
			`word = pick_word(`
			`words,`
			`badwordlist,`
			`opts.verbose,`
			`opts.number_words,`
			`opts.min_length,`
			`opts.max_length,`
			`)`
			`salt = "%08x" % random.randrange(2**32)`
			`# 64 bits of hash is plenty for this purpose`
			`md5hash = hashlib.md5(`
			`(opts.key + salt + word + opts.key + salt).encode("utf-8")`
			`).hexdigest()[:16]`
			`filename = "image_%s_%s.png" % (salt, md5hash)`
			`if opts.dirs:`
			`subdir = gen_subdir(opts.output, md5hash, opts.dirs)`
			`filename = os.path.join(subdir, filename)`
			`if opts.verbose:`
			`print(filename)`
			`gen_captcha(word, font, fontsize, os.path.join(opts.output, filename))`


			`if __name__ == "__main__":`
			`"""This grabs random words from the dictionary 'words' (one`
			`word per line) and generates a captcha image for each one,`
			`with a keyed salted hash of the correct answer in the filename.`

			`To check a reply, hash it in the same way with the same salt and`
			`secret key, then compare with the hash value given.`
			`"""`
			`script_dir = os.path.dirname(os.path.realpath(__file__))`
			`parser = OptionParser()`
			`parser.add_option(`
			`"--wordlist", help="A list of words (required)", metavar="WORDS.txt"`
			`)`
			`parser.add_option(`
			`"--random",`
			`help="Use random characters instead of a wordlist",`
			`action="store_true",`
			`)`
			`parser.add_option(`
			`"--key", help="The passphrase set as $wgCaptchaSecret (required)", metavar="KEY"`
			`)`
			`parser.add_option(`
			`"--output",`
			`help="The directory to put the images in - $wgCaptchaDirectory (required)",`
			`metavar="DIR",`
			`)`
			`parser.add_option("--font", help="The font to use (required)", metavar="FONT.ttf")`
			`parser.add_option(`
			`"--font-size",`
			`help="The font size (default 40)",`
			`metavar="N",`
			`type="int",`
			`default=40,`
			`)`
			`parser.add_option(`
			`"--count",`
			`help="The maximum number of images to make (default 20)",`
			`metavar="N",`
			`type="int",`
			`default=20,`
			`)`
			`parser.add_option(`
			`"--badwordlist",`
			`help="A list of words that should not be used",`
			`metavar="FILE",`
			`default=os.path.join(script_dir, "badwordlist"),`
			`)`
			`parser.add_option(`
			`"--fill",`
			`help="Fill the output directory to contain N files, overrides count, cannot be used with --dirs",`
			`metavar="N",`
			`type="int",`
			`)`
			`parser.add_option(`
			`"--dirs",`
			`help="Put the images into subdirectories N levels deep - $wgCaptchaDirectoryLevels",`
			`metavar="N",`
			`type="int",`
			`)`
			`parser.add_option(`
			`"--verbose", "-v", help="Show debugging information", action="store_true"`
			`)`
			`parser.add_option(`
			`"--number-words",`
			`help="Number of words from the wordlist which make a captcha challenge (default 2)",`
			`type="int",`
			`default=2,`
			`)`
			`parser.add_option(`
			`"--min-length",`
			`help="Minimum length for a captcha challenge",`
			`type="int",`
			`default=1,`
			`)`
			`parser.add_option(`
			`"--max-length",`
			`help="Maximum length for a captcha challenge",`
			`type="int",`
			`default=-1,`
			`)`
			`parser.add_option(`
			`"--threads",`
			`help="Maximum number of threads to be used to generate captchas.",`
			`type="int",`
			`default=1,`
			`)`

			`opts, args = parser.parse_args()`

			`if opts.wordlist:`
			`wordlist = opts.wordlist`
			`elif opts.random:`
			`wordlist = None`
			`else:`
			`sys.exit("Need to specify a wordlist")`
			`if opts.key:`
			`key = opts.key`
			`else:`
			`sys.exit("Need to specify a key")`
			`if opts.output:`
			`output = opts.output`
			`else:`
			`sys.exit("Need to specify an output directory")`
			`if opts.font and os.path.exists(opts.font):`
			`font = opts.font`
			`else:`
			`sys.exit("Need to specify the location of a font")`

FancyCaptcha: Remove deprecated blacklist parameter Bug: T277936 Depends-On: Ia467c4fb56e9920826a2e4e505e277683ab154b8 Change-Id: Ia64c20bf4638cdba85860dbd2852ee04f9498561 2022-03-29 17:46:58 +00:00			`badwordlist = read_wordlist(opts.badwordlist)`
*.py: Auto-fix using black Change-Id: I4645717df655ac570c1fe6e69058082a1fa7ee6b 2024-01-16 17:26:02 +00:00			`count = opts.count`
			`fill = opts.fill`
			`fontsize = opts.font_size`
			`threads = opts.threads`

			`if fill:`
			`count = max(0, fill - len(os.listdir(output)))`

			`words = None`
			`if wordlist:`
			`words = read_wordlist(wordlist)`
			`words = [`
			`x`
			`for x in words`
			`if len(x) in (4, 5) and x[0] != "f" and x[0] != x[1] and x[-1] != x[-2]`
			`]`

			`if count == 0:`
			`sys.exit("No need to generate CAPTCHA images.")`

			`if count < threads:`
			`chunks = 1`
			`threads = 1`
			`else:`
			`chunks = count // threads`

			`p = multiprocessing.Pool(threads)`
			`data = []`
			`print(`
			`"Generating %s CAPTCHA images separated in %s image(s) per chunk run by %s threads..."`
			`% (count, chunks, threads)`
			`)`
			`for i in range(0, threads):`
			`data.append([chunks, words, badwordlist, opts, font, fontsize])`

			`p.map(run_in_thread, data)`