Add threads parameter to captcha.py for multithreaded CAPTCHA generation

Bug: T157734
Change-Id: If4f6bc9048aceacc41538c001255425e848fd8e9
This commit is contained in:
Florian Schmidt 2017-02-10 19:04:12 +01:00 committed by Reedy
parent f488a8066d
commit 66152162fe
3 changed files with 86 additions and 25 deletions

View file

@ -32,6 +32,8 @@ from optparse import OptionParser
import os
import sys
import re
import multiprocessing
import time
try:
from PIL import Image
@ -175,6 +177,27 @@ def read_wordlist(filename):
f.close()
return words
def run_in_thread(object):
    """Generate one chunk of CAPTCHA images; used as the Pool.map worker.

    ``object`` is an indexable job description laid out as
    ``[count, words, blacklist, opts, font, fontsize]``:

    * ``count``     -- how many images this call produces
    * ``words``     -- word list handed to ``pick_word``
    * ``blacklist`` -- blacklist handed to ``pick_word``
    * ``opts``      -- parsed options; ``number_words``, ``min_length`` and
      ``max_length`` are read from it
    * ``font`` / ``fontsize`` -- forwarded unchanged to ``gen_captcha``

    The names ``verbose``, ``key``, ``dirs`` and ``output`` are not part of
    the job description; they are looked up in module scope.
    """
    # Read all six slots up front so a malformed job fails before any work.
    total, wordlist, banned, options, face, size = [
        object[slot] for slot in range(6)
    ]

    for _ in range(total):
        challenge = pick_word(
            wordlist,
            banned,
            verbose,
            options.number_words,
            options.min_length,
            options.max_length,
        )
        salt = "%08x" % random.randrange(2**32)
        # 64 bits of hash is plenty for this purpose
        secret = key + salt + challenge + key + salt
        digest = hashlib.md5(secret.encode('utf-8')).hexdigest()[:16]
        name = "image_%s_%s.png" % (salt, digest)
        # With directory levels enabled the image lives in a hashed subdir.
        relative = (
            os.path.join(gen_subdir(output, digest, dirs), name)
            if dirs
            else name
        )
        if verbose:
            print(relative)
        gen_captcha(challenge, face, size, os.path.join(output, relative))
if __name__ == '__main__':
"""This grabs random words from the dictionary 'words' (one
word per line) and generates a captcha image for each one,
@ -199,6 +222,7 @@ if __name__ == '__main__':
parser.add_option("--number-words", help="Number of words from the wordlist which make a captcha challenge (default 2)", type='int', default=2)
parser.add_option("--min-length", help="Minimum length for a captcha challenge", type='int', default=1)
parser.add_option("--max-length", help="Maximum length for a captcha challenge", type='int', default=-1)
parser.add_option("--threads", help="Maximum number of threads to be used to generate captchas.", type='int', default=1)
opts, args = parser.parse_args()
@ -227,6 +251,7 @@ if __name__ == '__main__':
dirs = opts.dirs
verbose = opts.verbose
fontsize = opts.font_size
threads = opts.threads
if fill:
count = max(0, fill - len(os.listdir(output)))
@ -238,16 +263,20 @@ if __name__ == '__main__':
if len(x) in (4,5) and x[0] != "f"
and x[0] != x[1] and x[-1] != x[-2]]
for i in range(count):
word = pick_word(words, blacklist, verbose, opts.number_words, opts.min_length, opts.max_length)
salt = "%08x" % random.randrange(2**32)
# 64 bits of hash is plenty for this purpose
md5hash = hashlib.md5((key+salt+word+key+salt).encode('utf-8')).hexdigest()[:16]
filename = "image_%s_%s.png" % (salt, md5hash)
if dirs:
subdir = gen_subdir(output, md5hash, dirs)
filename = os.path.join(subdir, filename)
if verbose:
print(filename)
gen_captcha(word, font, fontsize, os.path.join(output, filename))
if count == 0:
sys.exit("No need to generate CAPTCHA images.")
if count < threads:
chunks = 1
threads = 1
else:
chunks = int(count / threads)
p = multiprocessing.Pool(threads);
data = []
print("Generating %s CAPTCHA images separated in %s image(s) per chunk run by %s threads..." % (count, chunks, threads))
for i in range(0, threads):
data.append([chunks, words, blacklist, opts, font, fontsize])
p.map(run_in_thread, data)

View file

@ -32,6 +32,8 @@ from optparse import OptionParser
import os
import sys
import re
import multiprocessing
import time
try:
from PIL import Image
@ -192,6 +194,27 @@ def read_wordlist(filename):
f.close()
return words
def run_in_thread(object):
    """Generate one chunk of CAPTCHA images; used as the Pool.map worker.

    ``object`` is an indexable job description laid out as
    ``[count, words, blacklist, opts, font, fontsize]``:

    * ``count``     -- how many images this call produces
    * ``words``     -- word list handed to ``pick_word``
    * ``blacklist`` -- blacklist handed to ``pick_word``
    * ``opts``      -- parsed options; ``number_words``, ``min_length`` and
      ``max_length`` are read from it
    * ``font`` / ``fontsize`` -- forwarded unchanged to ``gen_captcha``

    The names ``verbose``, ``key``, ``dirs`` and ``output`` are not part of
    the job description; they are looked up in module scope.
    """
    # The caller computes the chunk size with ``count / threads``, which is a
    # float under Python 3 true division; range() only accepts integers, so
    # truncate here instead of raising TypeError in every worker.
    count = int(object[0])
    words = object[1]
    blacklist = object[2]
    opts = object[3]
    font = object[4]
    fontsize = object[5]

    for _ in range(count):
        word = pick_word(
            words,
            blacklist,
            verbose,
            opts.number_words,
            opts.min_length,
            opts.max_length,
        )
        salt = "%08x" % random.randrange(2**32)
        # 64 bits of hash is plenty for this purpose
        md5hash = hashlib.md5(
            (key + salt + word + key + salt).encode('utf-8')
        ).hexdigest()[:16]
        filename = "image_%s_%s.png" % (salt, md5hash)
        if dirs:
            # With directory levels enabled the image lives in a hashed subdir.
            subdir = gen_subdir(output, md5hash, dirs)
            filename = os.path.join(subdir, filename)
        if verbose:
            print(filename)
        gen_captcha(word, font, fontsize, os.path.join(output, filename))
if __name__ == '__main__':
"""This grabs random words from the dictionary 'words' (one
word per line) and generates a captcha image for each one,
@ -216,6 +239,7 @@ if __name__ == '__main__':
parser.add_option("--number-words", help="Number of words from the wordlist which make a captcha challenge (default 2)", type='int', default=2)
parser.add_option("--min-length", help="Minimum length for a captcha challenge", type='int', default=1)
parser.add_option("--max-length", help="Maximum length for a captcha challenge", type='int', default=-1)
parser.add_option("--threads", help="Maximum number of threads to be used to generate captchas.", type='int', default=1)
opts, args = parser.parse_args()
@ -244,6 +268,7 @@ if __name__ == '__main__':
dirs = opts.dirs
verbose = opts.verbose
fontsize = opts.font_size
threads = opts.threads
if fill:
count = max(0, fill - len(os.listdir(output)))
@ -255,16 +280,19 @@ if __name__ == '__main__':
if len(x) in (4,5) and x[0] != "f"
and x[0] != x[1] and x[-1] != x[-2]]
for i in range(count):
word = pick_word(words, blacklist, verbose, opts.number_words, opts.min_length, opts.max_length)
salt = "%08x" % random.randrange(2**32)
# 64 bits of hash is plenty for this purpose
md5hash = hashlib.md5((key+salt+word+key+salt).encode('utf-8')).hexdigest()[:16]
filename = "image_%s_%s.png" % (salt, md5hash)
if dirs:
subdir = gen_subdir(output, md5hash, dirs)
filename = os.path.join(subdir, filename)
if verbose:
print(filename)
gen_captcha(word, font, fontsize, os.path.join(output, filename))
if count == 0:
sys.exit("No need to generate CAPTCHA images.")
if count < threads:
chunks = 1
threads = 1
else:
chunks = (count / threads)
p = multiprocessing.Pool(threads);
data = []
print("Generating %s CAPTCHA images separated in %s image(s) per chunk run by %s threads..." % (count, chunks, threads))
for i in range(0, threads):
data.append([chunks, words, blacklist, opts, font, fontsize])
p.map(run_in_thread, data)

View file

@ -50,6 +50,8 @@ class GenerateFancyCaptchas extends Maintenance {
"Whether to use captcha-old.py which doesn't have OCR fighting improvements"
);
$this->addOption( "delete", "Delete the old captches" );
$this->addOption( "threads", "The number of threads to use to generate the images",
false, true );
$this->mDescription = "Generate new fancy captchas and move them into storage";
$this->requireExtension( "FancyCaptcha" );
@ -98,7 +100,9 @@ class GenerateFancyCaptchas extends Maintenance {
wfEscapeShellArg( $countGen ),
wfEscapeShellArg( $wgCaptchaDirectoryLevels )
);
foreach ( [ 'wordlist', 'font', 'font-size', 'blacklist', 'verbose' ] as $par ) {
foreach (
[ 'wordlist', 'font', 'font-size', 'blacklist', 'verbose', 'threads' ] as $par
) {
if ( $this->hasOption( $par ) ) {
$cmd .= " --$par " . wfEscapeShellArg( $this->getOption( $par ) );
}