From 74e3c3bb9f323c04977bc2f585688f8165d7cdf8 Mon Sep 17 00:00:00 2001
From: Brion Vibber
Date: Mon, 19 Feb 2007 20:09:03 +0000
Subject: [PATCH] Add options to break up the captcha image storage with
 hash-digit subdirectories to avoid trawling through a giant directory on
 every hit

---
Review revision: guard pickImageDir() against missing or empty directories,
use $hash[$i] string offsets, validate the --dirs depth and make directory
creation tolerant of parallel runs. Indentation is reconstructed as tabs;
the index hashes below are those of the original submission.

 FancyCaptcha.php | 91 ++++++++++++++++++++++++++++++++++++++++++---
 captcha.py       | 35 ++++++++++++++++-
 2 files changed, 119 insertions(+), 7 deletions(-)

diff --git a/FancyCaptcha.php b/FancyCaptcha.php
index c3f2942f4..21931a85a 100644
--- a/FancyCaptcha.php
+++ b/FancyCaptcha.php
@@ -29,6 +29,9 @@ if ( defined( 'MEDIAWIKI' ) ) {
 global $wgCaptchaDirectory;
 $wgCaptchaDirectory = "$wgUploadDirectory/captcha"; // bad default :D
 
+global $wgCaptchaDirectoryLevels;
+$wgCaptchaDirectoryLevels = 0; // To break into subdirectories
+
 global $wgCaptchaSecret;
 $wgCaptchaSecret = "CHANGE_THIS_SECRET!";
 
@@ -110,9 +113,67 @@ class FancyCaptcha extends SimpleCaptcha {
 	 * @return mixed tuple of (salt key, text hash) or false if no image to find
 	 */
 	function pickImage() {
-		global $wgCaptchaDirectory;
-		$n = mt_rand( 0, $this->countFiles( $wgCaptchaDirectory ) );
-		$dir = opendir( $wgCaptchaDirectory );
+		global $wgCaptchaDirectory, $wgCaptchaDirectoryLevels;
+		return $this->pickImageDir(
+			$wgCaptchaDirectory,
+			$wgCaptchaDirectoryLevels );
+	}
+
+	/**
+	 * Pick an image from a tree of single-hex-digit subdirectories.
+	 * @param string $directory directory to search
+	 * @param int $levels number of subdirectory levels below $directory
+	 * @return mixed result of pickImageFromDir(), or false if no image to find
+	 */
+	function pickImageDir( $directory, $levels ) {
+		if( $levels ) {
+			if( !is_dir( $directory ) ) {
+				return false;
+			}
+			$dirs = array();
+
+			// Check which subdirs are actually present...
+			$dir = opendir( $directory );
+			while( false !== ($entry = readdir( $dir ) ) ) {
+				if( ctype_xdigit( $entry ) && strlen( $entry ) == 1 ) {
+					$dirs[] = $entry;
+				}
+			}
+			closedir( $dir );
+
+			if( !$dirs ) {
+				// Nothing generated yet; mt_rand( 0, -1 ) is not a valid range
+				return false;
+			}
+
+			$place = mt_rand( 0, count( $dirs ) - 1 );
+			// In case all dirs are not filled,
+			// cycle through next digits...
+			for( $j = 0; $j < count( $dirs ); $j++ ) {
+				$char = $dirs[($place + $j) % count( $dirs )];
+				$return = $this->pickImageDir( "$directory/$char", $levels - 1 );
+				if( $return ) {
+					return $return;
+				}
+			}
+			// Didn't find any images in this directory... empty?
+			return false;
+		} else {
+			return $this->pickImageFromDir( $directory );
+		}
+	}
+
+	/**
+	 * Pick an image from the files directly inside one directory.
+	 * @param string $directory directory to search
+	 * @return mixed same as pickImage(); false if $directory is not a directory
+	 */
+	function pickImageFromDir( $directory ) {
+		if( !is_dir( $directory ) ) {
+			return false;
+		}
+		$n = mt_rand( 0, $this->countFiles( $directory ) );
+		$dir = opendir( $directory );
 
 		$count = 0;
 
@@ -121,7 +182,7 @@ class FancyCaptcha extends SimpleCaptcha {
 		while( false !== $entry ) {
 			$entry = readdir( $dir );
 			if( preg_match( '/^image_([0-9a-f]+)_([0-9a-f]+)\\.png$/', $entry, $matches ) ) {
-				$size = getimagesize( "$wgCaptchaDirectory/$entry" );
+				$size = getimagesize( "$directory/$entry" );
 				$pick = array(
 					'salt' => $matches[1],
 					'hash' => $matches[2],
@@ -156,7 +217,6 @@ class FancyCaptcha extends SimpleCaptcha {
 
 	function showImage() {
 		global $wgOut, $wgRequest;
-		global $wgCaptchaDirectory;
 
 		$wgOut->disable();
 
@@ -172,7 +232,7 @@ class FancyCaptcha extends SimpleCaptcha {
 
 			$salt = $info['salt'];
 			$hash = $info['hash'];
-			$file = $wgCaptchaDirectory . DIRECTORY_SEPARATOR . "image_{$salt}_{$hash}.png";
+			$file = $this->imagePath( $salt, $hash );
 
 			if( file_exists( $file ) ) {
 				header( 'Content-type: image/png' );
@@ -183,6 +243,25 @@ class FancyCaptcha extends SimpleCaptcha {
 		wfHttpError( 500, 'Internal Error', 'Requested bogus captcha image' );
 		return false;
 	}
+
+	/**
+	 * Build the path of a captcha image file, including one subdirectory
+	 * per leading character of $hash for $wgCaptchaDirectoryLevels levels.
+	 * @param string $salt
+	 * @param string $hash
+	 * @return string
+	 */
+	function imagePath( $salt, $hash ) {
+		global $wgCaptchaDirectory, $wgCaptchaDirectoryLevels;
+		$file = $wgCaptchaDirectory;
+		$file .= DIRECTORY_SEPARATOR;
+		for( $i = 0; $i < $wgCaptchaDirectoryLevels; $i++ ) {
+			$file .= $hash[$i];
+			$file .= DIRECTORY_SEPARATOR;
+		}
+		$file .= "image_{$salt}_{$hash}.png";
+		return $file;
+	}
 
 	/**
 	 * Show a message asking the user to enter a captcha on edit
diff --git a/captcha.py b/captcha.py
index e4dbc17fe..c9d91056d 100644
--- a/captcha.py
+++ b/captcha.py
@@ -21,6 +21,7 @@
 #
 # Further tweaks by Brion Vibber :
 # 2006-01-26: Add command-line options for the various parameters
+# 2007-02-19: Add --dirs param for hash subdirectory splits
 
 import random
 import Image
@@ -98,6 +99,32 @@ def gen_captcha(text, fontname, fontsize, file_name):
 	# save the image, in format determined from filename
 	im.save(file_name)
 
+def gen_subdir(basedir, hash, levels):
+	"""Generate a subdirectory path out of the first _levels_
+	characters of _hash_, and ensure the directories exist
+	under _basedir_.
+
+	Returns the path relative to _basedir_, or None when _levels_ is 0.
+	Raises ValueError if _levels_ is negative or longer than _hash_."""
+	if levels < 0 or levels > len(hash):
+		raise ValueError("levels must be between 0 and %d, got %d" % (len(hash), levels))
+	subdir = None
+	for i in range(0, levels):
+		char = hash[i]
+		if subdir:
+			subdir = os.path.join(subdir, char)
+		else:
+			subdir = char
+		fulldir = os.path.join(basedir, subdir)
+		try:
+			os.mkdir(fulldir)
+		except OSError:
+			# Already there (possibly created by a parallel run) is fine;
+			# anything else, such as a plain file in the way, is not.
+			if not os.path.isdir(fulldir):
+				raise
+	return subdir
+
 if __name__ == '__main__':
 	"""This grabs random words from the dictionary 'words'
 	(one word per line) and generates a captcha image for each one,
@@ -112,9 +139,10 @@ if __name__ == '__main__':
 	output = "."
 	count = 20
 	fill = 0
+	dirs = 0
 	verbose = False
 
-	opts, args = getopt.getopt(sys.argv[1:], "", ["font=", "wordlist=", "key=", "output=", "count=", "fill=", "verbose"])
+	opts, args = getopt.getopt(sys.argv[1:], "", ["font=", "wordlist=", "key=", "output=", "count=", "fill=", "dirs=", "verbose"])
 	for o, a in opts:
 		if o == "--font":
 			font = a
@@ -128,6 +156,8 @@ if __name__ == '__main__':
 			count = int(a)
 		if o == "--fill":
 			fill = int(a)
+		if o == "--dirs":
+			dirs = int(a)
 		if o == "--verbose":
 			verbose = True
 
@@ -149,6 +179,9 @@ if __name__ == '__main__':
 		# 64 bits of hash is plenty for this purpose
 		hash = md5.new(key+salt+word+key+salt).hexdigest()[:16]
 		filename = "image_%s_%s.png" % (salt, hash)
+		if dirs:
+			subdir = gen_subdir(output, hash, dirs)
+			filename = os.path.join(subdir, filename)
 		if verbose:
 			print filename
 		gen_captcha(word, font, 40, os.path.join(output, filename))