Add options to break up the captcha image storage with hash-digit subdirectories to avoid trawling through a giant directory on every hit

This commit is contained in:
Brion Vibber 2007-02-19 20:09:03 +00:00
parent 4a43b4ca52
commit 74e3c3bb9f
2 changed files with 83 additions and 7 deletions

View file

@ -29,6 +29,9 @@ if ( defined( 'MEDIAWIKI' ) ) {
global $wgCaptchaDirectory;
$wgCaptchaDirectory = "$wgUploadDirectory/captcha"; // bad default :D
global $wgCaptchaDirectoryLevels;
$wgCaptchaDirectoryLevels = 0; // To break into subdirectories
global $wgCaptchaSecret;
$wgCaptchaSecret = "CHANGE_THIS_SECRET!";
@ -110,9 +113,48 @@ class FancyCaptcha extends SimpleCaptcha {
* @return mixed tuple of (salt key, text hash) or false if no image to find
*/
function pickImage() {
	// Delegate to the directory walker, starting at the configured captcha
	// root and descending the configured number of hash-digit levels.
	// Nothing is counted or opened here: the previous in-place
	// mt_rand()/opendir() pair computed an unused value and left a
	// directory handle open.
	global $wgCaptchaDirectory, $wgCaptchaDirectoryLevels;
	return $this->pickImageDir(
		$wgCaptchaDirectory,
		$wgCaptchaDirectoryLevels );
}
/**
 * Pick an image by descending through single-hex-digit subdirectories.
 *
 * At each level a random starting subdirectory is chosen; if it yields no
 * image, the remaining subdirectories are tried in turn.
 *
 * @param string $directory directory to search in
 * @param int $levels number of subdirectory levels still to descend;
 *                    0 means images are looked up directly in $directory
 * @return mixed whatever pickImageFromDir() returns for the chosen leaf
 *               directory, or false if no image could be found
 */
function pickImageDir( $directory, $levels ) {
	if( !$levels ) {
		// Leaf level: images live directly in this directory.
		return $this->pickImageFromDir( $directory );
	}

	// opendir() returns false on failure; bail out instead of handing
	// that to readdir()/closedir().
	$dir = is_dir( $directory ) ? opendir( $directory ) : false;
	if( $dir === false ) {
		return false;
	}

	// Check which subdirs are actually present...
	$dirs = array();
	while( false !== ( $entry = readdir( $dir ) ) ) {
		if( strlen( $entry ) == 1 && ctype_xdigit( $entry ) ) {
			$dirs[] = $entry;
		}
	}
	closedir( $dir );

	$total = count( $dirs );
	if( $total == 0 ) {
		// Nothing to descend into; also avoids calling mt_rand( 0, -1 ),
		// which is not a valid range.
		return false;
	}

	$place = mt_rand( 0, $total - 1 );
	// Not every subdirectory is necessarily populated, so start at a
	// random one and cycle through the rest until an image turns up.
	for( $j = 0; $j < $total; $j++ ) {
		$char = $dirs[( $place + $j ) % $total];
		$return = $this->pickImageDir( "$directory/$char", $levels - 1 );
		if( $return ) {
			return $return;
		}
	}

	// Didn't find any images in this directory... empty?
	return false;
}
function pickImageFromDir( $directory ) {
if( !is_dir( $directory ) ) {
return false;
}
$n = mt_rand( 0, $this->countFiles( $directory ) );
$dir = opendir( $directory );
$count = 0;
@ -121,7 +163,7 @@ class FancyCaptcha extends SimpleCaptcha {
while( false !== $entry ) {
$entry = readdir( $dir );
if( preg_match( '/^image_([0-9a-f]+)_([0-9a-f]+)\\.png$/', $entry, $matches ) ) {
$size = getimagesize( "$wgCaptchaDirectory/$entry" );
$size = getimagesize( "$directory/$entry" );
$pick = array(
'salt' => $matches[1],
'hash' => $matches[2],
@ -156,7 +198,6 @@ class FancyCaptcha extends SimpleCaptcha {
function showImage() {
global $wgOut, $wgRequest;
global $wgCaptchaDirectory;
$wgOut->disable();
@ -172,7 +213,7 @@ class FancyCaptcha extends SimpleCaptcha {
$salt = $info['salt'];
$hash = $info['hash'];
$file = $wgCaptchaDirectory . DIRECTORY_SEPARATOR . "image_{$salt}_{$hash}.png";
$file = $this->imagePath( $salt, $hash );
if( file_exists( $file ) ) {
header( 'Content-type: image/png' );
@ -183,6 +224,18 @@ class FancyCaptcha extends SimpleCaptcha {
wfHttpError( 500, 'Internal Error', 'Requested bogus captcha image' );
return false;
}
/**
 * Build the filesystem path of a captcha image.
 *
 * The path is $wgCaptchaDirectory, followed by one subdirectory per
 * configured level named after the leading characters of $hash, followed
 * by the file name "image_{salt}_{hash}.png".
 *
 * @param string $salt salt part of the image file name
 * @param string $hash hash part of the image file name; its first
 *                     $wgCaptchaDirectoryLevels characters name the
 *                     subdirectories
 * @return string path to the image file (existence is not checked here)
 */
function imagePath( $salt, $hash ) {
	global $wgCaptchaDirectory, $wgCaptchaDirectoryLevels;
	$file = $wgCaptchaDirectory;
	$file .= DIRECTORY_SEPARATOR;
	for( $i = 0; $i < $wgCaptchaDirectoryLevels; $i++ ) {
		// Square-bracket offset: the curly-brace form $hash{$i} is
		// deprecated in PHP 7.4 and no longer parses in PHP 8.
		$file .= $hash[$i];
		$file .= DIRECTORY_SEPARATOR;
	}
	$file .= "image_{$salt}_{$hash}.png";
	return $file;
}
/**
* Show a message asking the user to enter a captcha on edit

View file

@ -21,6 +21,7 @@
#
# Further tweaks by Brion Vibber <brion@pobox.com>:
# 2006-01-26: Add command-line options for the various parameters
# 2007-02-19: Add --dirs param for hash subdirectory splits
import random
import Image
@ -98,6 +99,22 @@ def gen_captcha(text, fontname, fontsize, file_name):
# save the image, in format determined from filename
im.save(file_name)
def gen_subdir(basedir, hash, levels):
    """Generate a subdirectory path out of the first _levels_
    characters of _hash_, and ensure the directories exist
    under _basedir_.

    Returns the path relative to _basedir_ (one single-character
    component per level), or None when _levels_ is zero or negative.

    Raises IndexError if _hash_ has fewer than _levels_ characters; this
    is checked up front so no partial directory chain is left behind.
    Raises OSError if a directory cannot be created, including when
    something that is not a directory already occupies the path.
    """
    if levels > len(hash):
        raise IndexError("hash %r is too short for %d directory levels"
                         % (hash, levels))
    subdir = None
    for i in range(levels):
        char = hash[i]
        if subdir:
            subdir = os.path.join(subdir, char)
        else:
            subdir = char
        fulldir = os.path.join(basedir, subdir)
        # Create first, inspect afterwards: an exists()-then-mkdir() pair
        # can fail when another generator run creates the directory in
        # between the two calls.
        try:
            os.mkdir(fulldir)
        except OSError:
            if not os.path.isdir(fulldir):
                raise
    return subdir
if __name__ == '__main__':
"""This grabs random words from the dictionary 'words' (one
word per line) and generates a captcha image for each one,
@ -112,9 +129,10 @@ if __name__ == '__main__':
output = "."
count = 20
fill = 0
dirs = 0
verbose = False
opts, args = getopt.getopt(sys.argv[1:], "", ["font=", "wordlist=", "key=", "output=", "count=", "fill=", "verbose"])
opts, args = getopt.getopt(sys.argv[1:], "", ["font=", "wordlist=", "key=", "output=", "count=", "fill=", "dirs=", "verbose"])
for o, a in opts:
if o == "--font":
font = a
@ -128,6 +146,8 @@ if __name__ == '__main__':
count = int(a)
if o == "--fill":
fill = int(a)
if o == "--dirs":
dirs = int(a)
if o == "--verbose":
verbose = True
@ -149,6 +169,9 @@ if __name__ == '__main__':
# 64 bits of hash is plenty for this purpose
hash = md5.new(key+salt+word+key+salt).hexdigest()[:16]
filename = "image_%s_%s.png" % (salt, hash)
if dirs:
subdir = gen_subdir(output, hash, dirs)
filename = os.path.join(subdir, filename)
if verbose:
print filename
gen_captcha(word, font, 40, os.path.join(output, filename))