FancyCaptcha: Deprecate the --blacklist parameter in captcha generation and add --badwordlist as its replacement

Bug: T277936
Change-Id: I8e758023b38a4d450a0bf02b3bfc0b5033959be7
2024-09-23 10:22:37 +00:00 · 2022-03-29 18:42:29 +01:00 · 2022-03-29 18:42:29 +01:00 · db06ec876b
parent 2c08028ea4
commit db06ec876b
3 changed files with 25 additions and 22 deletions
--- a/captcha-old.py
+++ b/captcha-old.py
@ -126,7 +126,7 @@ def gen_subdir(basedir, md5hash, levels):
 			os.mkdir(fulldir)
 	return subdir

-def try_pick_word(words, blacklist, verbose, nwords, min_length, max_length):
+def try_pick_word(words, badwordlist, verbose, nwords, min_length, max_length):
 	if words is not None:
 		word = words[random.randint(0,len(words)-1)]
 		while nwords > 1:
@ -157,16 +157,16 @@ def try_pick_word(words, blacklist, verbose, nwords, min_length, max_length):
 			print("skipping word pair '%s' because it contains non-alphabetic characters" % word)
 		return None

-	for naughty in blacklist:
+	for naughty in badwordlist:
 		if naughty in word:
 			if verbose:
-				print("skipping word pair '%s' because it contains blacklisted word '%s'" % (word, naughty))
+				print("skipping word pair '%s' because it contains word '%s'" % (word, naughty))
 			return None
 	return word

-def pick_word(words, blacklist, verbose, nwords, min_length, max_length):
+def pick_word(words, badwordlist, verbose, nwords, min_length, max_length):
 	for x in range(1000): # If we can't find a valid combination in 1000 tries, just give up
-		word = try_pick_word(words, blacklist, verbose, nwords, min_length, max_length)
+		word = try_pick_word(words, badwordlist, verbose, nwords, min_length, max_length)
 		if word:
 			return word
 	sys.exit("Unable to find valid word combinations")
@ -180,13 +180,13 @@ def read_wordlist(filename):
 def run_in_thread(object):
 	count = object[0];
 	words = object[1]
-	blacklist = object[2]
+	badwordlist = object[2]
 	opts = object[3]
 	font = object[4]
 	fontsize = object[5]

 	for i in range(count):
-		word = pick_word(words, blacklist, verbose, opts.number_words, opts.min_length, opts.max_length)
+		word = pick_word(words, badwordlist, verbose, opts.number_words, opts.min_length, opts.max_length)
 		salt = "%08x" % random.randrange(2**32)
 		# 64 bits of hash is plenty for this purpose
 		md5hash = hashlib.md5((key+salt+word+key+salt).encode('utf-8')).hexdigest()[:16]
@ -215,7 +215,8 @@ if __name__ == '__main__':
 	parser.add_option("--font", help="The font to use (required)", metavar="FONT.ttf")
 	parser.add_option("--font-size", help="The font size (default 40)", metavar="N", type='int', default=40)
 	parser.add_option("--count", help="The maximum number of images to make (default 20)", metavar="N", type='int', default=20)
-	parser.add_option("--blacklist", help="A blacklist of words that should not be used", metavar="FILE", default=os.path.join(script_dir, "blacklist"))
+	parser.add_option("--badwordlist", help="A list of words that should not be used", metavar="FILE", default=os.path.join(script_dir, "badwordlist"))
+	parser.add_option("--blacklist", help="DEPRECATED: list of words that should not be used", metavar="FILE", default=os.path.join(script_dir, "blacklist"))
 	parser.add_option("--fill", help="Fill the output directory to contain N files, overrides count, cannot be used with --dirs", metavar="N", type='int')
 	parser.add_option("--dirs", help="Put the images into subdirectories N levels deep - $wgCaptchaDirectoryLevels", metavar="N", type='int')
 	parser.add_option("--verbose", "-v", help="Show debugging information", action='store_true')
@ -245,7 +246,7 @@ if __name__ == '__main__':
 	else:
 		sys.exit("Need to specify the location of a font")

-	blacklist = read_wordlist(opts.blacklist)
+	badwordlist = read_wordlist(opts.blacklist) + read_wordlist(opts.badwordlist)
 	count = opts.count
 	fill = opts.fill
 	dirs = opts.dirs
@ -276,6 +277,6 @@ if __name__ == '__main__':
 	data = []
 	print("Generating %s CAPTCHA images separated in %s image(s) per chunk run by %s threads..." % (count, chunks, threads))
 	for i in range(0, threads):
-		data.append([chunks, words, blacklist, opts, font, fontsize])
+		data.append([chunks, words, badwordlist, opts, font, fontsize])

 	p.map(run_in_thread, data)
--- a/captcha.py
+++ b/captcha.py
@ -143,7 +143,7 @@ def gen_subdir(basedir, md5hash, levels):
 			os.mkdir(fulldir)
 	return subdir

-def try_pick_word(words, blacklist, verbose, nwords, min_length, max_length):
+def try_pick_word(words, badwordlist, verbose, nwords, min_length, max_length):
 	if words is not None:
 		word = words[random.randint(0,len(words)-1)]
 		while nwords > 1:
@ -174,16 +174,16 @@ def try_pick_word(words, blacklist, verbose, nwords, min_length, max_length):
 			print("skipping word pair '%s' because it contains non-alphabetic characters" % word)
 		return None

-	for naughty in blacklist:
+	for naughty in badwordlist:
 		if naughty in word:
 			if verbose:
-				print("skipping word pair '%s' because it contains blacklisted word '%s'" % (word, naughty))
+				print("skipping word pair '%s' because it contains word '%s'" % (word, naughty))
 			return None
 	return word

-def pick_word(words, blacklist, verbose, nwords, min_length, max_length):
+def pick_word(words, badwordlist, verbose, nwords, min_length, max_length):
 	for x in range(1000): # If we can't find a valid combination in 1000 tries, just give up
-		word = try_pick_word(words, blacklist, verbose, nwords, min_length, max_length)
+		word = try_pick_word(words, badwordlist, verbose, nwords, min_length, max_length)
 		if word:
 			return word
 	sys.exit("Unable to find valid word combinations")
@ -197,13 +197,13 @@ def read_wordlist(filename):
 def run_in_thread(object):
 	count = object[0];
 	words = object[1]
-	blacklist = object[2]
+	badwordlist = object[2]
 	opts = object[3]
 	font = object[4]
 	fontsize = object[5]

 	for i in range(count):
-		word = pick_word(words, blacklist, verbose, opts.number_words, opts.min_length, opts.max_length)
+		word = pick_word(words, badwordlist, verbose, opts.number_words, opts.min_length, opts.max_length)
 		salt = "%08x" % random.randrange(2**32)
 		# 64 bits of hash is plenty for this purpose
 		md5hash = hashlib.md5((key+salt+word+key+salt).encode('utf-8')).hexdigest()[:16]
@ -232,7 +232,8 @@ if __name__ == '__main__':
 	parser.add_option("--font", help="The font to use (required)", metavar="FONT.ttf")
 	parser.add_option("--font-size", help="The font size (default 40)", metavar="N", type='int', default=40)
 	parser.add_option("--count", help="The maximum number of images to make (default 20)", metavar="N", type='int', default=20)
-	parser.add_option("--blacklist", help="A blacklist of words that should not be used", metavar="FILE", default=os.path.join(script_dir, "blacklist"))
+	parser.add_option("--badwordlist", help="A list of words that should not be used", metavar="FILE", default=os.path.join(script_dir, "badwordlist"))
+	parser.add_option("--blacklist", help="DEPRECATED: list of words that should not be used", metavar="FILE", default=os.path.join(script_dir, "blacklist"))
 	parser.add_option("--fill", help="Fill the output directory to contain N files, overrides count, cannot be used with --dirs", metavar="N", type='int')
 	parser.add_option("--dirs", help="Put the images into subdirectories N levels deep - $wgCaptchaDirectoryLevels", metavar="N", type='int')
 	parser.add_option("--verbose", "-v", help="Show debugging information", action='store_true')
@ -262,7 +263,7 @@ if __name__ == '__main__':
 	else:
 		sys.exit("Need to specify the location of a font")

-	blacklist = read_wordlist(opts.blacklist)
+	badwordlist = read_wordlist(opts.blacklist) + read_wordlist(opts.badwordlist)
 	count = opts.count
 	fill = opts.fill
 	dirs = opts.dirs
@ -293,6 +294,6 @@ if __name__ == '__main__':
 	data = []
 	print("Generating %s CAPTCHA images separated in %s image(s) per chunk run by %s threads..." % (count, chunks, threads))
 	for i in range(0, threads):
-		data.append([chunks, words, blacklist, opts, font, fontsize])
+		data.append([chunks, words, badwordlist, opts, font, fontsize])

 	p.map(run_in_thread, data)
--- a/maintenance/GenerateFancyCaptchas.php
+++ b/maintenance/GenerateFancyCaptchas.php
@ -42,7 +42,8 @@ class GenerateFancyCaptchas extends Maintenance {
 		$this->addOption( "wordlist", 'A list of words', true, true );
 		$this->addOption( "font", "The font to use", true, true );
 		$this->addOption( "font-size", "The font size ", false, true );
-		$this->addOption( "blacklist", "A blacklist of words that should not be used", false, true );
+		$this->addOption( "badwordlist", "A list of words that should not be used", false, true );
+		$this->addOption( "blacklist", "DEPRECATED: A list of words that should not be used", false, true );
 		$this->addOption( "fill", "Fill the captcha container to N files", true, true );
 		$this->addOption(
 			"verbose",
@ -104,7 +105,7 @@ class GenerateFancyCaptchas extends Maintenance {
 			wfEscapeShellArg( $wgCaptchaDirectoryLevels )
 		);
 		foreach (
-			[ 'wordlist', 'font', 'font-size', 'blacklist', 'verbose', 'threads' ] as $par
+			[ 'wordlist', 'font', 'font-size', 'blacklist', 'badwordlist', 'verbose', 'threads' ] as $par
 		) {
 			if ( $this->hasOption( $par ) ) {
 				$cmd .= " --$par " . wfEscapeShellArg( $this->getOption( $par ) );