Blocking spammers

A few years ago, I wrote a service called KasMail [kasmail.com], which provides free temporary email addresses. This service is mainly used by people who need to leave an email address on suspicious sites that might be collecting email addresses for spamming. To limit the spam, I use several reputation blacklists. The most efficient and most reactive one, with a very small number of false positives, is called CBL [cbl.abuseat.org].

To limit the number of reverse lookups on my DNS servers, as well as on abuseat.org, I use a local copy of their blacklist database. This list is freely accessible at rsync://rsync.cbl.abuseat.org/cbl/list.txt.

Every 12 hours, a cron entry launches make to update all my databases. Here is an extract of this Makefile. As you can see in the Makefile, the latest version of the database is synchronized with rsync. Then cdbmake.py is run to build a cdb database. The cdb database format, created by Daniel J. Bernstein, acts as an on-disk associative array, mapping keys to values. The main characteristic of the database is its speed: it takes only two disk accesses to retrieve the data.

# Search path used to find rsync and cdbmake.py when make runs from cron.
PATH=/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/sbin:/usr/local/bin:
# Directory holding the exim lookup databases.
DBE=/disks/databases/exim

# NOTE(review): recipe lines are shown indented with spaces; make expects a
# leading tab -- presumably a formatting artifact of the listing, confirm.

# Marker file: with no prerequisites, this recipe only runs when the marker
# does not exist yet. The cbl.db recipe removes it again when it finishes.
cbl.txt.touch:
        touch cbl.txt.touch
# Download the current CBL list into $(DBE); -t preserves the modification time.
cbl.txt:
        rsync -t rsync://rsync.cbl.abuseat.org/cbl/list.txt $(DBE)/cbl.txt
# Convert the downloaded list with cdbmake.py, then remove the marker file.
cbl.db: cbl.txt cbl.txt.touch
        cdbmake.py -v -f $(DBE)/cbl.txt
        @rm cbl.txt.touch

# Search path used to find rsync and cdbmake.py when make runs from cron.
PATH=/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/sbin:/usr/local/bin:
# Directory holding the exim lookup databases.
DBE=/disks/databases/exim

# NOTE(review): recipe lines are shown without indentation; make expects a
# leading tab -- presumably a formatting artifact of the listing, confirm.

# Marker file: with no prerequisites, this recipe only runs when the marker
# does not exist yet. The cbl.db recipe removes it again when it finishes.
cbl.txt.touch:
touch cbl.txt.touch
# Download the current CBL list into $(DBE); -t preserves the modification time.
cbl.txt:
rsync -t rsync://rsync.cbl.abuseat.org/cbl/list.txt $(DBE)/cbl.txt
# Convert the downloaded list with cdbmake.py, then remove the marker file.
cbl.db: cbl.txt cbl.txt.touch
cdbmake.py -v -f $(DBE)/cbl.txt
@rm cbl.txt.touch


In order for exim to look up addresses in that database, I use the following rule.
check_rcpt:
  ...
  deny  message = Rejected - Blacklisted IP address check \
            http://cbl.abuseat.org/lookup.cgi?ip=$sender_host_address \
            for more info.
        log_message = "Rejected - IP address found in internal cbl copy"
        hosts = net-cdb;/disks/databases/exim/cbl.cdb
  ...

check_rcpt:
...
deny message = Rejected - Blacklisted IP address check \
http://cbl.abuseat.org/lookup.cgi?ip=$sender_host_address \
for more info.
log_message = "Rejected - IP address found in internal cbl copy"
hosts = net-cdb;/disks/databases/exim/cbl.cdb
...


The list of blacklisted IP addresses given by abuseat.org is a text file. In order to get good lookup performance, we transform this text file into a cdb database with the following Python program.
#!/usr/bin/env python
# Copyright (c) 2007, Fred C. Franquet
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#     * Neither the name of Fred C. Franquet nor the names of its contributors
#       may be used to endorse or promote products derived from this software
#       without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY FRED C. FRANQUET ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL <copyright holder> BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys
import re
import cdb
import getopt
import os.path

import time

# Separator between the key and the optional value on each input line.
sep = re.compile(r':')

# Help text printed for -h/--help and whenever the command line is invalid.
usage = """Usage: cdbmake.py [-f filename | --file=filename]
	[-h | --help] print this message
	[-v] verbose, print statistics
"""

def main():
	"""Build a cdb database from a colon-separated text file.

	Options are read from sys.argv:
	  -f FILE, --file=FILE  input text file (required)
	  -v                    print the file names used and insertion statistics
	  -h, --help            print the usage message and exit

	The database is written next to the input file, using the input's base
	name with a '.cdb' extension. A path with the same base name and a
	'.tmp' extension is passed to cdb.cdbmake as its second argument
	(presumably its scratch file -- confirm against the cdb module docs).

	Input format: lines starting with '#' and blank lines are skipped.
	A line "key" is stored with an empty value, a line "key:value" with
	that value. Lines containing more than one ':' are ignored.

	Exits with status 2 on invalid options, and with status 1 when no input
	file is given or the input file cannot be opened.
	"""
	try:
		opts, args = getopt.getopt(sys.argv[1:], "hf:v", ["help", "file="])
	except getopt.GetoptError:
		print(usage)
		sys.exit(2)

	filename = None
	verbose = False
	for o, a in opts:
		if o == '-v':
			verbose = True
		elif o in ('-h', '--help'):
			print(usage)
			sys.exit()
		elif o in ('-f', '--file'):
			filename = a

	if filename is None:
		print(usage)
		sys.exit(1)

	# Derive the output and temporary file names from the input file name.
	infile = os.path.abspath(filename)
	cdw = os.path.dirname(infile)
	basename = os.path.splitext(os.path.basename(infile))[0]
	cdbfile = os.path.join(cdw, basename + '.cdb')
	tmpname = os.path.join(cdw, basename + '.tmp')

	if verbose:
		print("Input file:  %s" % infile)
		print("Output file:  %s" % cdbfile)
		print("Temp file:  %s" % tmpname)
		# NOTE(review): time.clock() was removed in Python 3.8; switch to
		# time.time() if this script is ever ported to Python 3.
		exectime = time.clock()

	# Open the input before creating the cdb maker, so that an unreadable
	# input file is reported before any database work has started.
	try:
		fd = open(infile)
	except IOError as why:
		sys.stderr.write('%s\n' % str(why))
		sys.exit(1)

	try:
		maker = cdb.cdbmake(cdbfile, tmpname)
		for line in fd:
			if line.startswith('#'):
				continue
			# Strip only the line terminator: slicing off the last character
			# would truncate a final line that has no trailing newline.
			line = line.rstrip('\r\n')
			if not line:
				# A blank line would otherwise be inserted as an empty key.
				continue
			fields = sep.split(line)
			if len(fields) == 1:
				maker.add(fields[0], '')
			elif len(fields) == 2:
				maker.add(fields[0], fields[1])
		maker.finish()
		del maker
	finally:
		# Always release the input file handle, even if an insert fails.
		fd.close()

	if verbose:
		c = cdb.init(cdbfile)
		print("%s Keys inserted in %.2f sec" % (len(c), time.clock() - exectime))
		del c
		
if __name__ == "__main__":
	# main() returns None, so this exits with status 0 once it completes.
	sys.exit(main())

#!/usr/bin/env python
# Copyright (c) 2007, Fred C. Franquet
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of Fred C. Franquet nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY FRED C. FRANQUET ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL <copyright holder> BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys
import re
import cdb
import getopt
import os.path

import time

# Separator between the key and the optional value on each input line.
sep = re.compile(r':')

# Help text printed for -h/--help and whenever the command line is invalid.
usage = """Usage: cdbmake.py [-f filename | --file=filename]
[-h | --help] print this message
[-v] verbose, print statistics
"""

def main():
	"""Build a cdb database from a colon-separated text file.

	Options are read from sys.argv:
	  -f FILE, --file=FILE  input text file (required)
	  -v                    print the file names used and insertion statistics
	  -h, --help            print the usage message and exit

	The database is written next to the input file, using the input's base
	name with a '.cdb' extension. A path with the same base name and a
	'.tmp' extension is passed to cdb.cdbmake as its second argument
	(presumably its scratch file -- confirm against the cdb module docs).

	Input format: lines starting with '#' and blank lines are skipped.
	A line "key" is stored with an empty value, a line "key:value" with
	that value. Lines containing more than one ':' are ignored.

	Exits with status 2 on invalid options, and with status 1 when no input
	file is given or the input file cannot be opened.
	"""
	try:
		opts, args = getopt.getopt(sys.argv[1:], "hf:v", ["help", "file="])
	except getopt.GetoptError:
		print(usage)
		sys.exit(2)

	filename = None
	verbose = False
	for o, a in opts:
		if o == '-v':
			verbose = True
		elif o in ('-h', '--help'):
			print(usage)
			sys.exit()
		elif o in ('-f', '--file'):
			filename = a

	if filename is None:
		print(usage)
		sys.exit(1)

	# Derive the output and temporary file names from the input file name.
	infile = os.path.abspath(filename)
	cdw = os.path.dirname(infile)
	basename = os.path.splitext(os.path.basename(infile))[0]
	cdbfile = os.path.join(cdw, basename + '.cdb')
	tmpname = os.path.join(cdw, basename + '.tmp')

	if verbose:
		print("Input file:  %s" % infile)
		print("Output file:  %s" % cdbfile)
		print("Temp file:  %s" % tmpname)
		# NOTE(review): time.clock() was removed in Python 3.8; switch to
		# time.time() if this script is ever ported to Python 3.
		exectime = time.clock()

	# Open the input before creating the cdb maker, so that an unreadable
	# input file is reported before any database work has started.
	try:
		fd = open(infile)
	except IOError as why:
		sys.stderr.write('%s\n' % str(why))
		sys.exit(1)

	try:
		maker = cdb.cdbmake(cdbfile, tmpname)
		for line in fd:
			if line.startswith('#'):
				continue
			# Strip only the line terminator: slicing off the last character
			# would truncate a final line that has no trailing newline.
			line = line.rstrip('\r\n')
			if not line:
				# A blank line would otherwise be inserted as an empty key.
				continue
			fields = sep.split(line)
			if len(fields) == 1:
				maker.add(fields[0], '')
			elif len(fields) == 2:
				maker.add(fields[0], fields[1])
		maker.finish()
		del maker
	finally:
		# Always release the input file handle, even if an insert fails.
		fd.close()

	if verbose:
		c = cdb.init(cdbfile)
		print("%s Keys inserted in %.2f sec" % (len(c), time.clock() - exectime))
		del c

if __name__ == "__main__":
	# main() returns None, so this exits with status 0 once it completes.
	sys.exit(main())


 

Leave a message

(Required)
(Required and not displayed)
(Optional)
obfuscated letters Enter the text shown in the image